/**
 * Implementation of the full (unrestricted) Damerau–Levenshtein distance algorithm.
 *
 * <p>The distance is the minimum number of operations required to transform one
 * string into another, where the supported operations are insertion, deletion,
 * substitution, and transposition of two adjacent characters.
 *
 * <p>Unlike the restricted version (optimal string alignment, OSA), this
 * implementation allows a substring to be edited again after it has been
 * transposed, so it computes the true edit distance (e.g. "CA" -> "ABC" is 2).
 *
 * <p>Strings are compared {@code char} by {@code char} (UTF-16 code units), so a
 * supplementary code point encoded as a surrogate pair counts as two characters.
 *
 * <p>Time Complexity: O(n * m) expected (one constant-time hash lookup per table cell)
 * <br>Space Complexity: O(n * m) for the table plus O(distinct characters of s1) for the map
 */
public final class DamerauLevenshteinDistance {

    private DamerauLevenshteinDistance() {
        // Utility class; not meant to be instantiated.
    }

    /**
     * Computes the full Damerau–Levenshtein distance between two strings.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return the minimum edit distance between the two strings
     * @throws IllegalArgumentException if either input string is null
     */
    public static int distance(String s1, String s2) {
        validateInputs(s1, s2);

        int n = s1.length();
        int m = s2.length();

        // Maps a character to the 1-based index of its most recent occurrence in the
        // already processed prefix of s1. A missing entry means "not seen yet" (0).
        Map<Character, Integer> charLastPosition = new HashMap<>();
        int[][] dp = initializeTable(n, m);

        fillTable(s1, s2, dp, charLastPosition);

        return dp[n + 1][m + 1];
    }

    /**
     * Validates that both input strings are not null.
     *
     * @param s1 the first string to validate
     * @param s2 the second string to validate
     * @throws IllegalArgumentException if either string is null
     */
    private static void validateInputs(String s1, String s2) {
        if (s1 == null || s2 == null) {
            throw new IllegalArgumentException("Input strings must not be null.");
        }
    }

    /**
     * Initializes the dynamic programming table for the algorithm.
     *
     * <p>The table has dimensions (n+2) x (m+2). Row 0 and column 0 form a sentinel
     * border holding {@code n + m}, an upper bound on any distance, so that a
     * transposition anchored at "character never seen" (index 0) can never win the
     * minimum. Row 1 and column 1 hold the base case of transforming from or to
     * the empty string.
     *
     * @param n the length of the first string
     * @param m the length of the second string
     * @return an initialized DP table ready for computation
     */
    private static int[][] initializeTable(int n, int m) {
        int maxDist = n + m;
        int[][] dp = new int[n + 2][m + 2];

        dp[0][0] = maxDist;

        for (int i = 0; i <= n; i++) {
            dp[i + 1][0] = maxDist;
            dp[i + 1][1] = i;
        }

        for (int j = 0; j <= m; j++) {
            dp[0][j + 1] = maxDist;
            dp[1][j + 1] = j;
        }

        return dp;
    }

    /**
     * Fills the dynamic programming table with the edit distance of every pair of
     * prefixes of {@code s1} and {@code s2}.
     *
     * <p>The cell for prefixes of length {@code i} and {@code j} is stored at
     * {@code dp[i + 1][j + 1]} because of the sentinel border.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @param dp the dynamic programming table to fill
     * @param charLastPosition map tracking the last 1-based position of each character
     *     in the processed prefix of s1; updated after each row
     */
    private static void fillTable(String s1, String s2, int[][] dp, Map<Character, Integer> charLastPosition) {
        int n = s1.length();
        int m = s2.length();

        for (int i = 1; i <= n; i++) {
            char char1 = s1.charAt(i - 1);
            // Last column (1-based) in this row where s2 matched char1; 0 if none yet.
            int lastMatchCol = 0;

            for (int j = 1; j <= m; j++) {
                char char2 = s2.charAt(j - 1);

                // Last row strictly above i where s1 contains char2; 0 if none.
                int lastMatchRow = charLastPosition.getOrDefault(char2, 0);

                // The transposition must be anchored at the last match strictly BEFORE
                // column j, so capture the value before a match in this cell updates it.
                // Using the updated column makes the candidate one too cheap when
                // char1 also occurs earlier in s1 (e.g. "aa" vs "a" would yield 0).
                int transpositionCol = lastMatchCol;

                int cost = 1;
                if (char1 == char2) {
                    cost = 0;
                    lastMatchCol = j;
                }

                dp[i + 1][j + 1] = computeMinimumCost(dp, i, j, lastMatchRow, transpositionCol, cost);
            }

            charLastPosition.put(char1, i);
        }
    }

    /**
     * Computes the minimum cost among all possible operations at the current position.
     *
     * <p>Candidates:
     * <ol>
     *   <li>Substitution (or match when {@code cost} is 0)</li>
     *   <li>Insertion of the character of s2 at position j</li>
     *   <li>Deletion of the character of s1 at position i</li>
     *   <li>Transposition: start from the prefixes ending just before
     *       {@code lastMatchRow} / {@code lastMatchCol}, delete the characters of s1
     *       between the pair, swap the pair, and insert the characters of s2
     *       between the pair</li>
     * </ol>
     *
     * @param dp the dynamic programming table
     * @param i the current 1-based position in the first string
     * @param j the current 1-based position in the second string
     * @param lastMatchRow the last row before i where s1 holds the current character of s2 (0 if none)
     * @param lastMatchCol the last column before j where s2 holds the current character of s1 (0 if none)
     * @param cost the substitution cost (0 if characters match, 1 otherwise)
     * @return the minimum cost among all operations
     */
    private static int computeMinimumCost(int[][] dp, int i, int j, int lastMatchRow, int lastMatchCol, int cost) {
        int substitution = dp[i][j] + cost;
        int insertion = dp[i + 1][j] + 1;
        int deletion = dp[i][j + 1] + 1;
        // deletions between the pair + one swap + insertions between the pair
        int transposition = dp[lastMatchRow][lastMatchCol] + (i - lastMatchRow - 1) + 1 + (j - lastMatchCol - 1);

        return Math.min(Math.min(substitution, insertion), Math.min(deletion, transposition));
    }
}
b/src/test/java/com/thealgorithms/dynamicprogramming/DamerauLevenshteinDistanceTest.java @@ -0,0 +1,194 @@ +package com.thealgorithms.dynamicprogramming; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for the {@code DamerauLevenshteinDistance} class. + * Tests cover edge cases, basic operations, and complex transposition scenarios. + */ +class DamerauLevenshteinDistanceTest { + + @Test + @DisplayName("Should throw exception for null first string") + void testNullFirstString() { + assertThrows(IllegalArgumentException.class, () -> { DamerauLevenshteinDistance.distance(null, "test"); }); + } + + @Test + @DisplayName("Should throw exception for null second string") + void testNullSecondString() { + assertThrows(IllegalArgumentException.class, () -> { DamerauLevenshteinDistance.distance("test", null); }); + } + + @Test + @DisplayName("Should throw exception for both null strings") + void testBothNullStrings() { + assertThrows(IllegalArgumentException.class, () -> { DamerauLevenshteinDistance.distance(null, null); }); + } + + @Test + @DisplayName("Should return 0 for identical strings") + void testIdenticalStrings() { + assertEquals(0, DamerauLevenshteinDistance.distance("", "")); + assertEquals(0, DamerauLevenshteinDistance.distance("a", "a")); + assertEquals(0, DamerauLevenshteinDistance.distance("abc", "abc")); + assertEquals(0, DamerauLevenshteinDistance.distance("hello", "hello")); + } + + @Test + @DisplayName("Should return length when one string is empty") + void testEmptyStrings() { + assertEquals(3, DamerauLevenshteinDistance.distance("", "abc")); + assertEquals(5, DamerauLevenshteinDistance.distance("hello", "")); + assertEquals(0, DamerauLevenshteinDistance.distance("", "")); + } + + @Test + @DisplayName("Should handle 
single character insertions") + void testSingleInsertion() { + assertEquals(1, DamerauLevenshteinDistance.distance("cat", "cats")); + assertEquals(1, DamerauLevenshteinDistance.distance("ab", "abc")); + assertEquals(1, DamerauLevenshteinDistance.distance("", "a")); + } + + @Test + @DisplayName("Should handle single character deletions") + void testSingleDeletion() { + assertEquals(1, DamerauLevenshteinDistance.distance("cats", "cat")); + assertEquals(1, DamerauLevenshteinDistance.distance("abc", "ab")); + assertEquals(1, DamerauLevenshteinDistance.distance("a", "")); + } + + @Test + @DisplayName("Should handle single character substitutions") + void testSingleSubstitution() { + assertEquals(1, DamerauLevenshteinDistance.distance("cat", "bat")); + assertEquals(1, DamerauLevenshteinDistance.distance("abc", "adc")); + assertEquals(1, DamerauLevenshteinDistance.distance("x", "y")); + } + + @Test + @DisplayName("Should handle adjacent character transpositions") + void testAdjacentTransposition() { + assertEquals(1, DamerauLevenshteinDistance.distance("ab", "ba")); + assertEquals(1, DamerauLevenshteinDistance.distance("abc", "bac")); + assertEquals(1, DamerauLevenshteinDistance.distance("hello", "ehllo")); + } + + @Test + @DisplayName("Should correctly compute distance for CA to ABC") + void testCAtoABC() { + // This is the critical test case that differentiates full DL from OSA + // Full DL: 2 (insert A at start, insert B in middle) + // OSA would give: 3 + assertEquals(2, DamerauLevenshteinDistance.distance("CA", "ABC")); + } + + @Test + @DisplayName("Should handle non-adjacent transpositions") + void testNonAdjacentTransposition() { + assertEquals(2, DamerauLevenshteinDistance.distance("abc", "cba")); + assertEquals(3, DamerauLevenshteinDistance.distance("abcd", "dcba")); + } + + @Test + @DisplayName("Should handle multiple operations") + void testMultipleOperations() { + assertEquals(3, DamerauLevenshteinDistance.distance("kitten", "sitting")); + assertEquals(3, 
DamerauLevenshteinDistance.distance("saturday", "sunday")); + assertEquals(5, DamerauLevenshteinDistance.distance("intention", "execution")); + } + + @Test + @DisplayName("Should handle completely different strings") + void testCompletelyDifferentStrings() { + assertEquals(3, DamerauLevenshteinDistance.distance("abc", "xyz")); + assertEquals(4, DamerauLevenshteinDistance.distance("hello", "world")); + } + + @Test + @DisplayName("Should handle strings with repeated characters") + void testRepeatedCharacters() { + assertEquals(0, DamerauLevenshteinDistance.distance("aaa", "aaa")); + assertEquals(1, DamerauLevenshteinDistance.distance("aaa", "aab")); + assertEquals(1, DamerauLevenshteinDistance.distance("aaa", "aba")); + } + + @Test + @DisplayName("Should be symmetric") + void testSymmetry() { + assertEquals(DamerauLevenshteinDistance.distance("abc", "def"), DamerauLevenshteinDistance.distance("def", "abc")); + assertEquals(DamerauLevenshteinDistance.distance("hello", "world"), DamerauLevenshteinDistance.distance("world", "hello")); + } + + @Test + @DisplayName("Should handle case sensitivity") + void testCaseSensitivity() { + assertEquals(1, DamerauLevenshteinDistance.distance("Hello", "hello")); + assertEquals(5, DamerauLevenshteinDistance.distance("HELLO", "hello")); + } + + @Test + @DisplayName("Should handle single character strings") + void testSingleCharacterStrings() { + assertEquals(1, DamerauLevenshteinDistance.distance("a", "b")); + assertEquals(0, DamerauLevenshteinDistance.distance("a", "a")); + assertEquals(2, DamerauLevenshteinDistance.distance("a", "abc")); + } + + @Test + @DisplayName("Should handle long strings efficiently") + void testLongStrings() { + String s1 = "abcdefghijklmnopqrstuvwxyz"; + String s2 = "abcdefghijklmnopqrstuvwxyz"; + assertEquals(0, DamerauLevenshteinDistance.distance(s1, s2)); + + String s3 = "abcdefghijklmnopqrstuvwxyz"; + String s4 = "zyxwvutsrqponmlkjihgfedcba"; + assertEquals(25, DamerauLevenshteinDistance.distance(s3, 
s4)); + } + + @Test + @DisplayName("Should satisfy triangle inequality") + void testTriangleInequality() { + // d(a,c) <= d(a,b) + d(b,c) + String a = "cat"; + String b = "hat"; + String c = "rat"; + + int ab = DamerauLevenshteinDistance.distance(a, b); + int bc = DamerauLevenshteinDistance.distance(b, c); + int ac = DamerauLevenshteinDistance.distance(a, c); + + assertTrue(ac <= ab + bc); + } + + @Test + @DisplayName("Should handle special characters") + void testSpecialCharacters() { + assertEquals(0, DamerauLevenshteinDistance.distance("hello!", "hello!")); + assertEquals(1, DamerauLevenshteinDistance.distance("hello!", "hello?")); + assertEquals(1, DamerauLevenshteinDistance.distance("a@b", "a#b")); + } + + @Test + @DisplayName("Should handle numeric strings") + void testNumericStrings() { + assertEquals(1, DamerauLevenshteinDistance.distance("123", "124")); + assertEquals(1, DamerauLevenshteinDistance.distance("123", "213")); + assertEquals(0, DamerauLevenshteinDistance.distance("999", "999")); + } + + @Test + @DisplayName("Should handle unicode characters") + void testUnicodeCharacters() { + assertEquals(0, DamerauLevenshteinDistance.distance("café", "café")); + assertEquals(1, DamerauLevenshteinDistance.distance("café", "cafe")); + assertEquals(0, DamerauLevenshteinDistance.distance("你好", "你好")); + } +}