mirror of
https://github.com/TheAlgorithms/Java.git
synced 2026-03-13 08:40:43 +08:00
Refactor KMP and RabinKarp: Improve Reusability and Test Coverage (#7250)
* first commit
* Running KMPTest and RabinKarpTest with fixed formatting
* now build failed error resolved
* now build failed error resolved 2

---------

Co-authored-by: Divyansh Saxena <divyanshsaxena@gmail.com>
Co-authored-by: Deniz Altunkapan <deniz.altunkapan@outlook.com>
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
package com.thealgorithms.strings;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Implementation of the Knuth–Morris–Pratt algorithm. Usage: see the main function
|
||||
* for an example
|
||||
@@ -8,16 +11,19 @@ public final class KMP {
|
||||
private KMP() {
|
||||
}
|
||||
|
||||
// a working example
|
||||
/**
|
||||
* Finds every starting index in {@code haystack} at which {@code needle} occurs.
|
||||
*
|
||||
* @param haystack The text to be searched
|
||||
* @param needle The pattern to be searched for
|
||||
* @return A list of starting indices where the pattern is found
|
||||
*/
|
||||
public static List<Integer> kmpMatcher(final String haystack, final String needle) {
|
||||
List<Integer> occurrences = new ArrayList<>();
|
||||
if (haystack == null || needle == null || needle.isEmpty()) {
|
||||
return occurrences;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
final String haystack = "AAAAABAAABA"; // This is the full string
|
||||
final String needle = "AAAA"; // This is the substring that we want to find
|
||||
kmpMatcher(haystack, needle);
|
||||
}
|
||||
|
||||
// find the starting index in string haystack[] that matches the search word P[]
|
||||
public static void kmpMatcher(final String haystack, final String needle) {
|
||||
final int m = haystack.length();
|
||||
final int n = needle.length();
|
||||
final int[] pi = computePrefixFunction(needle);
|
||||
@@ -32,10 +38,11 @@ public final class KMP {
|
||||
}
|
||||
|
||||
if (q == n) {
|
||||
System.out.println("Pattern starts: " + (i + 1 - n));
|
||||
occurrences.add(i + 1 - n);
|
||||
q = pi[q - 1];
|
||||
}
|
||||
}
|
||||
return occurrences;
|
||||
}
|
||||
|
||||
// return the prefix function
|
||||
|
||||
@@ -1,32 +1,30 @@
|
||||
package com.thealgorithms.strings;
|
||||
|
||||
import java.util.Scanner;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author Prateek Kumar Oraon (https://github.com/prateekKrOraon)
|
||||
*
|
||||
An implementation of Rabin-Karp string matching algorithm
|
||||
Program will simply end if there is no match
|
||||
* An implementation of Rabin-Karp string matching algorithm
|
||||
* The search methods return an empty list if there is no match
|
||||
*/
|
||||
public final class RabinKarp {
|
||||
private RabinKarp() {
|
||||
}
|
||||
|
||||
public static Scanner scanner = null;
|
||||
public static final int ALPHABET_SIZE = 256;
|
||||
private static final int ALPHABET_SIZE = 256;
|
||||
|
||||
public static void main(String[] args) {
|
||||
scanner = new Scanner(System.in);
|
||||
System.out.println("Enter String");
|
||||
String text = scanner.nextLine();
|
||||
System.out.println("Enter pattern");
|
||||
String pattern = scanner.nextLine();
|
||||
|
||||
int q = 101;
|
||||
searchPat(text, pattern, q);
|
||||
public static List<Integer> search(String text, String pattern) {
|
||||
return search(text, pattern, 101);
|
||||
}
|
||||
|
||||
private static void searchPat(String text, String pattern, int q) {
|
||||
public static List<Integer> search(String text, String pattern, int q) {
|
||||
List<Integer> occurrences = new ArrayList<>();
|
||||
if (text == null || pattern == null || pattern.isEmpty()) {
|
||||
return occurrences;
|
||||
}
|
||||
|
||||
int m = pattern.length();
|
||||
int n = text.length();
|
||||
int t = 0;
|
||||
@@ -35,48 +33,42 @@ public final class RabinKarp {
|
||||
int j = 0;
|
||||
int i = 0;
|
||||
|
||||
h = (int) Math.pow(ALPHABET_SIZE, m - 1) % q;
|
||||
if (m > n) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
// h = pow(ALPHABET_SIZE, m-1) % q
|
||||
for (i = 0; i < m - 1; i++) {
|
||||
h = h * ALPHABET_SIZE % q;
|
||||
}
|
||||
|
||||
for (i = 0; i < m; i++) {
|
||||
// Build the initial hashes one character at a time: p is the hash of the whole pattern
|
||||
// and t is the hash of the first window of the text, i.e. its first m characters,
|
||||
// both reduced modulo q
|
||||
p = (ALPHABET_SIZE * p + pattern.charAt(i)) % q;
|
||||
t = (ALPHABET_SIZE * t + text.charAt(i)) % q;
|
||||
}
|
||||
|
||||
for (i = 0; i <= n - m; i++) {
|
||||
// If the hash of the pattern equals the hash of the current text window, the window is
|
||||
// compared with the pattern character by character, because equal hashes alone do not
|
||||
// prove a match; if every character agrees, the pattern occurs at index i. Whether or
|
||||
// not it matched, the hash is then rolled forward: the contribution of the window's
|
||||
// first character is subtracted and the character just past its end is added
|
||||
if (p == t) {
|
||||
// if hash value matches then the individual characters are matched
|
||||
for (j = 0; j < m; j++) {
|
||||
// if not matched then break out of the loop
|
||||
if (text.charAt(i + j) != pattern.charAt(j)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// if all characters are matched then pattern exist in the string
|
||||
if (j == m) {
|
||||
System.out.println("Pattern found at index " + i);
|
||||
occurrences.add(i);
|
||||
}
|
||||
}
|
||||
|
||||
// if i<n-m then hash value of the first character of the text is subtracted and hash
|
||||
// value of the next character after the end of the evaluated characters is added to get
|
||||
// the hash value of the next window of characters in the text
|
||||
if (i < n - m) {
|
||||
t = (ALPHABET_SIZE * (t - text.charAt(i) * h) + text.charAt(i + m)) % q;
|
||||
|
||||
// if hash value becomes less than zero then q is added to make it positive
|
||||
t = (t - text.charAt(i) * h % q);
|
||||
if (t < 0) {
|
||||
t = (t + q);
|
||||
t += q;
|
||||
}
|
||||
t = t * ALPHABET_SIZE % q;
|
||||
t = (t + text.charAt(i + m)) % q;
|
||||
}
|
||||
}
|
||||
return occurrences;
|
||||
}
|
||||
}
|
||||
|
||||
29
src/test/java/com/thealgorithms/strings/KMPTest.java
Normal file
29
src/test/java/com/thealgorithms/strings/KMPTest.java
Normal file
@@ -0,0 +1,29 @@
|
||||
package com.thealgorithms.strings;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.util.List;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class KMPTest {
|
||||
|
||||
@Test
|
||||
public void testNullInputs() {
|
||||
assertEquals(List.of(), KMP.kmpMatcher(null, "A"));
|
||||
assertEquals(List.of(), KMP.kmpMatcher("A", null));
|
||||
assertEquals(List.of(), KMP.kmpMatcher(null, null));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKMPMatcher() {
|
||||
assertEquals(List.of(0, 1), KMP.kmpMatcher("AAAAABAAABA", "AAAA"));
|
||||
assertEquals(List.of(0, 3), KMP.kmpMatcher("ABCABC", "ABC"));
|
||||
assertEquals(List.of(10), KMP.kmpMatcher("ABABDABACDABABCABAB", "ABABCABAB"));
|
||||
assertEquals(List.of(), KMP.kmpMatcher("ABCDE", "FGH"));
|
||||
assertEquals(List.of(), KMP.kmpMatcher("A", "AA"));
|
||||
assertEquals(List.of(0, 1, 2), KMP.kmpMatcher("AAA", "A"));
|
||||
assertEquals(List.of(0), KMP.kmpMatcher("A", "A"));
|
||||
assertEquals(List.of(), KMP.kmpMatcher("", "A"));
|
||||
assertEquals(List.of(), KMP.kmpMatcher("A", ""));
|
||||
}
|
||||
}
|
||||
46
src/test/java/com/thealgorithms/strings/RabinKarpTest.java
Normal file
46
src/test/java/com/thealgorithms/strings/RabinKarpTest.java
Normal file
@@ -0,0 +1,46 @@
|
||||
package com.thealgorithms.strings;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.util.List;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class RabinKarpTest {
|
||||
|
||||
@Test
|
||||
public void testNullInputs() {
|
||||
assertEquals(List.of(), RabinKarp.search(null, "A"));
|
||||
assertEquals(List.of(), RabinKarp.search("A", null));
|
||||
assertEquals(List.of(), RabinKarp.search(null, null));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHashCollision() {
|
||||
// 'a' = 97. (char)198 % 101 = 97.
|
||||
// For length 1, h = 1. p = 97. t = 198 % 101 = 97.
|
||||
// Collision occurs, loop checks characters: 198 != 97, breaks.
|
||||
char collisionChar = (char) 198;
|
||||
String text = String.valueOf(collisionChar);
|
||||
String pattern = "a";
|
||||
assertEquals(List.of(), RabinKarp.search(text, pattern));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSearchWithCustomQ() {
|
||||
// Using a different prime
|
||||
assertEquals(List.of(0, 1), RabinKarp.search("AAAA", "AAA", 13));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRabinKarpSearch() {
|
||||
assertEquals(List.of(0, 1), RabinKarp.search("AAAAABAAABA", "AAAA"));
|
||||
assertEquals(List.of(0, 3), RabinKarp.search("ABCABC", "ABC"));
|
||||
assertEquals(List.of(10), RabinKarp.search("ABABDABACDABABCABAB", "ABABCABAB"));
|
||||
assertEquals(List.of(), RabinKarp.search("ABCDE", "FGH"));
|
||||
assertEquals(List.of(), RabinKarp.search("A", "AA"));
|
||||
assertEquals(List.of(0, 1, 2), RabinKarp.search("AAA", "A"));
|
||||
assertEquals(List.of(0), RabinKarp.search("A", "A"));
|
||||
assertEquals(List.of(), RabinKarp.search("", "A"));
|
||||
assertEquals(List.of(), RabinKarp.search("A", ""));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user