mirror of
https://github.com/TheAlgorithms/Java.git
synced 2025-07-28 06:55:02 +08:00
Add AhoCorasick
(#4465)
* Added code to find Articulation Points and Bridges * tried to solve clang-format test * removed new line at EOF to get lint to pass * feature: Added Ahocorasick Algorithm * fixed lint using clang-format * removed datastructures/graphs/ArticulationPointsAndBridge.java from this branch * removed main, since test-file is added. Also modified and renamed a few functions. * Added test-file for AhoCorasick Algorithm * Modified some comments in test-file * Modified some comments in AhoCorasick.java * lint fix * added a few more test cases * Modified some comments * Change all class fields to private, added initializeSuffixLinksForChildNodesOfTheRoot() method, hashmap string search position (also has previous index based search), removed java.util.* * Added Missing Test-Cases and more * minor text changes * added direct test check i.e. defining a variable expected and just checking if res and expected are equal. * Created New Class Trie, merged 'buildTrie and buildSuffixAndOutputLinks' with 'Trie constructor'. Merged setUpStartPoints with searchIn. Now AhoCorasick contains -> inner class: Trie, Node. Methods: search and convert. Trie has -> Methods : constructor and searchIn * Updated TestFile according to the updated AhoCorasick Class.
Added Few more test cases * updated - broken down constructor to relevant parts, made string final, made res local to searchIn(), doxygen-like style * lint fix clang * Updated Tests Files * Added final field to Node class setters and Trie Constructor arguments, removed getTrieRoot() and some unnecessary comments, renamed [old -> new]: res -> positionByStringIndexValue, removed if condition from setupStartPoints() * updated test file * lint fix clang * minor change - 'removed a comment' * added final fields to some arguments, class and variables, added a method initializePositionByStringIndexValue() * updated to remove * inclusion and added the required modules only * Implemented a new class PatternPositionRecorder to wrap up the position recording in searchIn() * Added final fields to PatternPositionRecorder Class * style: mark default constructor of `AhoCorasick` as `private` * style: remove redundant `public` --------- Co-authored-by: Piotr Idzik <65706193+vil02@users.noreply.github.com>
This commit is contained in:
120
src/test/java/com/thealgorithms/strings/AhoCorasickTest.java
Normal file
120
src/test/java/com/thealgorithms/strings/AhoCorasickTest.java
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Tests For Aho-Corasick String Matching Algorithm
|
||||
*
|
||||
* Author: Prabhat-Kumar-42
|
||||
* GitHub: https://github.com/Prabhat-Kumar-42
|
||||
*/
|
||||
|
||||
package com.thealgorithms.strings;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* This class contains test cases for the Aho-Corasick String Matching Algorithm.
|
||||
* The Aho-Corasick algorithm is used to efficiently find all occurrences of multiple
|
||||
* patterns in a given text.
|
||||
*/
|
||||
class AhoCorasickTest {
|
||||
private String[] patterns; // The array of patterns to search for
|
||||
private String text; // The input text to search within
|
||||
|
||||
/**
|
||||
* This method sets up the test environment before each test case.
|
||||
* It initializes the patterns and text to be used for testing.
|
||||
*/
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
patterns = new String[] {"ACC", "ATC", "CAT", "GCG", "C", "T"};
|
||||
text = "GCATCG";
|
||||
}
|
||||
|
||||
/**
|
||||
* Test searching for multiple patterns in the input text.
|
||||
* The expected results are defined for each pattern.
|
||||
*/
|
||||
@Test
|
||||
void testSearch() {
|
||||
// Define the expected results for each pattern
|
||||
final var expected = Map.of("ACC", new ArrayList<>(Arrays.asList()), "ATC", new ArrayList<>(Arrays.asList(2)), "CAT", new ArrayList<>(Arrays.asList(1)), "GCG", new ArrayList<>(Arrays.asList()), "C", new ArrayList<>(Arrays.asList(1, 4)), "T", new ArrayList<>(Arrays.asList(3)));
|
||||
assertEquals(expected, AhoCorasick.search(text, patterns));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test searching with an empty pattern array.
|
||||
* The result should be an empty map.
|
||||
*/
|
||||
@Test
|
||||
void testEmptyPatterns() {
|
||||
// Define an empty pattern array
|
||||
final var emptyPatterns = new String[] {};
|
||||
assertTrue(AhoCorasick.search(text, emptyPatterns).isEmpty());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test searching for patterns that are not present in the input text.
|
||||
* The result should be an empty list for each pattern.
|
||||
*/
|
||||
@Test
|
||||
void testPatternNotFound() {
|
||||
// Define patterns that are not present in the text
|
||||
final var searchPatterns = new String[] {"XYZ", "123"};
|
||||
final var expected = Map.of("XYZ", new ArrayList<Integer>(), "123", new ArrayList<Integer>());
|
||||
assertEquals(expected, AhoCorasick.search(text, searchPatterns));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test searching for patterns that start at the beginning of the input text.
|
||||
* The expected position for each pattern is 0.
|
||||
*/
|
||||
@Test
|
||||
void testPatternAtBeginning() {
|
||||
// Define patterns that start at the beginning of the text
|
||||
final var searchPatterns = new String[] {"GC", "GCA", "GCAT"};
|
||||
final var expected = Map.of("GC", new ArrayList<Integer>(Arrays.asList(0)), "GCA", new ArrayList<Integer>(Arrays.asList(0)), "GCAT", new ArrayList<Integer>(Arrays.asList(0)));
|
||||
assertEquals(expected, AhoCorasick.search(text, searchPatterns));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test searching for patterns that end at the end of the input text.
|
||||
* The expected positions are 4, 3, and 2 for the patterns.
|
||||
*/
|
||||
@Test
|
||||
void testPatternAtEnd() {
|
||||
// Define patterns that end at the end of the text
|
||||
final var searchPatterns = new String[] {"CG", "TCG", "ATCG"};
|
||||
final var expected = Map.of("CG", new ArrayList<Integer>(Arrays.asList(4)), "TCG", new ArrayList<Integer>(Arrays.asList(3)), "ATCG", new ArrayList<Integer>(Arrays.asList(2)));
|
||||
assertEquals(expected, AhoCorasick.search(text, searchPatterns));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test searching for patterns with multiple occurrences in the input text.
|
||||
* The expected sizes are 1 and 1, and the expected positions are 2 and 3
|
||||
* for the patterns "AT" and "T" respectively.
|
||||
*/
|
||||
@Test
|
||||
void testMultipleOccurrencesOfPattern() {
|
||||
// Define patterns with multiple occurrences in the text
|
||||
final var searchPatterns = new String[] {"AT", "T"};
|
||||
final var expected = Map.of("AT", new ArrayList<Integer>(Arrays.asList(2)), "T", new ArrayList<Integer>(Arrays.asList(3)));
|
||||
assertEquals(expected, AhoCorasick.search(text, searchPatterns));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test searching for patterns in a case-insensitive manner.
|
||||
* The search should consider patterns regardless of their case.
|
||||
*/
|
||||
@Test
|
||||
void testCaseInsensitiveSearch() {
|
||||
// Define patterns with different cases
|
||||
final var searchPatterns = new String[] {"gca", "aTc", "C"};
|
||||
final var expected = Map.of("gca", new ArrayList<Integer>(), "aTc", new ArrayList<Integer>(), "C", new ArrayList<Integer>(Arrays.asList(1, 4)));
|
||||
assertEquals(expected, AhoCorasick.search(text, searchPatterns));
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user