Add AhoCorasick (#4465)

* Added code to find Articulation Points and Bridges

* tried to solve clang-format test

* removed new line at EOF to get lint to pass

* feature: Added Ahocorasick Algorithm

* fixed lint using clang-format

* removed datastructures/graphs/ArticulationPointsAndBridge.java from this branch

* removed main, since test-file is added. Also modified and renamed few functions.

* Added test-file for AhoCorasick Algorithm

* Modified some comments in test-file

* Modified some comments in AhoCorasick.java

* lint fix

* added few more test cases

* Modified some comments

* Change all class fields to private, added initializeSuffixLinksForChildNodesOfTheRoot() method, hashmap string search position (also has previous index based search), removed java.util.*

* Added Missing Test-Cases and more

* minor text changes

* added direct test check i.e. defining a variable expected and just checking if res and expected are equal.

* Created New Class Trie, merged 'buildTrie and buildSuffixAndOutputLinks' with 'Trie constructor'. Merged setUpStartPoints with searchIn. Now AhoCorasick contains -> inner class: Trie, Node. Methods: search and convert. Trie has -> Methods : constructor and searchIn

* Updated TestFile according to the updated AhoCorasick Class. Added Few more test cases

* updated - broken down constructor to relevant parts, made string final, made res local to searchIn(), doxygen-like style

* lint fix clang

* Updated Tests Files

* Added final field to Node class setters and Trie Constructor arguments, removed getTrieRoot() and some unnecessary comments, renamed [old -> new]: res -> positionByStringIndexValue, removed if condition from setupStartPoints()

* updated test file

* lint fix clang

* minor change - 'removed a comment'

* added final fields to some arguments, class and variables, added a method initializePositionByStringIndexValue()

* updated to remove * inclusion and added the required modules only

* Implemented a new class PatternPositionRecorder to wrap up the position recording in searchIn()

* Added final fields to PatternPositionRecorder Class

* style: mark default constructor of `AhoCorasick` as `private`

* style: remove redundant `public`

---------

Co-authored-by: Piotr Idzik <65706193+vil02@users.noreply.github.com>
This commit is contained in:
Prabhat-Kumar
2023-10-08 18:46:06 +05:30
committed by GitHub
parent 06aa834fa6
commit 8200a791a2
2 changed files with 369 additions and 0 deletions

View File

@ -0,0 +1,120 @@
/*
* Tests For Aho-Corasick String Matching Algorithm
*
* Author: Prabhat-Kumar-42
* GitHub: https://github.com/Prabhat-Kumar-42
*/
package com.thealgorithms.strings;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
/**
* This class contains test cases for the Aho-Corasick String Matching Algorithm.
* The Aho-Corasick algorithm is used to efficiently find all occurrences of multiple
* patterns in a given text.
*/
class AhoCorasickTest {
    private String[] patterns; // Default pattern set used by testSearch
    private String text; // Text that every test searches in

    /**
     * Resets the shared fixture before each test so that no test can observe
     * state left behind by another one.
     */
    @BeforeEach
    void setUp() {
        patterns = new String[] {"ACC", "ATC", "CAT", "GCG", "C", "T"};
        text = "GCATCG";
    }

    /**
     * Builds the expected list of match positions for one pattern.
     *
     * @param startIndices the 0-based indices in the text at which the pattern is
     *                     expected to start; pass nothing for "no match"
     * @return a new mutable list holding the given indices in the given order
     */
    private static ArrayList<Integer> positions(final Integer... startIndices) {
        return new ArrayList<>(Arrays.asList(startIndices));
    }

    /**
     * Searches the default text for the default pattern set and checks the start
     * positions reported for every pattern, including patterns without a match
     * ("ACC", "GCG") and a pattern that matches twice ("C").
     */
    @Test
    void testSearch() {
        final var expected = Map.of("ACC", positions(), "ATC", positions(2), "CAT", positions(1), "GCG", positions(), "C", positions(1, 4), "T", positions(3));
        assertEquals(expected, AhoCorasick.search(text, patterns));
    }

    /**
     * Searching with an empty pattern array must yield an empty result map.
     */
    @Test
    void testEmptyPatterns() {
        final var emptyPatterns = new String[] {};
        assertTrue(AhoCorasick.search(text, emptyPatterns).isEmpty());
    }

    /**
     * Patterns that do not occur in the text must still appear as keys in the
     * result, each mapped to an empty position list.
     */
    @Test
    void testPatternNotFound() {
        final var searchPatterns = new String[] {"XYZ", "123"};
        final var expected = Map.of("XYZ", positions(), "123", positions());
        assertEquals(expected, AhoCorasick.search(text, searchPatterns));
    }

    /**
     * Patterns that are prefixes of the text (and of each other) must all be
     * reported at position 0.
     */
    @Test
    void testPatternAtBeginning() {
        final var searchPatterns = new String[] {"GC", "GCA", "GCAT"};
        final var expected = Map.of("GC", positions(0), "GCA", positions(0), "GCAT", positions(0));
        assertEquals(expected, AhoCorasick.search(text, searchPatterns));
    }

    /**
     * Patterns that are suffixes of the text (and of each other) must be
     * reported at their respective start positions: 4, 3 and 2.
     */
    @Test
    void testPatternAtEnd() {
        final var searchPatterns = new String[] {"CG", "TCG", "ATCG"};
        final var expected = Map.of("CG", positions(4), "TCG", positions(3), "ATCG", positions(2));
        assertEquals(expected, AhoCorasick.search(text, searchPatterns));
    }

    /**
     * Checks overlapping and repeated matches: "T" is a suffix of "AT", so both
     * must be reported for the same stretch of text (positions 3 and 2), and "C"
     * occurs twice in the text, so both of its positions (1 and 4) must be
     * reported.
     */
    @Test
    void testMultipleOccurrencesOfPattern() {
        // "C" is the pattern that genuinely occurs more than once in "GCATCG".
        final var searchPatterns = new String[] {"AT", "T", "C"};
        final var expected = Map.of("AT", positions(2), "T", positions(3), "C", positions(1, 4));
        assertEquals(expected, AhoCorasick.search(text, searchPatterns));
    }

    /**
     * Checks that matching is case-sensitive: the lower-case and mixed-case
     * patterns "gca" and "aTc" must NOT match the upper-case text, while the
     * exact-case pattern "C" is found at positions 1 and 4.
     *
     * NOTE: the method name is kept for compatibility with existing test
     * filters; the assertions below pin case-sensitive behaviour.
     */
    @Test
    void testCaseInsensitiveSearch() {
        final var searchPatterns = new String[] {"gca", "aTc", "C"};
        final var expected = Map.of("gca", positions(), "aTc", positions(), "C", positions(1, 4));
        assertEquals(expected, AhoCorasick.search(text, searchPatterns));
    }
}