mirror of
https://github.com/trekhleb/javascript-algorithms.git
synced 2025-07-06 17:44:08 +08:00
Add regular expression matching algorithm.
This commit is contained in:
@ -76,6 +76,7 @@ a set of rules that precisely define a sequence of operations.
|
|||||||
* `A` [Z Algorithm](src/algorithms/string/z-algorithm) - substring search (pattern matching)
|
* `A` [Z Algorithm](src/algorithms/string/z-algorithm) - substring search (pattern matching)
|
||||||
* `A` [Rabin Karp Algorithm](src/algorithms/string/rabin-karp) - substring search
|
* `A` [Rabin Karp Algorithm](src/algorithms/string/rabin-karp) - substring search
|
||||||
* `A` [Longest Common Substring](src/algorithms/string/longest-common-substring)
|
* `A` [Longest Common Substring](src/algorithms/string/longest-common-substring)
|
||||||
|
* `A` [Regular Expression Matching](src/algorithms/string/regular-expression-matching)
|
||||||
* **Searches**
|
* **Searches**
|
||||||
* `B` [Linear Search](src/algorithms/search/linear-search)
|
* `B` [Linear Search](src/algorithms/search/linear-search)
|
||||||
* `B` [Binary Search](src/algorithms/search/binary-search)
|
* `B` [Binary Search](src/algorithms/search/binary-search)
|
||||||
@ -147,6 +148,7 @@ algorithm is an abstraction higher than a computer program.
|
|||||||
* `A` [Integer Partition](src/algorithms/math/integer-partition)
|
* `A` [Integer Partition](src/algorithms/math/integer-partition)
|
||||||
* `A` [Maximum Subarray](src/algorithms/sets/maximum-subarray)
|
* `A` [Maximum Subarray](src/algorithms/sets/maximum-subarray)
|
||||||
* `A` [Bellman-Ford Algorithm](src/algorithms/graph/bellman-ford) - finding shortest path to all graph vertices
|
* `A` [Bellman-Ford Algorithm](src/algorithms/graph/bellman-ford) - finding shortest path to all graph vertices
|
||||||
|
* `A` [Regular Expression Matching](src/algorithms/string/regular-expression-matching)
|
||||||
* **Backtracking** - similarly to brute force, try to generate all possible solutions, but each time you generate next solution you test
|
* **Backtracking** - similarly to brute force, try to generate all possible solutions, but each time you generate next solution you test
|
||||||
if it satisfies all conditions, and only then continue generating subsequent solutions. Otherwise, backtrack, and go on a
|
if it satisfies all conditions, and only then continue generating subsequent solutions. Otherwise, backtrack, and go on a
|
||||||
different path of finding a solution. Normally the DFS traversal of state-space is being used.
|
different path of finding a solution. Normally the DFS traversal of state-space is being used.
|
||||||
|
73
src/algorithms/string/regular-expression-matching/README.md
Normal file
73
src/algorithms/string/regular-expression-matching/README.md
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# Regular Expression Matching
|
||||||
|
|
||||||
|
Given an input string `s` and a pattern `p`, implement regular
|
||||||
|
expression matching with support for `.` and `*`.
|
||||||
|
|
||||||
|
- `.` Matches any single character.
|
||||||
|
- `*` Matches zero or more of the preceding element.
|
||||||
|
|
||||||
|
The matching should cover the **entire** input string (not partial).
|
||||||
|
|
||||||
|
**Note**
|
||||||
|
|
||||||
|
- `s` may be empty and contains only lowercase letters `a-z`.
|
||||||
|
- `p` may be empty and contains only lowercase letters `a-z` and the special characters `.` and `*`.
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
**Example #1**
|
||||||
|
|
||||||
|
Input:
|
||||||
|
```
|
||||||
|
s = 'aa'
|
||||||
|
p = 'a'
|
||||||
|
```
|
||||||
|
|
||||||
|
Output: `false`
|
||||||
|
|
||||||
|
Explanation: `a` does not match the entire string `aa`.
|
||||||
|
|
||||||
|
**Example #2**
|
||||||
|
|
||||||
|
Input:
|
||||||
|
```
|
||||||
|
s = 'aa'
|
||||||
|
p = 'a*'
|
||||||
|
```
|
||||||
|
|
||||||
|
Output: `true`
|
||||||
|
|
||||||
|
Explanation: `*` means zero or more of the preceding element, `a`.
|
||||||
|
Therefore, by repeating `a` once, it becomes `aa`.
|
||||||
|
|
||||||
|
**Example #3**
|
||||||
|
|
||||||
|
Input:
|
||||||
|
|
||||||
|
```
|
||||||
|
s = 'ab'
|
||||||
|
p = '.*'
|
||||||
|
```
|
||||||
|
|
||||||
|
Output: `true`
|
||||||
|
|
||||||
|
Explanation: `.*` means "zero or more (`*`) of any character (`.`)".
|
||||||
|
|
||||||
|
**Example #4**
|
||||||
|
|
||||||
|
Input:
|
||||||
|
|
||||||
|
```
|
||||||
|
s = 'aab'
|
||||||
|
p = 'c*a*b'
|
||||||
|
```
|
||||||
|
|
||||||
|
Output: `true`
|
||||||
|
|
||||||
|
Explanation: `c` can be repeated 0 times, `a` can be repeated
|
||||||
|
1 time (giving `aa`), followed by the literal `b`. Therefore it matches `aab`.
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- [YouTube](https://www.youtube.com/watch?v=l3hda49XcDE&list=PLLXdhg_r2hKA7DPDsunoDZ-Z769jWn4R8&index=71&t=0s)
|
||||||
|
- [LeetCode](https://leetcode.com/problems/regular-expression-matching/description/)
|
@ -0,0 +1,34 @@
|
|||||||
|
import regularExpressionMatching from '../regularExpressionMatching';
|
||||||
|
|
||||||
|
describe('regularExpressionMatching', () => {
  it('should match regular expressions in a string', () => {
    // [string, pattern] pairs where the pattern must match the ENTIRE string.
    const matchingPairs = [
      ['', ''],
      ['a', 'a'],
      ['aa', 'aa'],
      ['aab', 'aab'],
      ['aab', 'aa.'],
      ['aab', '.a.'],
      ['aab', '...'],
      ['a', 'a*'],
      ['aaa', 'a*'],
      ['aaab', 'a*b'],
      ['aaabb', 'a*b*'],
      ['aaabb', 'a*b*c*'],
      ['', 'a*'],
      ['xaabyc', 'xa*b.c'],
      ['aab', 'c*a*b*'],
      ['mississippi', 'mis*is*.p*.'],
      ['ab', '.*'],
    ];

    // [string, pattern] pairs where the pattern must NOT match the entire string.
    const nonMatchingPairs = [
      ['', 'a'],
      ['a', ''],
      ['aab', 'aa'],
      ['aab', 'baa'],
      ['aabc', '...'],
      ['aaabbdd', 'a*b*c*'],
      ['mississippi', 'mis*is*p*.'],
      ['ab', 'a*'],
      ['abba', 'a*b*.c'],
      ['abba', '.*c'],
    ];

    matchingPairs.forEach(([string, pattern]) => {
      expect(regularExpressionMatching(string, pattern)).toBeTruthy();
    });

    nonMatchingPairs.forEach(([string, pattern]) => {
      expect(regularExpressionMatching(string, pattern)).toBeFalsy();
    });
  });
});
|
@ -0,0 +1,135 @@
|
|||||||
|
const ZERO_OR_MORE_CHARS = '*';
const ANY_CHAR = '.';

/**
 * Checks whether the ENTIRE string matches the pattern.
 * Dynamic programming approach.
 *
 * Supported special pattern characters:
 * - '.' matches any single character;
 * - '*' matches zero or more of the preceding pattern element.
 *
 * The function always returns a strict boolean. A '*' placed at the very
 * beginning of the pattern has no preceding element, so its "zero occurrences"
 * option is treated as "no match" for that cell instead of reading outside of
 * the matrix.
 *
 * @param {string} string
 * @param {string} pattern
 * @return {boolean}
 */
export default function regularExpressionMatching(string, pattern) {
  /*
   * Let's initiate dynamic programming matrix for this string and pattern.
   * We will have pattern characters on top (as columns) and string characters
   * will be placed to the left of the table (as rows).
   *
   * Every cell [row][column] answers the question: "do the first `row` chars
   * of the string match the first `column` chars of the pattern?". All cells
   * start as false and are switched to true only when a match is proven.
   *
   * Example:
   *
   *     a * b . b
   *   - - - - - -
   * a - - - - - -
   * a - - - - - -
   * b - - - - - -
   * y - - - - - -
   * b - - - - - -
   */
  const matchMatrix = Array.from(
    { length: string.length + 1 },
    () => Array(pattern.length + 1).fill(false),
  );

  // Let's fill the top-left cell with true. This would mean that empty
  // string '' matches to empty pattern ''.
  matchMatrix[0][0] = true;

  // The first row describes the empty string, which can't match any non-empty
  // pattern, so its cells stay false.
  //
  // Example:
  // string: ''
  // pattern: 'a.z'
  //
  // The one exception here is patterns like a*b* that match the empty string:
  // a '*' lets us drop itself together with the preceding element, so we copy
  // the value that is two columns to the left.
  for (let columnIndex = 1; columnIndex <= pattern.length; columnIndex += 1) {
    const patternIndex = columnIndex - 1;

    if (pattern[patternIndex] === ZERO_OR_MORE_CHARS) {
      // A leading '*' has nothing two columns to the left. Keep the cell false
      // instead of reading the non-existing matchMatrix[0][-1] (undefined).
      matchMatrix[0][columnIndex] = columnIndex >= 2
        ? matchMatrix[0][columnIndex - 2]
        : false;
    }
  }

  // The first column (except the top-left cell) describes the empty pattern,
  // which can't match any non-empty string, so those cells stay false.
  //
  // Example:
  // string: 'ab'
  // pattern: ''

  // Now let's go through every letter of the pattern and every letter of
  // the string and compare them one by one.
  for (let rowIndex = 1; rowIndex <= string.length; rowIndex += 1) {
    for (let columnIndex = 1; columnIndex <= pattern.length; columnIndex += 1) {
      // Take into account the fact that the matrix contains one extra column and row.
      const stringIndex = rowIndex - 1;
      const patternIndex = columnIndex - 1;

      if (pattern[patternIndex] === ZERO_OR_MORE_CHARS) {
        /*
         * In case if current pattern character is special '*' character we have
         * two options:
         *
         * 1. Since '*' allows its previous char to not be presented in a string we
         * need to check if string matches the pattern without '*' char and without the
         * char that goes before '*'. That would mean to go two positions left on the
         * same row.
         *
         * 2. Since '*' allows its previous char to be presented in a string many times we
         * need to check if char before '*' is the same as current string char. If they are the
         * same that would mean that current string matches the current pattern in case if
         * the string WITHOUT current char matches the same pattern. This would mean to go
         * one position up in the same column.
         */
        const repeatedChar = pattern[patternIndex - 1];

        // Option 1. The bounds check covers a leading '*' (no preceding element).
        const matchesWithoutElement = columnIndex >= 2
          && matchMatrix[rowIndex][columnIndex - 2];

        // Option 2.
        const elementMatchesChar = repeatedChar === string[stringIndex]
          || repeatedChar === ANY_CHAR;
        const matchesWithRepetition = elementMatchesChar
          && matchMatrix[rowIndex - 1][columnIndex];

        matchMatrix[rowIndex][columnIndex] = matchesWithoutElement || matchesWithRepetition;
      } else if (
        pattern[patternIndex] === string[stringIndex]
        || pattern[patternIndex] === ANY_CHAR
      ) {
        /*
         * In case if current pattern char is the same as current string char
         * or it may be any character (in case if pattern contains '.' char)
         * we need to check if there was a match for the pattern and for the
         * string WITHOUT current char. This would mean that we may copy
         * left-top diagonal value.
         *
         * Example:
         *
         *   a b
         * a 1 -
         * b - 1
         */
        matchMatrix[rowIndex][columnIndex] = matchMatrix[rowIndex - 1][columnIndex - 1];
      } else {
        /*
         * In case if pattern char and string char are different we may
         * treat this case as "no-match".
         *
         * Example:
         *
         *   a b
         * a - -
         * c - 0
         */
        matchMatrix[rowIndex][columnIndex] = false;
      }
    }
  }

  // The bottom-right cell tells whether the whole string matches the whole pattern.
  return matchMatrix[string.length][pattern.length];
}
|
Reference in New Issue
Block a user