Add regular expression matching algorithm.

This commit is contained in:
Oleksii Trekhleb
2018-06-25 20:02:24 +03:00
parent c96bbdf00e
commit d69199e658
4 changed files with 244 additions and 0 deletions

View File

@ -0,0 +1,73 @@
# Regular Expression Matching
Given an input string `s` and a pattern `p`, implement regular
expression matching with support for `.` and `*`.
- `.` Matches any single character.
- `*` Matches zero or more of the preceding element.
The matching should cover the **entire** input string (not partial).
**Note**
- `s` may be empty and contains only lowercase letters `a-z`.
- `p` may be empty and contains only lowercase letters `a-z` and the special characters `.` and `*`.
## Examples
**Example #1**
Input:
```
s = 'aa'
p = 'a'
```
Output: `false`
Explanation: `a` does not match the entire string `aa`.
**Example #2**
Input:
```
s = 'aa'
p = 'a*'
```
Output: `true`
Explanation: `*` means zero or more of the preceding element, `a`.
Therefore, by repeating `a` once, it becomes `aa`.
**Example #3**
Input:
```
s = 'ab'
p = '.*'
```
Output: `true`
Explanation: `.*` means "zero or more (`*`) of any character (`.`)".
**Example #4**
Input:
```
s = 'aab'
p = 'c*a*b'
```
Output: `true`
Explanation: `c` can be repeated 0 times, `a` can be repeated
1 time. Therefore it matches `aab`.
## References
- [YouTube](https://www.youtube.com/watch?v=l3hda49XcDE&list=PLLXdhg_r2hKA7DPDsunoDZ-Z769jWn4R8&index=71&t=0s)
- [LeetCode](https://leetcode.com/problems/regular-expression-matching/description/)

View File

@ -0,0 +1,34 @@
import regularExpressionMatching from '../regularExpressionMatching';
describe('regularExpressionMatching', () => {
  it('should match regular expressions in a string', () => {
    // [string, pattern] pairs where the pattern must match the entire string.
    const matchingPairs = [
      ['', ''],
      ['a', 'a'],
      ['aa', 'aa'],
      ['aab', 'aab'],
      ['aab', 'aa.'],
      ['aab', '.a.'],
      ['aab', '...'],
      ['a', 'a*'],
      ['aaa', 'a*'],
      ['aaab', 'a*b'],
      ['aaabb', 'a*b*'],
      ['aaabb', 'a*b*c*'],
      ['', 'a*'],
      ['xaabyc', 'xa*b.c'],
      ['aab', 'c*a*b*'],
      ['mississippi', 'mis*is*.p*.'],
      ['ab', '.*'],
    ];

    // [string, pattern] pairs where the pattern must NOT match the entire string.
    const mismatchingPairs = [
      ['', 'a'],
      ['a', ''],
      ['aab', 'aa'],
      ['aab', 'baa'],
      ['aabc', '...'],
      ['aaabbdd', 'a*b*c*'],
      ['mississippi', 'mis*is*p*.'],
      ['ab', 'a*'],
      ['abba', 'a*b*.c'],
      ['abba', '.*c'],
    ];

    matchingPairs.forEach(([string, pattern]) => {
      expect(regularExpressionMatching(string, pattern)).toBeTruthy();
    });

    mismatchingPairs.forEach(([string, pattern]) => {
      expect(regularExpressionMatching(string, pattern)).toBeFalsy();
    });
  });
});

View File

@ -0,0 +1,135 @@
const ZERO_OR_MORE_CHARS = '*';
const ANY_CHAR = '.';

/**
 * Checks whether the ENTIRE string matches the pattern.
 * Dynamic programming approach.
 *
 * Supported pattern syntax:
 * - `.` matches any single character;
 * - `*` matches zero or more of the preceding element.
 *
 * A `*` at the very beginning of the pattern has no preceding element to
 * repeat, so it is treated as a token that never matches.
 *
 * Time and space complexity: O(string.length * pattern.length).
 *
 * @param {string} string - The text to be matched as a whole.
 * @param {string} pattern - The pattern to match the text against.
 * @return {boolean} - True if the whole string matches the pattern.
 */
export default function regularExpressionMatching(string, pattern) {
  /*
   * Let's initiate dynamic programming matrix for this string and pattern.
   * We will have pattern characters on top (as columns) and string characters
   * will be placed to the left of the table (as rows). The extra first row and
   * first column stand for the empty string and the empty pattern.
   *
   * matchMatrix[row][column] tells whether the first `row` characters of the
   * string match the first `column` characters of the pattern.
   *
   * Example:
   *
   *       a * b . b
   *     - - - - - -
   *   a - - - - - -
   *   a - - - - - -
   *   b - - - - - -
   *   y - - - - - -
   *   b - - - - - -
   *
   * Every cell starts as false ("no match"), so the code below only has to
   * handle the cells that may become true. In particular:
   * - the first column stays false: empty pattern can't match non-empty string;
   * - the first row stays false for patterns that can't match the empty string.
   */
  const createRow = () => Array(pattern.length + 1).fill(false);
  const matchMatrix = Array.from({ length: string.length + 1 }, createRow);

  // Empty string '' matches empty pattern ''.
  matchMatrix[0][0] = true;

  // First row: empty string against non-empty pattern. The only way to match is
  // a pattern like a*b*, where every "x*" pair is taken zero times. Dropping
  // such a pair means looking two columns to the left, so we start from the
  // second column: a one-character pattern (including a lone '*') can't match
  // the empty string and there is no column to look back to.
  for (let columnIndex = 2; columnIndex <= pattern.length; columnIndex += 1) {
    if (pattern[columnIndex - 1] === ZERO_OR_MORE_CHARS) {
      matchMatrix[0][columnIndex] = matchMatrix[0][columnIndex - 2];
    }
  }

  // Now let's go through every letter of the pattern and every letter of
  // the string and compare them one by one.
  for (let rowIndex = 1; rowIndex <= string.length; rowIndex += 1) {
    for (let columnIndex = 1; columnIndex <= pattern.length; columnIndex += 1) {
      // Take into account the fact that matrix contains one extra column and row.
      const stringChar = string[rowIndex - 1];
      const patternChar = pattern[columnIndex - 1];

      if (patternChar === ZERO_OR_MORE_CHARS) {
        // A leading '*' has nothing to repeat: the cell keeps its false value.
        if (columnIndex >= 2) {
          const repeatedChar = pattern[columnIndex - 2];

          /*
           * Option 1. The '*' allows its previous char to be absent from the
           * string, so check whether the string matches the pattern without
           * the '*' and without the char before it. That means going two
           * positions left in the same row.
           */
          const matchesWithoutElement = matchMatrix[rowIndex][columnIndex - 2];

          /*
           * Option 2. The '*' allows its previous char to be repeated, so if
           * that char is the same as the current string char (or is '.'), the
           * current string matches whenever the string WITHOUT its current
           * char matches the same pattern. That means going one position up
           * in the same column.
           */
          const matchesOneMoreRepeat = (
            (repeatedChar === stringChar || repeatedChar === ANY_CHAR)
            && matchMatrix[rowIndex - 1][columnIndex]
          );

          matchMatrix[rowIndex][columnIndex] = matchesWithoutElement || matchesOneMoreRepeat;
        }
      } else if (patternChar === stringChar || patternChar === ANY_CHAR) {
        /*
         * The current pattern char is the same as the current string char
         * (or it is '.', which stands for any char). The result is then the
         * same as for the string and the pattern WITHOUT their current chars,
         * so we copy the left-top diagonal value.
         *
         * Example:
         *
         *     a b
         *   a 1 -
         *   b - 1
         */
        matchMatrix[rowIndex][columnIndex] = matchMatrix[rowIndex - 1][columnIndex - 1];
      }
      /*
       * Otherwise pattern char and string char are different, which is a
       * "no-match": the cell keeps its initial false value.
       *
       * Example:
       *
       *     a b
       *   a - -
       *   c - 0
       */
    }
  }

  return matchMatrix[string.length][pattern.length];
}