/** Result of parsing a whole stylesheet: its top-level rules in source order. */
export interface Stylesheet {
  rules: Rule[];
}

/** Any rule the parser can produce. */
export type Rule = QualifiedRule | AtRule;

/** An at-rule such as `@import "x";` or `@media screen { ... }`. */
export interface AtRule {
  type: 'at-rule';
  /** Name of the at-keyword, without the leading `@`. */
  name: string;
  /** Component values found between the at-keyword and the terminating `;` or `{` block. */
  prelude: InputToken[];
  /** The `{}` block; left undefined when the rule ends with `;` or at end of input. */
  block: SimpleBlock;
}

/** A qualified rule: a prelude (typically a selector) followed by a `{}` block. */
export interface QualifiedRule {
  type: 'qualified-rule';
  /** Component values found before the `{}` block. */
  prelude: InputToken[];
  /** The `{}` block of the rule. */
  block: SimpleBlock;
}

// const nonQuoteURLRegEx = /(:?[^\)\s\t\n\r\f\'\"\(]|\\(?:\$|\n|[0-9a-fA-F]{1,6}\s?))*/gym; // TODO: non-printable code points omitted

/**
 * Everything the tokenizer and the parser pass around.
 * Punctuation, whitespace, match operators and the CDO/CDC markers are plain strings,
 * `undefined` stands for the end of input, and every other token is an object.
 */
export type InputToken =
  | '('
  | ')'
  | '{'
  | '}'
  | '['
  | ']'
  | ':'
  | ';'
  | ','
  | ' '
  | '^='
  | '|='
  | '$='
  | '*='
  | '~='
  | '<!--'
  | '-->'
  | undefined /* end of input */
  | InputTokenObject
  | FunctionInputToken
  | FunctionToken
  | SimpleBlock
  | AtKeywordToken;

/** Discriminator stored in `InputTokenObject.type`. */
export const enum TokenObjectType {
  /** A quoted string; `text` is the raw source including the quotes. */
  string = 1,
  /** A single character that starts no other token. */
  delim = 2,
  /** A plain number. */
  number = 3,
  /** A number directly followed by `%`; `text` holds the number only. */
  percentage = 4,
  /** A number directly followed by a name (its unit); `text` holds both. */
  dimension = 5,
  /** An identifier. */
  ident = 6,
  /** A `url(...)` token; `text` is the raw source of the whole token. */
  url = 7,
  /** This is a token indicating a function's leading: name( */
  functionToken = 8,
  /** A `{}`, `[]` or `()` block together with its content. */
  simpleBlock = 9,
  /** A comment. The tokenizer in this file skips comments and never emits this type. */
  comment = 10,
  /** An at-keyword; `text` is the name without the leading `@`. */
  atKeyword = 11,
  /** A hash; `text` is the name including the leading `#`. */
  hash = 12,
  /** This is a complete consumed function: name([component [, component]*])")" */
  function = 14,
}

/** Common shape of all object tokens. */
export interface InputTokenObject {
  type: TokenObjectType;
  text: string;
}

/**
 * This is a "name(" token.
 */
export interface FunctionInputToken extends InputTokenObject {
  name: string;
}

/**
 * This is a completely parsed function like "name([component [, component]*])".
 */
export interface FunctionToken extends FunctionInputToken {
  components: any[];
}

/** A bracketed block: the opening bracket it started with and the component values inside it. */
export interface SimpleBlock extends InputTokenObject {
  associatedToken: InputToken;
  values: InputToken[];
}

export type AtKeywordToken = InputTokenObject;

// All expressions below are sticky: callers set `lastIndex` before every `exec`,
// so a match is only attempted at exactly that position.

// Lazy match up to the first "*/", so comments that end in "**/" are recognised as well.
const commentRegEx = /\/\*[\s\S]*?\*\//gmy;
// eslint-disable-next-line no-control-regex
const nameRegEx = /-?(?:(?:[a-zA-Z_]|[^\x00-\x7F]|\\(?:\$|\n|[0-9a-fA-F]{1,6}\s?))(?:[a-zA-Z_0-9\-]*|\\(?:\$|\n|[0-9a-fA-F]{1,6}\s?))*)/gmy;
const numberRegEx = /[\+\-]?(?:\d+\.\d+|\d+|\.\d+)(?:[eE][\+\-]?\d+)?/gmy;
const doubleQuoteStringRegEx = /"((?:[^\n\r\f\"]|\\(?:\$|\n|[0-9a-fA-F]{1,6}\s?))*)(:?"|$)/gmy; // Besides $n, parse escape
const whitespaceRegEx = /[\s\t\n\r\f]*/gmy;
const singleQuoteStringRegEx = /'((?:[^\n\r\f\']|\\(?:\$|\n|[0-9a-fA-F]{1,6}\s?))*)(:?'|$)/gmy; // Besides $n, parse escape

/** Closing bracket for each bracket that can open a simple block. */
const blockClosers: Record<string, ']' | '}' | ')'> = {
  '[': ']',
  '{': '}',
  '(': ')',
};

/**
 * CSS parser following relatively close:
 * CSS Syntax Module Level 3
 * https://www.w3.org/TR/css-syntax-3/
 */
export class CSS3Parser {
  /** Index into `text` of the next character to be tokenized. */
  private nextInputCodePointIndex = 0;
  /** A token that was handed back and must be returned by the next `consumeAToken` call. */
  private reconsumedInputToken: InputToken = undefined;
  /** Set by `parseAStylesheet`; makes the rule list skip CDO and CDC markers. */
  private topLevelFlag = false;

  constructor(private text: string) {}

  /**
   * For testing purposes.
   * This method allows us to run and assert the proper working of the tokenizer.
   *
   * @returns All tokens up to and including the first falsy one, so for a
   * fully tokenized input the array ends with `undefined`.
   */
  tokenize(): InputToken[] {
    const tokens: InputToken[] = [];
    let token: InputToken;
    do {
      token = this.consumeAToken();
      tokens.push(token);
    } while (token);
    return tokens;
  }

  /**
   * 4.3.1. Consume a token
   * https://www.w3.org/TR/css-syntax-3/#consume-a-token
   *
   * @returns The next token, or `undefined` at the end of the input.
   * @throws Error for unicode-range like input ("U+0", "u+?") and for a
   * backslash inside an unquoted url, neither of which is supported.
   */
  private consumeAToken(): InputToken {
    if (this.reconsumedInputToken) {
      const pending = this.reconsumedInputToken;
      this.reconsumedInputToken = undefined;
      return pending;
    }

    const char = this.text[this.nextInputCodePointIndex];
    switch (char) {
      case '"':
      case "'":
        // A string the expression cannot match degrades to a delim for the quote,
        // so the tokenizer always makes progress.
        return this.consumeAStringToken() || this.consumeADelimToken();
      case '(':
      case ')':
      case ',':
      case ':':
      case ';':
      case '[':
      case ']':
      case '{':
      case '}':
        this.nextInputCodePointIndex++;
        return char as InputToken;
      case '#':
        return this.consumeAHashToken() || this.consumeADelimToken();
      case ' ':
      case '\t':
      case '\n':
      case '\r':
      case '\f':
        return this.consumeAWhitespace();
      case '@':
        return this.consumeAtKeyword() || this.consumeADelimToken();
      // TODO: Only if this is valid escape, otherwise it is a parse error
      case '\\':
        return this.consumeAnIdentLikeToken() || this.consumeADelimToken();
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        return this.consumeANumericToken();
      case 'u':
      case 'U': {
        if (this.text[this.nextInputCodePointIndex + 1] === '+') {
          const thirdChar = this.text[this.nextInputCodePointIndex + 2];
          if ((thirdChar >= '0' && thirdChar <= '9') || thirdChar === '?') {
            // TODO: Handle unicode stuff such as U+002B
            throw new Error('Unicode tokens not supported!');
          }
        }
        return this.consumeAnIdentLikeToken() || this.consumeADelimToken();
      }
      case '$':
      case '*':
      case '^':
      case '|':
      case '~':
        return this.consumeAMatchToken() || this.consumeADelimToken();
      case '-':
        return this.consumeANumericToken() || this.consumeAnIdentLikeToken() || this.consumeCDC() || this.consumeADelimToken();
      case '+':
      case '.':
        return this.consumeANumericToken() || this.consumeADelimToken();
      case '/':
        // Comments are not tokens: skip one and continue with whatever follows it,
        // which may legitimately be the end of the input.
        if (this.consumeAComment()) {
          return this.consumeAToken();
        }
        return this.consumeADelimToken();
      case '<':
        return this.consumeCDO() || this.consumeADelimToken();
      case undefined:
        return undefined;
      default:
        return this.consumeAnIdentLikeToken() || this.consumeADelimToken();
    }
  }

  /**
   * Emits the current character as a delim token and advances by one.
   *
   * @returns The delim token, or `undefined` (end of input) when there is no character left.
   */
  private consumeADelimToken(): InputTokenObject | undefined {
    if (this.nextInputCodePointIndex >= this.text.length) {
      return undefined;
    }
    return {
      type: TokenObjectType.delim,
      text: this.text[this.nextInputCodePointIndex++],
    };
  }

  /**
   * Skips the run of whitespace at the current position (possibly empty).
   *
   * @returns Always the single-space whitespace token.
   */
  private consumeAWhitespace(): ' ' {
    whitespaceRegEx.lastIndex = this.nextInputCodePointIndex;
    whitespaceRegEx.exec(this.text);
    this.nextInputCodePointIndex = whitespaceRegEx.lastIndex;
    return ' ';
  }

  /**
   * Consumes "#" followed by a name.
   *
   * @returns The hash token, or `null` with the position unchanged when no name follows.
   */
  private consumeAHashToken(): InputTokenObject | null {
    this.nextInputCodePointIndex++;
    const hashName = this.consumeAName();
    if (hashName) {
      return { type: TokenObjectType.hash, text: '#' + hashName.text };
    }
    this.nextInputCodePointIndex--;
    return null;
  }

  /**
   * Consumes the CDO marker (the four characters that open an HTML comment).
   *
   * @returns The marker, or `null` with the position unchanged when the input does not start with it.
   */
  private consumeCDO(): '<!--' | null {
    if (this.text.startsWith('<!--', this.nextInputCodePointIndex)) {
      this.nextInputCodePointIndex += 4;
      return '<!--';
    }
    return null;
  }

  /**
   * Consumes the CDC marker (the three characters that close an HTML comment).
   *
   * @returns The marker, or `null` with the position unchanged when the input does not start with it.
   */
  private consumeCDC(): '-->' | null {
    if (this.text.startsWith('-->', this.nextInputCodePointIndex)) {
      this.nextInputCodePointIndex += 3;
      return '-->';
    }
    return null;
  }

  /**
   * Consumes a two character match operator: the current character followed by "=".
   * The caller only dispatches here for "$", "*", "^", "|" and "~".
   *
   * @returns The operator, or `null` with the position unchanged when no "=" follows.
   */
  private consumeAMatchToken(): '*=' | '$=' | '|=' | '~=' | '^=' | null {
    if (this.text[this.nextInputCodePointIndex + 1] !== '=') {
      return null;
    }
    const operator = this.text.slice(this.nextInputCodePointIndex, this.nextInputCodePointIndex + 2);
    this.nextInputCodePointIndex += 2;
    return operator as '*=' | '$=' | '|=' | '~=' | '^=';
  }

  /**
   * 4.3.2. Consume a numeric token
   * https://www.w3.org/TR/css-syntax-3/#consume-a-numeric-token
   *
   * @returns A number, percentage or dimension token, or `null` when no number starts here.
   */
  private consumeANumericToken(): InputTokenObject | null {
    numberRegEx.lastIndex = this.nextInputCodePointIndex;
    const result = numberRegEx.exec(this.text);
    if (!result) {
      return null;
    }
    this.nextInputCodePointIndex = numberRegEx.lastIndex;

    if (this.text[this.nextInputCodePointIndex] === '%') {
      // NOTE: the "%" itself is not consumed here; it follows as a separate delim token.
      return { type: TokenObjectType.percentage, text: result[0] }; // TODO: Push the actual number and unit here...
    }

    const unit = this.consumeAName();
    if (unit) {
      return {
        type: TokenObjectType.dimension,
        text: result[0] + unit.text,
      };
    }
    return { type: TokenObjectType.number, text: result[0] };
  }

  /**
   * 4.3.3. Consume an ident-like token
   * https://www.w3.org/TR/css-syntax-3/#consume-an-ident-like-token
   *
   * @returns An ident, a "name(" function token or a url token; `null` when no
   * name starts here or the url turned out to be malformed.
   */
  private consumeAnIdentLikeToken(): InputTokenObject | FunctionInputToken | null {
    const name = this.consumeAName();
    if (!name) {
      return null;
    }
    if (this.text[this.nextInputCodePointIndex] !== '(') {
      return name;
    }

    this.nextInputCodePointIndex++;
    if (name.text.toLowerCase() === 'url') {
      return this.consumeAURLToken();
    }
    return {
      type: TokenObjectType.functionToken,
      name: name.text,
      text: name.text + '(',
    };
  }

  /**
   * 4.3.4. Consume a string token
   * https://www.w3.org/TR/css-syntax-3/#consume-a-string-token
   *
   * Must be called with the position on the opening quote.
   *
   * @returns The string token whose `text` is the raw match including quotes,
   * or `null` when the string expression does not match.
   */
  private consumeAStringToken(): InputTokenObject | null {
    const quote = this.text[this.nextInputCodePointIndex];
    const stringRegEx = quote === "'" ? singleQuoteStringRegEx : doubleQuoteStringRegEx;

    stringRegEx.lastIndex = this.nextInputCodePointIndex;
    const result = stringRegEx.exec(this.text);
    if (!result) {
      return null;
    }
    this.nextInputCodePointIndex = stringRegEx.lastIndex;

    // TODO: Handle bad-string.
    // TODO: Perform string escaping.
    return { type: TokenObjectType.string, text: result[0] };
  }

  /**
   * 4.3.5. Consume a url token
   * https://www.w3.org/TR/css-syntax-3/#consume-a-url-token
   *
   * Must be called right after "url(" has been consumed.
   *
   * @returns The url token whose `text` is the raw source from "url(" up to and
   * including the closing ")" (or up to the end of input), or `null` for a malformed url.
   * @throws Error when an unquoted url contains a backslash.
   */
  private consumeAURLToken(): InputTokenObject | null {
    const start = this.nextInputCodePointIndex - 3 /* url */ - 1; /* ( */
    const urlToken: InputTokenObject = {
      type: TokenObjectType.url,
      text: undefined,
    };

    this.consumeAWhitespace();
    if (this.nextInputCodePointIndex >= this.text.length) {
      urlToken.text = this.text.substring(start);
      return urlToken;
    }

    const nextInputCodePoint = this.text[this.nextInputCodePointIndex];
    if (nextInputCodePoint === '"' || nextInputCodePoint === "'") {
      const stringToken = this.consumeAStringToken();
      if (!stringToken) {
        // TODO: Handle bad-string.
        return null;
      }
      this.consumeAWhitespace();
      return this.finishURLToken(urlToken, start);
    }

    while (this.nextInputCodePointIndex < this.text.length) {
      const char = this.text[this.nextInputCodePointIndex++];
      switch (char) {
        case ')':
          urlToken.text = this.text.substring(start, this.nextInputCodePointIndex);
          return urlToken;
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
          // Whitespace may only be followed by the closing parenthesis.
          this.consumeAWhitespace();
          return this.finishURLToken(urlToken, start);
        case '"':
        case "'":
          // TODO: Parse error! Bad url! Consume remnants.
          return null;
        case '\\':
          // TODO: Escape!
          throw new Error('Escaping not yet supported!');
        default:
          // TODO: Non-printable chars - error.
          // Ordinary url character: it becomes part of the raw text sliced out at the end.
          break;
      }
    }

    urlToken.text = this.text.substring(start);
    return urlToken;
  }

  /**
   * Completes a url token once its content and trailing whitespace were consumed:
   * expects ")" or the end of input at the current position.
   *
   * @returns The token with its raw text filled in, or `null` when something else follows.
   */
  private finishURLToken(urlToken: InputTokenObject, start: number): InputTokenObject | null {
    const atEnd = this.nextInputCodePointIndex >= this.text.length;
    if (!atEnd && this.text[this.nextInputCodePointIndex] !== ')') {
      // TODO: Handle bad-url.
      return null;
    }
    if (!atEnd) {
      this.nextInputCodePointIndex++;
    }
    urlToken.text = this.text.substring(start, this.nextInputCodePointIndex);
    return urlToken;
  }

  /**
   * 4.3.11. Consume a name
   * https://www.w3.org/TR/css-syntax-3/#consume-a-name
   *
   * @returns An ident token holding the matched name, or `null` with the
   * position unchanged when no name starts here.
   */
  private consumeAName(): InputTokenObject | null {
    nameRegEx.lastIndex = this.nextInputCodePointIndex;
    const result = nameRegEx.exec(this.text);
    if (!result) {
      return null;
    }
    this.nextInputCodePointIndex = nameRegEx.lastIndex;

    // TODO: Perform string escaping.
    return { type: TokenObjectType.ident, text: result[0] };
  }

  /**
   * Consumes "@" followed by a name.
   *
   * @returns The at-keyword token (name without "@"), or `null` with the
   * position unchanged when no name follows.
   */
  private consumeAtKeyword(): InputTokenObject | null {
    this.nextInputCodePointIndex++;
    const name = this.consumeAName();
    if (name) {
      return { type: TokenObjectType.atKeyword, text: name.text };
    }
    this.nextInputCodePointIndex--;
    return null;
  }

  /**
   * Skips a comment starting at the current "/".
   * The CSS spec tokenizer does not emit comment tokens, so nothing is returned for it.
   *
   * @returns `true` when a complete comment was skipped; `false` with the position
   * unchanged when no comment starts here or it is never closed.
   */
  private consumeAComment(): boolean {
    if (this.text[this.nextInputCodePointIndex + 1] !== '*') {
      return false;
    }
    commentRegEx.lastIndex = this.nextInputCodePointIndex;
    if (!commentRegEx.exec(this.text)) {
      return false; // TODO: Handle unterminated comments
    }
    this.nextInputCodePointIndex = commentRegEx.lastIndex;
    return true;
  }

  /** Hands a token back so that the next `consumeAToken` call returns it again. */
  private reconsumeTheCurrentInputToken(currentInputToken: InputToken) {
    this.reconsumedInputToken = currentInputToken;
  }

  /**
   * 5.3.1. Parse a stylesheet
   * https://www.w3.org/TR/css-syntax-3/#parse-a-stylesheet
   *
   * @returns The stylesheet with all rules that could be parsed from the remaining input.
   */
  public parseAStylesheet(): Stylesheet {
    this.topLevelFlag = true;
    return {
      rules: this.consumeAListOfRules(),
    };
  }

  /**
   * 5.4.1. Consume a list of rules
   * https://www.w3.org/TR/css-syntax-3/#consume-a-list-of-rules
   *
   * Whitespace is skipped. CDO/CDC markers are skipped at the top level and
   * otherwise start a qualified rule. At-keywords start an at-rule and
   * everything else starts a qualified rule.
   *
   * @returns The rules in source order; rules that fail to parse are left out.
   */
  public consumeAListOfRules(): Rule[] {
    const rules: Rule[] = [];
    let inputToken: InputToken;
    while ((inputToken = this.consumeAToken())) {
      if (inputToken === ' ') {
        continue;
      }

      if (inputToken === '<!--' || inputToken === '-->') {
        if (this.topLevelFlag) {
          continue;
        }
        this.reconsumeTheCurrentInputToken(inputToken);
        const markerRule = this.consumeAQualifiedRule();
        if (markerRule) {
          rules.push(markerRule);
        }
        continue;
      }

      const startsAtRule = typeof inputToken === 'object' && inputToken.type === TokenObjectType.atKeyword;
      this.reconsumeTheCurrentInputToken(inputToken);
      const rule = startsAtRule ? this.consumeAnAtRule() : this.consumeAQualifiedRule();
      if (rule) {
        rules.push(rule);
      }
    }
    return rules;
  }

  /**
   * 5.4.2. Consume an at-rule
   * https://www.w3.org/TR/css-syntax-3/#consume-an-at-rule
   *
   * Expects the next token to be the at-keyword that names the rule.
   *
   * @returns The at-rule; its block stays undefined when the rule ends with ";"
   * or the input ends first.
   */
  public consumeAnAtRule(): AtRule {
    let inputToken = this.consumeAToken();
    const atRule: AtRule = {
      type: 'at-rule',
      name: (inputToken as AtKeywordToken).text,
      prelude: [],
      block: undefined,
    };

    while ((inputToken = this.consumeAToken())) {
      if (inputToken === ';') {
        return atRule;
      }
      if (inputToken === '{') {
        atRule.block = this.consumeASimpleBlock(inputToken);
        return atRule;
      }
      if (typeof inputToken === 'object' && inputToken.type === TokenObjectType.simpleBlock && (inputToken as SimpleBlock).associatedToken === '{') {
        atRule.block = inputToken as SimpleBlock;
        return atRule;
      }

      this.reconsumeTheCurrentInputToken(inputToken);
      const component = this.consumeAComponentValue();
      if (component) {
        atRule.prelude.push(component);
      }
    }
    return atRule;
  }

  /**
   * 5.4.3. Consume a qualified rule
   * https://www.w3.org/TR/css-syntax-3/#consume-a-qualified-rule
   *
   * @returns The rule once its "{}" block was consumed, or `null` when the
   * input ends before a block is found.
   */
  public consumeAQualifiedRule(): QualifiedRule {
    const qualifiedRule: QualifiedRule = {
      type: 'qualified-rule',
      prelude: [],
      block: undefined,
    };

    let inputToken: InputToken;
    while ((inputToken = this.consumeAToken())) {
      if (inputToken === '{') {
        qualifiedRule.block = this.consumeASimpleBlock(inputToken);
        return qualifiedRule;
      }
      if (typeof inputToken === 'object' && inputToken.type === TokenObjectType.simpleBlock) {
        const simpleBlock = inputToken as SimpleBlock;
        if (simpleBlock.associatedToken === '{') {
          qualifiedRule.block = simpleBlock;
          return qualifiedRule;
        }
      }

      this.reconsumeTheCurrentInputToken(inputToken);
      const componentValue = this.consumeAComponentValue();
      if (componentValue) {
        qualifiedRule.prelude.push(componentValue);
      }
    }

    // TODO: This is a parse error, log parse errors!
    return null;
  }

  /**
   * 5.4.6. Consume a component value
   * https://www.w3.org/TR/css-syntax-3/#consume-a-component-value
   *
   * @returns A simple block for an opening bracket, a complete function for a
   * "name(" token, and the consumed token itself otherwise.
   */
  private consumeAComponentValue(): InputToken {
    const inputToken = this.consumeAToken();
    if (inputToken === '{' || inputToken === '[' || inputToken === '(') {
      // The tokenizer already moved past the bracket, so the block content starts
      // at the current position; advancing again would drop its first character.
      return this.consumeASimpleBlock(inputToken);
    }
    if (typeof inputToken === 'object' && inputToken.type === TokenObjectType.functionToken) {
      return this.consumeAFunction((inputToken as FunctionInputToken).name);
    }
    return inputToken;
  }

  /**
   * 5.4.7. Consume a simple block
   * https://www.w3.org/TR/css-syntax-3/#consume-a-simple-block
   *
   * Must be called right after the opening bracket has been consumed.
   *
   * @param associatedToken The opening bracket: "{", "[" or "(".
   * @returns The block; its `text` is the raw source from the opening bracket
   * up to and including the matching closing bracket, or up to the end of input.
   */
  private consumeASimpleBlock(associatedToken: InputToken): SimpleBlock {
    const endianToken = blockClosers[associatedToken as string];
    const start = this.nextInputCodePointIndex - 1;
    const block: SimpleBlock = {
      type: TokenObjectType.simpleBlock,
      text: undefined,
      associatedToken,
      values: [],
    };

    // Peek at the raw character: closing brackets are single character tokens.
    while (this.nextInputCodePointIndex < this.text.length) {
      if (this.text[this.nextInputCodePointIndex] === endianToken) {
        this.nextInputCodePointIndex++;
        block.text = this.text.substring(start, this.nextInputCodePointIndex);
        return block;
      }
      const value = this.consumeAComponentValue();
      if (value) {
        block.values.push(value);
      }
    }

    block.text = this.text.substring(start);
    return block;
  }

  /**
   * 5.4.8. Consume a function
   * https://www.w3.org/TR/css-syntax-3/#consume-a-function
   *
   * Must be called right after the "name(" token has been consumed.
   *
   * @param name The function name taken from the "name(" token.
   * @returns The function with its components; `text` is "name(" plus the raw
   * source up to and including ")", or up to the end of input.
   */
  private consumeAFunction(name: string): InputToken {
    const start = this.nextInputCodePointIndex;
    const funcToken: FunctionToken = {
      type: TokenObjectType.function,
      name,
      text: undefined,
      components: [],
    };

    while (this.nextInputCodePointIndex < this.text.length) {
      if (this.text[this.nextInputCodePointIndex] === ')') {
        this.nextInputCodePointIndex++;
        funcToken.text = name + '(' + this.text.substring(start, this.nextInputCodePointIndex);
        return funcToken;
      }
      const component = this.consumeAComponentValue();
      if (component) {
        funcToken.components.push(component);
      }
    }

    funcToken.text = name + '(' + this.text.substring(start);
    return funcToken;
  }
}