mirror of
				https://github.com/skishore/makemeahanzi.git
				synced 2025-10-31 10:56:39 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			409 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			409 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| /*!
 | |
|  * RegJSGen
 | |
|  * Copyright 2014 Benjamin Tan <https://d10.github.io/>
 | |
|  * Available under MIT license <http://d10.mit-license.org/>
 | |
|  */
 | |
| ;(function() {
 | |
|   'use strict';
 | |
| 
 | |
|   /** Used to determine if values are of the language type `Object` */
 | |
|   var objectTypes = {
 | |
|     'function': true,
 | |
|     'object': true
 | |
|   };
 | |
| 
 | |
|   /** Used as a reference to the global object */
 | |
|   var root = (objectTypes[typeof window] && window) || this;
 | |
| 
 | |
|   /** Backup possible global object */
 | |
|   var oldRoot = root;
 | |
| 
 | |
|   /** Detect free variable `exports` */
 | |
|   var freeExports = objectTypes[typeof exports] && exports;
 | |
| 
 | |
|   /** Detect free variable `module` */
 | |
|   var freeModule = objectTypes[typeof module] && module && !module.nodeType && module;
 | |
| 
 | |
|   /** Detect free variable `global` from Node.js or Browserified code and use it as `root` */
 | |
|   var freeGlobal = freeExports && freeModule && typeof global == 'object' && global;
 | |
|   if (freeGlobal && (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal || freeGlobal.self === freeGlobal)) {
 | |
|     root = freeGlobal;
 | |
|   }
 | |
| 
 | |
|   /*--------------------------------------------------------------------------*/
 | |
| 
 | |
|   /*! Based on https://mths.be/fromcodepoint v0.2.0 by @mathias */
 | |
| 
 | |
|   var stringFromCharCode = String.fromCharCode;
 | |
|   var floor = Math.floor;
 | |
|   function fromCodePoint() {
 | |
|     var MAX_SIZE = 0x4000;
 | |
|     var codeUnits = [];
 | |
|     var highSurrogate;
 | |
|     var lowSurrogate;
 | |
|     var index = -1;
 | |
|     var length = arguments.length;
 | |
|     if (!length) {
 | |
|       return '';
 | |
|     }
 | |
|     var result = '';
 | |
|     while (++index < length) {
 | |
|       var codePoint = Number(arguments[index]);
 | |
|       if (
 | |
|         !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
 | |
|         codePoint < 0 || // not a valid Unicode code point
 | |
|         codePoint > 0x10FFFF || // not a valid Unicode code point
 | |
|         floor(codePoint) != codePoint // not an integer
 | |
|       ) {
 | |
|         throw RangeError('Invalid code point: ' + codePoint);
 | |
|       }
 | |
|       if (codePoint <= 0xFFFF) {
 | |
|         // BMP code point
 | |
|         codeUnits.push(codePoint);
 | |
|       } else {
 | |
|         // Astral code point; split in surrogate halves
 | |
|         // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
 | |
|         codePoint -= 0x10000;
 | |
|         highSurrogate = (codePoint >> 10) + 0xD800;
 | |
|         lowSurrogate = (codePoint % 0x400) + 0xDC00;
 | |
|         codeUnits.push(highSurrogate, lowSurrogate);
 | |
|       }
 | |
|       if (index + 1 == length || codeUnits.length > MAX_SIZE) {
 | |
|         result += stringFromCharCode.apply(null, codeUnits);
 | |
|         codeUnits.length = 0;
 | |
|       }
 | |
|     }
 | |
|     return result;
 | |
|   }
 | |
| 
 | |
|   function assertType(type, expected) {
 | |
|     if (expected.indexOf('|') == -1) {
 | |
|       if (type == expected) {
 | |
|         return;
 | |
|       }
 | |
| 
 | |
|       throw Error('Invalid node type: ' + type);
 | |
|     }
 | |
| 
 | |
|     expected = assertType.hasOwnProperty(expected)
 | |
|       ? assertType[expected]
 | |
|       : (assertType[expected] = RegExp('^(?:' + expected + ')$'));
 | |
| 
 | |
|     if (expected.test(type)) {
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|     throw Error('Invalid node type: ' + type);
 | |
|   }
 | |
| 
 | |
|   /*--------------------------------------------------------------------------*/
 | |
| 
 | |
|   function generate(node) {
 | |
|     var type = node.type;
 | |
| 
 | |
|     if (generate.hasOwnProperty(type) && typeof generate[type] == 'function') {
 | |
|       return generate[type](node);
 | |
|     }
 | |
| 
 | |
|     throw Error('Invalid node type: ' + type);
 | |
|   }
 | |
| 
 | |
|   /*--------------------------------------------------------------------------*/
 | |
| 
 | |
|   function generateAlternative(node) {
 | |
|     assertType(node.type, 'alternative');
 | |
| 
 | |
|     var terms = node.body,
 | |
|         length = terms ? terms.length : 0;
 | |
| 
 | |
|     if (length == 1) {
 | |
|       return generateTerm(terms[0]);
 | |
|     } else {
 | |
|       var i = -1,
 | |
|           result = '';
 | |
| 
 | |
|       while (++i < length) {
 | |
|         result += generateTerm(terms[i]);
 | |
|       }
 | |
| 
 | |
|       return result;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   function generateAnchor(node) {
 | |
|     assertType(node.type, 'anchor');
 | |
| 
 | |
|     switch (node.kind) {
 | |
|       case 'start':
 | |
|         return '^';
 | |
|       case 'end':
 | |
|         return '$';
 | |
|       case 'boundary':
 | |
|         return '\\b';
 | |
|       case 'not-boundary':
 | |
|         return '\\B';
 | |
|       default:
 | |
|         throw Error('Invalid assertion');
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   function generateAtom(node) {
 | |
|     assertType(node.type, 'anchor|characterClass|characterClassEscape|dot|group|reference|value');
 | |
| 
 | |
|     return generate(node);
 | |
|   }
 | |
| 
 | |
|   function generateCharacterClass(node) {
 | |
|     assertType(node.type, 'characterClass');
 | |
| 
 | |
|     var classRanges = node.body,
 | |
|         length = classRanges ? classRanges.length : 0;
 | |
| 
 | |
|     var i = -1,
 | |
|         result = '[';
 | |
| 
 | |
|     if (node.negative) {
 | |
|       result += '^';
 | |
|     }
 | |
| 
 | |
|     while (++i < length) {
 | |
|       result += generateClassAtom(classRanges[i]);
 | |
|     }
 | |
| 
 | |
|     result += ']';
 | |
| 
 | |
|     return result;
 | |
|   }
 | |
| 
 | |
|   function generateCharacterClassEscape(node) {
 | |
|     assertType(node.type, 'characterClassEscape');
 | |
| 
 | |
|     return '\\' + node.value;
 | |
|   }
 | |
| 
 | |
|   function generateCharacterClassRange(node) {
 | |
|     assertType(node.type, 'characterClassRange');
 | |
| 
 | |
|     var min = node.min,
 | |
|         max = node.max;
 | |
| 
 | |
|     if (min.type == 'characterClassRange' || max.type == 'characterClassRange') {
 | |
|       throw Error('Invalid character class range');
 | |
|     }
 | |
| 
 | |
|     return generateClassAtom(min) + '-' + generateClassAtom(max);
 | |
|   }
 | |
| 
 | |
|   function generateClassAtom(node) {
 | |
|     assertType(node.type, 'anchor|characterClassEscape|characterClassRange|dot|value');
 | |
| 
 | |
|     return generate(node);
 | |
|   }
 | |
| 
 | |
|   function generateDisjunction(node) {
 | |
|     assertType(node.type, 'disjunction');
 | |
| 
 | |
|     var body = node.body,
 | |
|         length = body ? body.length : 0;
 | |
| 
 | |
|     if (length == 0) {
 | |
|       throw Error('No body');
 | |
|     } else if (length == 1) {
 | |
|       return generate(body[0]);
 | |
|     } else {
 | |
|       var i = -1,
 | |
|           result = '';
 | |
| 
 | |
|       while (++i < length) {
 | |
|         if (i != 0) {
 | |
|           result += '|';
 | |
|         }
 | |
|         result += generate(body[i]);
 | |
|       }
 | |
| 
 | |
|       return result;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   function generateDot(node) {
 | |
|     assertType(node.type, 'dot');
 | |
| 
 | |
|     return '.';
 | |
|   }
 | |
| 
 | |
|   function generateGroup(node) {
 | |
|     assertType(node.type, 'group');
 | |
| 
 | |
|     var result = '(';
 | |
| 
 | |
|     switch (node.behavior) {
 | |
|       case 'normal':
 | |
|         break;
 | |
|       case 'ignore':
 | |
|         result += '?:';
 | |
|         break;
 | |
|       case 'lookahead':
 | |
|         result += '?=';
 | |
|         break;
 | |
|       case 'negativeLookahead':
 | |
|         result += '?!';
 | |
|         break;
 | |
|       default:
 | |
|         throw Error('Invalid behaviour: ' + node.behaviour);
 | |
|     }
 | |
| 
 | |
|     var body = node.body,
 | |
|         length = body ? body.length : 0;
 | |
| 
 | |
|     if (length == 1) {
 | |
|       result += generate(body[0]);
 | |
|     } else {
 | |
|       var i = -1;
 | |
| 
 | |
|       while (++i < length) {
 | |
|         result += generate(body[i]);
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     result += ')';
 | |
| 
 | |
|     return result;
 | |
|   }
 | |
| 
 | |
|   function generateQuantifier(node) {
 | |
|     assertType(node.type, 'quantifier');
 | |
| 
 | |
|     var quantifier = '',
 | |
|         min = node.min,
 | |
|         max = node.max;
 | |
| 
 | |
|     switch (max) {
 | |
|       case undefined:
 | |
|       case null:
 | |
|         switch (min) {
 | |
|           case 0:
 | |
|             quantifier = '*'
 | |
|             break;
 | |
|           case 1:
 | |
|             quantifier = '+';
 | |
|             break;
 | |
|           default:
 | |
|             quantifier = '{' + min + ',}';
 | |
|             break;
 | |
|         }
 | |
|         break;
 | |
|       default:
 | |
|         if (min == max) {
 | |
|           quantifier = '{' + min + '}';
 | |
|         }
 | |
|         else if (min == 0 && max == 1) {
 | |
|           quantifier = '?';
 | |
|         } else {
 | |
|           quantifier = '{' + min + ',' + max + '}';
 | |
|         }
 | |
|         break;
 | |
|     }
 | |
| 
 | |
|     if (!node.greedy) {
 | |
|       quantifier += '?';
 | |
|     }
 | |
| 
 | |
|     return generateAtom(node.body[0]) + quantifier;
 | |
|   }
 | |
| 
 | |
|   function generateReference(node) {
 | |
|     assertType(node.type, 'reference');
 | |
| 
 | |
|     return '\\' + node.matchIndex;
 | |
|   }
 | |
| 
 | |
|   function generateTerm(node) {
 | |
|     assertType(node.type, 'anchor|characterClass|characterClassEscape|empty|group|quantifier|reference|value');
 | |
| 
 | |
|     return generate(node);
 | |
|   }
 | |
| 
 | |
|   function generateValue(node) {
 | |
|     assertType(node.type, 'value');
 | |
| 
 | |
|     var kind = node.kind,
 | |
|         codePoint = node.codePoint;
 | |
| 
 | |
|     switch (kind) {
 | |
|       case 'controlLetter':
 | |
|         return '\\c' + fromCodePoint(codePoint + 64);
 | |
|       case 'hexadecimalEscape':
 | |
|         return '\\x' + ('00' + codePoint.toString(16).toUpperCase()).slice(-2);
 | |
|       case 'identifier':
 | |
|         return '\\' + fromCodePoint(codePoint);
 | |
|       case 'null':
 | |
|         return '\\' + codePoint;
 | |
|       case 'octal':
 | |
|         return '\\' + codePoint.toString(8);
 | |
|       case 'singleEscape':
 | |
|         switch (codePoint) {
 | |
|           case 0x0008:
 | |
|             return '\\b';
 | |
|           case 0x009:
 | |
|             return '\\t';
 | |
|           case 0x00A:
 | |
|             return '\\n';
 | |
|           case 0x00B:
 | |
|             return '\\v';
 | |
|           case 0x00C:
 | |
|             return '\\f';
 | |
|           case 0x00D:
 | |
|             return '\\r';
 | |
|           default:
 | |
|             throw Error('Invalid codepoint: ' + codePoint);
 | |
|         }
 | |
|       case 'symbol':
 | |
|         return fromCodePoint(codePoint);
 | |
|       case 'unicodeEscape':
 | |
|         return '\\u' + ('0000' + codePoint.toString(16).toUpperCase()).slice(-4);
 | |
|       case 'unicodeCodePointEscape':
 | |
|         return '\\u{' + codePoint.toString(16).toUpperCase() + '}';
 | |
|       default:
 | |
|         throw Error('Unsupported node kind: ' + kind);
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   /*--------------------------------------------------------------------------*/
 | |
| 
 | |
|   generate.alternative = generateAlternative;
 | |
|   generate.anchor = generateAnchor;
 | |
|   generate.characterClass = generateCharacterClass;
 | |
|   generate.characterClassEscape = generateCharacterClassEscape;
 | |
|   generate.characterClassRange = generateCharacterClassRange;
 | |
|   generate.disjunction = generateDisjunction;
 | |
|   generate.dot = generateDot;
 | |
|   generate.group = generateGroup;
 | |
|   generate.quantifier = generateQuantifier;
 | |
|   generate.reference = generateReference;
 | |
|   generate.value = generateValue;
 | |
| 
 | |
|   /*--------------------------------------------------------------------------*/
 | |
| 
 | |
|   // export regjsgen
 | |
|   // some AMD build optimizers, like r.js, check for condition patterns like the following:
 | |
|   if (typeof define == 'function' && typeof define.amd == 'object' && define.amd) {
 | |
|     // define as an anonymous module so, through path mapping, it can be aliased
 | |
|     define(function() {
 | |
|       return {
 | |
|         'generate': generate
 | |
|       };
 | |
|     });
 | |
|   }
 | |
|   // check for `exports` after `define` in case a build optimizer adds an `exports` object
 | |
|   else if (freeExports && freeModule) {
 | |
|     // in Narwhal, Node.js, Rhino -require, or RingoJS
 | |
|     freeExports.generate = generate;
 | |
|   }
 | |
|   // in a browser or Rhino
 | |
|   else {
 | |
|     root.regjsgen = {
 | |
|       'generate': generate
 | |
|     };
 | |
|   }
 | |
| }.call(this));
 | 
