Files
2017-02-04 14:01:51 +00:00

614 lines
14 KiB
ObjectPascal

{-------------------------------------------------------------------------------
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
the specific language governing rights and limitations under the License.
The Original Code is: GenLex.pas, released 2000-04-19.
Description: Tokenlist used by the generator.
The Original Code is based on mGenLex.pas by Martin Waldenburg, part of
the mwEdit component suite.
Portions created by Martin Waldenburg are Copyright (C) 1998 Martin Waldenburg.
All Rights Reserved.
Contributors to the SynEdit and mwEdit projects are listed in the
Contributors.txt file.
Alternatively, the contents of this file may be used under the terms of the
GNU General Public License Version 2 or later (the "GPL"), in which case
the provisions of the GPL are applicable instead of those above.
If you wish to allow use of your version of this file only under the terms
of the GPL and not to allow others to use your version of this file
under the MPL, indicate your decision by deleting the provisions above and
replace them with the notice and other provisions required by the GPL.
If you do not delete the provisions above, a recipient may use your version
of this file under either the MPL or the GPL.
$Id: GenLex.pas,v 1.4.2.4 2008/10/25 23:30:31 maelh Exp $
You may retrieve the latest version of this file at the SynEdit home page,
located at http://SynEdit.SourceForge.net
Known Issues:
-------------------------------------------------------------------------------}
unit GenLex;
interface
uses
SysUtils, Windows, Messages, Classes, Controls, LongIntList, SynUnicode;
var
Identifiers: array[#0..#255] of ByteBool;
mHashTable: array[#0..#255] of Integer;
type
TIdTokenKind = (
IdBeginFunc,
IdBeginProc,
IdBraceOpen,
IdChars,
IdCharset,
IdCRLF,
IdEndFunc,
IdEndProc,
IdIdent,
IdIdentifier,
IdIdentStart,
IdKeys,
IdTokenTypes,
IdNull,
IdSensitive,
IdSpace,
IdStop,
IdEnclosedBy,
IdSampleSource,
IdUnknown);
type
TGenLex = class(TObject)
private
fIgnoreComments: Boolean;
fOrigin: PWideChar;
fProcTable: array[#0..#255] of procedure of object;
fFuncTable: array[#0..#255] of function: TIdTokenKind of object;
Run: Integer;
Walker: LongInt;
Running: LongInt;
fStringLen: Integer;
fToIdent: PWideChar;
fTokenizing: Boolean;
FLinePosList: TLongIntList;
FTokenPositionsList: TLongIntList;
fIdentFuncTable: array[0..150] of function: TIdTokenKind of object;
function KeyHash(ToHash: PWideChar): Integer;
function KeyComp(aKey: UnicodeString): Boolean;
function Func49: TIdTokenKind;
function Func60: TIdTokenKind;
function Func67: TIdTokenKind;
function Func75: TIdTokenKind;
function Func81: TIdTokenKind;
function Func89: TIdTokenKind;
function Func104: TIdTokenKind;
function Func122: TIdTokenKind;
function Func130: TIdTokenKind;
function Func147: TIdTokenKind;
function Func150: TIdTokenKind;
procedure BraceOpenProc;
function BraceOpenFunc: TIdTokenKind;
procedure CRLFProc;
function CRLFFunc: TIdTokenKind;
procedure CharsetProc;
function CharsetFunc: TIdTokenKind;
procedure IdentProc;
function IdentFunc: TIdTokenKind;
procedure NullProc;
function NullFunc: TIdTokenKind;
procedure SpaceProc;
function SpaceFunc: TIdTokenKind;
procedure StopProc;
function StopFunc: TIdTokenKind;
procedure UnknownProc;
function UnknownFunc: TIdTokenKind;
function AltFunc: TIdTokenKind;
procedure InitIdent;
function IdentKind(MayBe: PWideChar): TIdTokenKind;
procedure SetOrigin(NewValue: PWideChar);
procedure SetRunPos(Value: Integer);
procedure MakeMethodTables;
function GetRunId: TIdTokenKind;
function GetRunToken: UnicodeString;
protected
public
constructor Create;
destructor Destroy; override;
procedure Tokenize;
procedure Next;
property IgnoreComments: Boolean read fIgnoreComments write fIgnoreComments;
property Origin: PWideChar read fOrigin write SetOrigin;
property RunPos: Integer read Run write SetRunPos;
function NextToken: UnicodeString;
function EOF: Boolean;
property RunId: TIdTokenKind read GetRunId;
property RunToken: UnicodeString read GetRunToken;
published
end;
implementation
procedure MakeIdentTable;
var
I, J: Char;
begin
for I := #0 to #255 do
begin
case I of
'_', '0'..'9', 'a'..'z', 'A'..'Z': Identifiers[I] := True;
else
Identifiers[I] := False;
end;
J := UpperCase(I)[1];
case CharInSet(I, ['_', 'a'..'z', 'A'..'Z']) of
True: mHashTable[I] := Ord(J) - 64
else
mHashTable[I] := 0;
end;
end;
end;
procedure TGenLex.InitIdent;
var
I: Integer;
begin
for I := 0 to 150 do
case I of
49: fIdentFuncTable[I] := Func49;
60: fIdentFuncTable[I] := Func60;
67: fIdentFuncTable[I] := Func67;
75: fIdentFuncTable[I] := Func75;
81: fIdentFuncTable[I] := Func81;
89: fIdentFuncTable[I] := Func89;
104: fIdentFuncTable[I] := Func104;
122: fIdentFuncTable[I] := Func122;
130: fIdentFuncTable[I] := Func130;
147: fIdentFuncTable[I] := Func147;
150: fIdentFuncTable[I] := Func150;
else
fIdentFuncTable[I] := AltFunc;
end;
end;
function TGenLex.KeyHash(ToHash: PWideChar): Integer;
begin
Result := 0;
while CharInSet(ToHash^, ['_', '0'..'9', 'a'..'z', 'A'..'Z']) do
begin
Inc(Result, mHashTable[Char(ToHash^)]);
Inc(ToHash);
end;
fStringLen := ToHash - fToIdent;
end; { KeyHash }
function TGenLex.KeyComp(aKey: UnicodeString): Boolean;
var
I: Integer;
Temp: PWideChar;
begin
Temp := fToIdent;
if Length(aKey) = fStringLen then
begin
Result := True;
for i := 1 to fStringLen do
begin
if mHashTable[Char(Temp^)] <> mHashTable[Char(aKey[i])] then
begin
Result := False;
break;
end;
inc(Temp);
end;
end
else
Result := False;
end; { KeyComp }
function TGenLex.Func49: TIdTokenKind;
begin
if KeyComp('Chars') then
Result := IdChars
else
Result := IDIdentifier;
end;
function TGenLex.Func60: TIdTokenKind;
begin
if KeyComp('Keys') then
Result := IdKeys
else
Result := IDIdentifier;
end;
function TGenLex.Func67: TIdTokenKind;
begin
if KeyComp('EndFunc') then
Result := IdEndFunc
else
Result := IDIdentifier;
end;
function TGenLex.Func75: TIdTokenKind;
begin
if KeyComp('EndProc') then
Result := IdEndProc
else
Result := IDIdentifier;
end;
function TGenLex.Func81: TIdTokenKind;
begin
if KeyComp('BeginFunc') then
Result := IdBeginFunc
else
Result := IDIdentifier;
end;
function TGenLex.Func89: TIdTokenKind;
begin
if KeyComp('BeginProc') then
Result := IdBeginProc
else
Result := IDIdentifier;
end;
function TGenLex.Func104: TIdTokenKind;
begin
if KeyComp('EnclosedBy') then
Result := IdEnclosedBy
else
Result := IDIdentifier;
end;
function TGenLex.Func122: TIdTokenKind;
begin
if KeyComp('Sensitive') then
Result := IdSensitive
else
Result := IDIdentifier;
end;
function TGenLex.Func130: TIdTokenKind;
begin
if KeyComp('IdentStart') then
Result := IdIdentStart
else
Result := IDIdentifier;
end;
function TGenLex.Func147: TIdTokenKind;
begin
if KeyComp('SAMPLESOURCE') then
Result := IdSampleSource
else
Result := IDIdentifier;
end;
function TGenLex.Func150: TIdTokenKind;
begin
if KeyComp('TOKENTYPES') then
Result := IdTokenTypes
else
Result := IDIdentifier;
end;
function TGenLex.AltFunc: TIdTokenKind;
begin
Result := IdIdentifier;
end;
function TGenLex.IdentKind(MayBe: PWideChar): TIdTokenKind;
var
HashKey: Integer;
begin
fToIdent := MayBe;
HashKey := KeyHash(MayBe);
if HashKey < 151 then
Result := fIdentFuncTable[HashKey]
else
Result := IdIdentifier;
end;
procedure TGenLex.MakeMethodTables;
var
I: Char;
begin
for I := #0 to #255 do
case I of
'{':
begin
fProcTable[I] := BraceOpenProc;
fFuncTable[I] := BraceOpenFunc;
end;
#10, #13:
begin
fProcTable[I] := CRLFProc;
fFuncTable[I] := CRLFFunc;
end;
#39, '#':
begin
fProcTable[I] := CharsetProc;
fFuncTable[I] := CharsetFunc;
end;
'A'..'Z', 'a'..'z', '_':
begin
fProcTable[I] := IdentProc;
fFuncTable[I] := IdentFunc;
end;
#0:
begin
fProcTable[I] := NullProc;
fFuncTable[I] := NullFunc;
end;
#1..#9, #11, #12, #14..#32:
begin
fProcTable[I] := SpaceProc;
fFuncTable[I] := SpaceFunc;
end;
'|':
begin
fProcTable[I] := StopProc;
fFuncTable[I] := StopFunc;
end;
else
begin
fProcTable[I] := UnknownProc;
fFuncTable[I] := UnknownFunc;
end;
end;
end;
constructor TGenLex.Create;
begin
inherited Create;
InitIdent;
MakeMethodTables;
fIgnoreComments := False;
FTokenPositionsList := TLongIntList.Create;
FLinePosList := TLongIntList.Create;
end; { Create }
destructor TGenLex.Destroy;
begin
inherited Destroy;
FTokenPositionsList.Free;
FLinePosList.Free;
end; { Destroy }
procedure TGenLex.SetOrigin(NewValue: PWideChar);
begin
fOrigin := NewValue;
Run := 0;
Walker := 0;
FTokenPositionsList.Clear;
FTokenPositionsList.Add(0);
FLinePosList.Clear;
FLinePosList.Add(0);
end; { SetOrigin }
procedure TGenLex.SetRunPos(Value: Integer);
begin
Run := Value;
end;
procedure TGenLex.BraceOpenProc;
begin
Inc(Walker);
if not fIgnoreComments then
begin
while FOrigin[Walker] <> #0 do
begin
case FOrigin[Walker] of
'}':
begin
Inc(Walker);
Break;
end;
#10:
begin
Inc(Walker);
if fTokenizing then
FLinePosList.Add(Walker);
end;
#13:
begin
if FOrigin[Walker + 1] = #10 then
Inc(Walker, 2)
else
Inc(Walker);
if fTokenizing then
FLinePosList.Add(Walker);
end;
else
Inc(Walker);
end;
end;
end;
end;
function TGenLex.BraceOpenFunc: TIdTokenKind;
begin
Result := IDBraceOpen;
end;
procedure TGenLex.CRLFProc;
begin
case FOrigin[Walker] of
#10: inc(Walker);
#13:
case FOrigin[Walker + 1] of
#10: inc(Walker, 2);
else
inc(Walker);
end;
end;
if fTokenizing then
FLinePosList.Add(Walker);
end;
function TGenLex.CRLFFunc: TIdTokenKind;
begin
Result := IdCRLF;
end;
procedure TGenLex.CharsetProc;
begin
while FOrigin[Walker] <> #0 do
begin
case FOrigin[Walker] of
#10, #13: break;
':': if FOrigin[Walker + 1] = ':' then
break
else
inc(Walker);
else
inc(Walker);
end;
end;
end;
function TGenLex.CharsetFunc: TIdTokenKind;
begin
Result := IDCharSet;
end;
procedure TGenLex.IdentProc;
begin
inc(Walker);
while Identifiers[Char(fOrigin[Walker])] do
inc(Walker);
end;
function TGenLex.IdentFunc: TIdTokenKind;
begin
Result := IdentKind((fOrigin + Running));
end;
procedure TGenLex.NullProc;
begin
if fTokenizing then
if not CharInSet(FOrigin[Walker - 1], [#10, #13]) then
FLinePosList.Add(Walker);
end;
function TGenLex.NullFunc: TIdTokenKind;
begin
Result := IdNull;
end;
procedure TGenLex.SpaceProc;
begin
while CharInSet(fOrigin[Walker], [#1..#9, #11, #12, #14..#32]) do
inc(Walker);
end;
function TGenLex.SpaceFunc: TIdTokenKind;
begin
Result := IdSpace;
end;
procedure TGenLex.StopProc;
begin
inc(Walker);
while FOrigin[Walker] <> #0 do
begin
case FOrigin[Walker] of
#10: break;
#13: break;
'|':
begin
Inc(Walker);
break;
end;
else
Inc(Walker);
end;
end;
end;
function TGenLex.StopFunc: TIdTokenKind;
begin
Result := IdUnknown;
if FOrigin[Running + 1] = '>' then
if FOrigin[Running + 2] = '<' then
if FOrigin[Running + 3] = '|' then
Result := IDStop;
end;
procedure TGenLex.UnknownProc;
begin
inc(Walker);
end;
function TGenLex.UnknownFunc: TIdTokenKind;
begin
Result := IdUnknown;
end;
function TGenLex.EOF: Boolean;
begin
Result := False;
end; { EOF }
function TGenLex.GetRunId: TIdTokenKind;
begin
Running := FTokenPositionsList[Run];
Result := fFuncTable[Char(fOrigin[Running])];
end;
function TGenLex.GetRunToken: UnicodeString;
var
StartPos, EndPos, StringLen: Integer;
begin
StartPos := FTokenPositionsList[Run];
EndPos := FTokenPositionsList[Run + 1];
StringLen := EndPos - StartPos;
SetString(Result, (FOrigin + StartPos), Stringlen);
end;
procedure TGenLex.Tokenize;
begin
fTokenizing := True;
repeat
fProcTable[Char(fOrigin[Walker])];
FTokenPositionsList.Add(Walker);
until fOrigin[Walker] = #0;
fTokenizing := False;
end;
procedure TGenLex.Next;
begin
Inc(Run);
end;
function TGenLex.NextToken: UnicodeString;
var
StartPos, EndPos, Len: LongInt;
begin
StartPos := FTokenPositionsList[Run];
EndPos := FTokenPositionsList[Run + 1];
Len := EndPos - StartPos;
SetString(Result, (FOrigin + StartPos), Len);
inc(Run);
end;
initialization
MakeIdentTable;
end.