mirror of
https://github.com/HeidiSQL/HeidiSQL.git
synced 2025-08-26 03:01:07 +08:00
Rename ReadUnicodeFile to ReadTextfile and split it into 3 different procedures:
- OpenTextfile - ReadTextfileChunk - ReadTextfile Enables us to use the unicode detection in places where we need to read a file chunkwise. (e.g. the runsqlfile unit) TODO: Fix UTF-8 detection, misdetected as ANSI currently.
This commit is contained in:
@ -27,6 +27,8 @@ type
|
||||
TVTreeDataArray = Array of TVTreeData;
|
||||
PVTreeDataArray = ^TVTreeDataArray;
|
||||
|
||||
TFileCharset = (fcsAnsi, fcsUnicode, fcsUnicodeSwapped, fcsUtf8);
|
||||
|
||||
{$I const.inc}
|
||||
|
||||
function trimc(s: String; c: Char) : String;
|
||||
@ -93,7 +95,9 @@ type
|
||||
function GetDBObjectType( TableStatus: TFields ): Byte;
|
||||
procedure SetWindowSizeGrip(hWnd: HWND; Enable: boolean);
|
||||
procedure SaveUnicodeFile(Filename: String; Text: WideString);
|
||||
function ReadUnicodeFile(Filename: String): WideString;
|
||||
procedure OpenTextFile(const Filename: String; out Stream: TFileStream; out FileCharset: TFileCharset);
|
||||
function ReadTextfileChunk(Stream: TFileStream; FileCharset: TFileCharset; ChunkSize: Cardinal = 0): WideString;
|
||||
function ReadTextfile(Filename: String): WideString;
|
||||
|
||||
var
|
||||
MYSQL_KEYWORDS : TStringList;
|
||||
@ -2375,86 +2379,99 @@ end;
|
||||
|
||||
|
||||
{**
|
||||
Read a unicode or ansi file into memory
|
||||
Open a textfile unicode safe and return a stream + its charset
|
||||
}
|
||||
function ReadUnicodeFile(Filename: String): WideString;
|
||||
procedure OpenTextFile(const Filename: String; out Stream: TFileStream; out FileCharset: TFileCharset);
|
||||
var
|
||||
Stream: TFileStream;
|
||||
ByteOrderMark: WideChar;
|
||||
BytesRead: Integer;
|
||||
Utf8Test: array[0..2] of AnsiChar;
|
||||
DataLeft: Integer;
|
||||
StreamCharSet: Byte;
|
||||
SA: AnsiString;
|
||||
P: PWord;
|
||||
const
|
||||
UNICODE_BOM = WideChar($FEFF);
|
||||
UNICODE_BOM_SWAPPED = WideChar($FFFE);
|
||||
UTF8_BOM = AnsiString(#$EF#$BB#$BF);
|
||||
sscAnsi = 0;
|
||||
sscUnicode = 1;
|
||||
sscUnicodeSwapped = 2;
|
||||
sscUtf8 = 3;
|
||||
begin
|
||||
Stream := TFileStream.Create(Filename, fmOpenRead or fmShareDenyWrite);
|
||||
Stream.Position := 0;
|
||||
|
||||
// Byte Order Mark
|
||||
ByteOrderMark := #0;
|
||||
if (Stream.Size - Stream.Position) >= SizeOf(ByteOrderMark) then begin
|
||||
BytesRead := Stream.Read(ByteOrderMark, SizeOf(ByteOrderMark));
|
||||
if (ByteOrderMark <> UNICODE_BOM) and (ByteOrderMark <> UNICODE_BOM_SWAPPED) then begin
|
||||
ByteOrderMark := #0;
|
||||
Stream.Seek(-BytesRead, soFromCurrent);
|
||||
if (Stream.Size - Stream.Position) >= Length(Utf8Test) * SizeOf(AnsiChar) then begin
|
||||
BytesRead := Stream.Read(Utf8Test[0], Length(Utf8Test) * SizeOf(AnsiChar));
|
||||
if Utf8Test <> UTF8_BOM then
|
||||
Stream.Seek(-BytesRead, soFromCurrent);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
// Test Byte Order Mark
|
||||
if ByteOrderMark = UNICODE_BOM then
|
||||
FileCharset := fcsUnicode
|
||||
else if ByteOrderMark = UNICODE_BOM_SWAPPED then
|
||||
FileCharset := fcsUnicodeSwapped
|
||||
else if Utf8Test = UTF8_BOM then
|
||||
FileCharset := fcsUtf8
|
||||
else
|
||||
FileCharset := fcsAnsi;
|
||||
end;
|
||||
|
||||
|
||||
{**
|
||||
Read a chunk out of a textfile unicode safe by passing a stream and its charset
|
||||
}
|
||||
function ReadTextfileChunk(Stream: TFileStream; FileCharset: TFileCharset; ChunkSize: Cardinal = 0): WideString;
|
||||
var
|
||||
SA: AnsiString;
|
||||
P: PWord;
|
||||
begin
|
||||
if ChunkSize = 0 then
|
||||
ChunkSize := Stream.Size - Stream.Position;
|
||||
if (FileCharset in [fcsUnicode, fcsUnicodeSwapped]) then begin
|
||||
// BOM indicates Unicode text stream
|
||||
if ChunkSize < SizeOf(WideChar) then
|
||||
Result := ''
|
||||
else begin
|
||||
SetLength(Result, ChunkSize div SizeOf(WideChar));
|
||||
Stream.Read(PWideChar(Result)^, ChunkSize);
|
||||
if FileCharset = fcsUnicodeSwapped then begin
|
||||
P := PWord(PWideChar(Result));
|
||||
While (P^ <> 0) do begin
|
||||
P^ := MakeWord(HiByte(P^), LoByte(P^));
|
||||
Inc(P);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end else if FileCharset = fcsUtf8 then begin
|
||||
// BOM indicates UTF-8 text stream
|
||||
SetLength(SA, ChunkSize div SizeOf(AnsiChar));
|
||||
Stream.Read(PAnsiChar(SA)^, ChunkSize);
|
||||
Result := UTF8Decode(SA);
|
||||
end else begin
|
||||
// without byte order mark it is assumed that we are loading ANSI text
|
||||
SetLength(SA, ChunkSize div SizeOf(AnsiChar));
|
||||
Stream.Read(PAnsiChar(SA)^, ChunkSize);
|
||||
Result := SA;
|
||||
end;
|
||||
end;
|
||||
|
||||
{**
|
||||
Read a unicode or ansi file into memory
|
||||
}
|
||||
function ReadTextfile(Filename: String): WideString;
|
||||
var
|
||||
Stream: TFileStream;
|
||||
FileCharset: TFileCharset;
|
||||
begin
|
||||
try
|
||||
Stream.Position := 0;
|
||||
|
||||
// Byte Order Mark
|
||||
ByteOrderMark := #0;
|
||||
if (Stream.Size - Stream.Position) >= SizeOf(ByteOrderMark) then begin
|
||||
BytesRead := Stream.Read(ByteOrderMark, SizeOf(ByteOrderMark));
|
||||
if (ByteOrderMark <> UNICODE_BOM) and (ByteOrderMark <> UNICODE_BOM_SWAPPED) then begin
|
||||
ByteOrderMark := #0;
|
||||
Stream.Seek(-BytesRead, soFromCurrent);
|
||||
if (Stream.Size - Stream.Position) >= Length(Utf8Test) * SizeOf(AnsiChar) then begin
|
||||
BytesRead := Stream.Read(Utf8Test[0], Length(Utf8Test) * SizeOf(AnsiChar));
|
||||
if Utf8Test <> UTF8_BOM then
|
||||
Stream.Seek(-BytesRead, soFromCurrent);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
// Test Byte Order Mark
|
||||
if ByteOrderMark = UNICODE_BOM then
|
||||
StreamCharSet := sscUnicode
|
||||
else if ByteOrderMark = UNICODE_BOM_SWAPPED then
|
||||
StreamCharSet := sscUnicodeSwapped
|
||||
else if Utf8Test = UTF8_BOM then
|
||||
StreamCharSet := sscUtf8
|
||||
else
|
||||
StreamCharSet := sscAnsi;
|
||||
|
||||
DataLeft := Stream.Size - Stream.Position;
|
||||
if (StreamCharSet in [sscUnicode, sscUnicodeSwapped]) then begin
|
||||
// BOM indicates Unicode text stream
|
||||
if DataLeft < SizeOf(WideChar) then
|
||||
Result := ''
|
||||
else begin
|
||||
SetLength(Result, DataLeft div SizeOf(WideChar));
|
||||
Stream.Read(PWideChar(Result)^, DataLeft);
|
||||
if StreamCharSet = sscUnicodeSwapped then begin
|
||||
P := PWord(PWideChar(Result));
|
||||
While (P^ <> 0) do begin
|
||||
P^ := MakeWord(HiByte(P^), LoByte(P^));
|
||||
Inc(P);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end else if StreamCharSet = sscUtf8 then begin
|
||||
// BOM indicates UTF-8 text stream
|
||||
SetLength(SA, DataLeft div SizeOf(AnsiChar));
|
||||
Stream.Read(PAnsiChar(SA)^, DataLeft);
|
||||
Result := UTF8Decode(SA);
|
||||
end else begin
|
||||
// without byte order mark it is assumed that we are loading ANSI text
|
||||
SetLength(SA, DataLeft div SizeOf(AnsiChar));
|
||||
Stream.Read(PAnsiChar(SA)^, DataLeft);
|
||||
Result := SA;
|
||||
end;
|
||||
OpenTextfile(Filename, Stream, FileCharset);
|
||||
Result := ReadTextfileChunk(Stream, FileCharset);
|
||||
finally
|
||||
Stream.Free;
|
||||
end;
|
||||
|
||||
end;
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user