Rename ReadUnicodeFile to ReadTextfile and split it into 3 different procedures:

- OpenTextfile
- ReadTextfileChunk
- ReadTextfile
This enables reuse of the unicode detection in places where a file has to be read chunk by chunk, e.g. the runsqlfile unit (see the usage sketch below).
TODO: Fix UTF-8 detection; such files are currently misdetected as ANSI.
Ansgar Becker
2008-06-19 19:08:54 +00:00
parent 4cd5743980
commit 58230fea56
2 changed files with 84 additions and 67 deletions
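
How the new API is meant to be consumed, as a rough sketch (not part of this commit): it assumes the helpers unit changed below is in the uses clause, and the ProcessBigTextfile name and the 5 MB chunk size are purely illustrative stand-ins for whatever a caller such as the runsqlfile unit would actually use.

// Sketch only: read a large file chunk by chunk with the new routines.
procedure ProcessBigTextfile(const Filename: String);
const
  CHUNK_SIZE = 5 * 1024 * 1024; // bytes per ReadTextfileChunk call, arbitrary
var
  Stream: TFileStream;
  FileCharset: TFileCharset;
  Chunk: WideString;
begin
  // Detect the charset once, then keep pulling decoded chunks from the stream.
  OpenTextFile(Filename, Stream, FileCharset);
  try
    while Stream.Position < Stream.Size do begin
      Chunk := ReadTextfileChunk(Stream, FileCharset, CHUNK_SIZE);
      // Hand Chunk to whatever consumes it, e.g. an SQL splitter in runsqlfile.
      // (Chunk boundaries are plain byte offsets, so the consumer must cope
      // with statements or multi-byte sequences spanning two chunks.)
    end;
  finally
    Stream.Free;
  end;
end;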


@@ -27,6 +27,8 @@ type
  TVTreeDataArray = Array of TVTreeData;
  PVTreeDataArray = ^TVTreeDataArray;

  TFileCharset = (fcsAnsi, fcsUnicode, fcsUnicodeSwapped, fcsUtf8);

{$I const.inc}

  function trimc(s: String; c: Char) : String;
@@ -93,7 +95,9 @@ type
  function GetDBObjectType( TableStatus: TFields ): Byte;
  procedure SetWindowSizeGrip(hWnd: HWND; Enable: boolean);
  procedure SaveUnicodeFile(Filename: String; Text: WideString);
  function ReadUnicodeFile(Filename: String): WideString;
  procedure OpenTextFile(const Filename: String; out Stream: TFileStream; out FileCharset: TFileCharset);
  function ReadTextfileChunk(Stream: TFileStream; FileCharset: TFileCharset; ChunkSize: Cardinal = 0): WideString;
  function ReadTextfile(Filename: String): WideString;

var
  MYSQL_KEYWORDS : TStringList;
@@ -2375,86 +2379,99 @@ end;
{**
  Read a unicode or ansi file into memory
  Open a textfile unicode safe and return a stream + its charset
}
function ReadUnicodeFile(Filename: String): WideString;
procedure OpenTextFile(const Filename: String; out Stream: TFileStream; out FileCharset: TFileCharset);
var
  Stream: TFileStream;
  ByteOrderMark: WideChar;
  BytesRead: Integer;
  Utf8Test: array[0..2] of AnsiChar;
  DataLeft: Integer;
  StreamCharSet: Byte;
  SA: AnsiString;
  P: PWord;
const
  UNICODE_BOM = WideChar($FEFF);
  UNICODE_BOM_SWAPPED = WideChar($FFFE);
  UTF8_BOM = AnsiString(#$EF#$BB#$BF);
  sscAnsi = 0;
  sscUnicode = 1;
  sscUnicodeSwapped = 2;
  sscUtf8 = 3;
begin
  Stream := TFileStream.Create(Filename, fmOpenRead or fmShareDenyWrite);
  Stream.Position := 0;
  // Byte Order Mark
  ByteOrderMark := #0;
  if (Stream.Size - Stream.Position) >= SizeOf(ByteOrderMark) then begin
    BytesRead := Stream.Read(ByteOrderMark, SizeOf(ByteOrderMark));
    if (ByteOrderMark <> UNICODE_BOM) and (ByteOrderMark <> UNICODE_BOM_SWAPPED) then begin
      ByteOrderMark := #0;
      Stream.Seek(-BytesRead, soFromCurrent);
      if (Stream.Size - Stream.Position) >= Length(Utf8Test) * SizeOf(AnsiChar) then begin
        BytesRead := Stream.Read(Utf8Test[0], Length(Utf8Test) * SizeOf(AnsiChar));
        if Utf8Test <> UTF8_BOM then
          Stream.Seek(-BytesRead, soFromCurrent);
      end;
    end;
  end;
  // Test Byte Order Mark
  if ByteOrderMark = UNICODE_BOM then
    FileCharset := fcsUnicode
  else if ByteOrderMark = UNICODE_BOM_SWAPPED then
    FileCharset := fcsUnicodeSwapped
  else if Utf8Test = UTF8_BOM then
    FileCharset := fcsUtf8
  else
    FileCharset := fcsAnsi;
end;

{**
  Read a chunk out of a textfile unicode safe by passing a stream and its charset
}
function ReadTextfileChunk(Stream: TFileStream; FileCharset: TFileCharset; ChunkSize: Cardinal = 0): WideString;
var
  SA: AnsiString;
  P: PWord;
begin
  if ChunkSize = 0 then
    ChunkSize := Stream.Size - Stream.Position;
  if (FileCharset in [fcsUnicode, fcsUnicodeSwapped]) then begin
    // BOM indicates Unicode text stream
    if ChunkSize < SizeOf(WideChar) then
      Result := ''
    else begin
      SetLength(Result, ChunkSize div SizeOf(WideChar));
      Stream.Read(PWideChar(Result)^, ChunkSize);
      if FileCharset = fcsUnicodeSwapped then begin
        P := PWord(PWideChar(Result));
        While (P^ <> 0) do begin
          P^ := MakeWord(HiByte(P^), LoByte(P^));
          Inc(P);
        end;
      end;
    end;
  end else if FileCharset = fcsUtf8 then begin
    // BOM indicates UTF-8 text stream
    SetLength(SA, ChunkSize div SizeOf(AnsiChar));
    Stream.Read(PAnsiChar(SA)^, ChunkSize);
    Result := UTF8Decode(SA);
  end else begin
    // without byte order mark it is assumed that we are loading ANSI text
    SetLength(SA, ChunkSize div SizeOf(AnsiChar));
    Stream.Read(PAnsiChar(SA)^, ChunkSize);
    Result := SA;
  end;
end;

{**
  Read a unicode or ansi file into memory
}
function ReadTextfile(Filename: String): WideString;
var
  Stream: TFileStream;
  FileCharset: TFileCharset;
begin
  try
    Stream.Position := 0;
    // Byte Order Mark
    ByteOrderMark := #0;
    if (Stream.Size - Stream.Position) >= SizeOf(ByteOrderMark) then begin
      BytesRead := Stream.Read(ByteOrderMark, SizeOf(ByteOrderMark));
      if (ByteOrderMark <> UNICODE_BOM) and (ByteOrderMark <> UNICODE_BOM_SWAPPED) then begin
        ByteOrderMark := #0;
        Stream.Seek(-BytesRead, soFromCurrent);
        if (Stream.Size - Stream.Position) >= Length(Utf8Test) * SizeOf(AnsiChar) then begin
          BytesRead := Stream.Read(Utf8Test[0], Length(Utf8Test) * SizeOf(AnsiChar));
          if Utf8Test <> UTF8_BOM then
            Stream.Seek(-BytesRead, soFromCurrent);
        end;
      end;
    end;
    // Test Byte Order Mark
    if ByteOrderMark = UNICODE_BOM then
      StreamCharSet := sscUnicode
    else if ByteOrderMark = UNICODE_BOM_SWAPPED then
      StreamCharSet := sscUnicodeSwapped
    else if Utf8Test = UTF8_BOM then
      StreamCharSet := sscUtf8
    else
      StreamCharSet := sscAnsi;
    DataLeft := Stream.Size - Stream.Position;
    if (StreamCharSet in [sscUnicode, sscUnicodeSwapped]) then begin
      // BOM indicates Unicode text stream
      if DataLeft < SizeOf(WideChar) then
        Result := ''
      else begin
        SetLength(Result, DataLeft div SizeOf(WideChar));
        Stream.Read(PWideChar(Result)^, DataLeft);
        if StreamCharSet = sscUnicodeSwapped then begin
          P := PWord(PWideChar(Result));
          While (P^ <> 0) do begin
            P^ := MakeWord(HiByte(P^), LoByte(P^));
            Inc(P);
          end;
        end;
      end;
    end else if StreamCharSet = sscUtf8 then begin
      // BOM indicates UTF-8 text stream
      SetLength(SA, DataLeft div SizeOf(AnsiChar));
      Stream.Read(PAnsiChar(SA)^, DataLeft);
      Result := UTF8Decode(SA);
    end else begin
      // without byte order mark it is assumed that we are loading ANSI text
      SetLength(SA, DataLeft div SizeOf(AnsiChar));
      Stream.Read(PAnsiChar(SA)^, DataLeft);
      Result := SA;
    end;
    OpenTextfile(Filename, Stream, FileCharset);
    Result := ReadTextfileChunk(Stream, FileCharset);
  finally
    Stream.Free;
  end;
end;
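
For callers that do not need chunking, ReadTextfile keeps the old one-call behaviour. A minimal sketch, assuming the helpers unit is in scope; the LoadDump name and the path are illustrative only. Note that detection in OpenTextFile is purely BOM-driven, so a file without a leading BOM always falls back to fcsAnsi.

// Sketch only: whole-file read via the new wrapper.
procedure LoadDump;
var
  Sql: WideString;
begin
  Sql := ReadTextfile('C:\dumps\backup.sql');
  // A file with a UTF-16 BOM arrives correctly decoded; a file without any
  // BOM is read as ANSI text (cf. the UTF-8 TODO in the commit message).
end;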