Rename ReadUnicodeFile to ReadTextfile and split it into 3 different procedures:

- OpenTextfile
- ReadTextfileChunk
- ReadTextfile

This enables us to use the Unicode detection in places where we need to read a file chunkwise (e.g. the runsqlfile unit).

TODO: Fix UTF-8 detection; UTF-8 files are currently misdetected as ANSI.
2025-08-26 22:00:16 +08:00 · 2008-06-19 19:08:54 +00:00
parent 4cd5743980
commit 58230fea56
2 changed files with 84 additions and 67 deletions
--- a/source/childwin.pas
+++ b/source/childwin.pas
@ -4276,7 +4276,7 @@ begin
  // so we have to do it by replacing the SelText property
  Screen.Cursor := crHourGlass;
  try
-    filecontent := ReadUnicodeFile(filename);
+    filecontent := ReadTextfile(filename);
  except
    on E: Exception do
    begin
--- a/source/helpers.pas
+++ b/source/helpers.pas
@ -27,6 +27,8 @@ type
  TVTreeDataArray = Array of TVTreeData;
  PVTreeDataArray = ^TVTreeDataArray;
  TFileCharset = (fcsAnsi, fcsUnicode, fcsUnicodeSwapped, fcsUtf8);
 {$I const.inc}
  function trimc(s: String; c: Char) : String;
@ -93,7 +95,9 @@ type
  function GetDBObjectType( TableStatus: TFields ): Byte;
  procedure SetWindowSizeGrip(hWnd: HWND; Enable: boolean);
  procedure SaveUnicodeFile(Filename: String; Text: WideString);
-  function ReadUnicodeFile(Filename: String): WideString;
+  procedure OpenTextFile(const Filename: String; out Stream: TFileStream; out FileCharset: TFileCharset);
  function ReadTextfileChunk(Stream: TFileStream; FileCharset: TFileCharset; ChunkSize: Cardinal = 0): WideString;
  function ReadTextfile(Filename: String): WideString;
 var
  MYSQL_KEYWORDS             : TStringList;
@ -2375,86 +2379,99 @@ end;
 {**
  Open a text file for shared reading and detect its charset from the
  byte order mark (BOM) at the start of the file.

  @param Filename     Path of the file to open.
  @param Stream       Receives the opened stream, positioned directly behind
                      the BOM, or at offset 0 if no BOM was found. The caller
                      owns the stream and must free it. Set to nil if opening
                      or detection raises an exception.
  @param FileCharset  Receives fcsUnicode, fcsUnicodeSwapped or fcsUtf8 when
                      the matching BOM is present, otherwise fcsAnsi.

  Only a BOM is recognised; a UTF-8 file without BOM is reported as fcsAnsi.
 }
 procedure OpenTextFile(const Filename: String; out Stream: TFileStream; out FileCharset: TFileCharset);
 var
  ByteOrderMark: WideChar;
  Utf8Test: array[0..2] of AnsiChar;
  BytesRead: Integer;
 const
  UNICODE_BOM = WideChar($FEFF);
  UNICODE_BOM_SWAPPED = WideChar($FFFE);
 begin
  // Give both out parameters a defined value before anything can raise, so a
  // caller that frees Stream in a finally block never touches a stale pointer.
  Stream := nil;
  FileCharset := fcsAnsi;
  Stream := TFileStream.Create(Filename, fmOpenRead or fmShareDenyWrite);
  try
    Stream.Position := 0;
    if (Stream.Size - Stream.Position) >= SizeOf(ByteOrderMark) then begin
      ByteOrderMark := #0;
      BytesRead := Stream.Read(ByteOrderMark, SizeOf(ByteOrderMark));
      if ByteOrderMark = UNICODE_BOM then
        FileCharset := fcsUnicode
      else if ByteOrderMark = UNICODE_BOM_SWAPPED then
        FileCharset := fcsUnicodeSwapped
      else begin
        // No UTF-16 BOM: rewind and probe for the three byte UTF-8 BOM instead
        Stream.Seek(-BytesRead, soFromCurrent);
        if (Stream.Size - Stream.Position) >= SizeOf(Utf8Test) then begin
          BytesRead := Stream.Read(Utf8Test[0], SizeOf(Utf8Test));
          // Compare byte by byte, and only after the buffer was really filled
          if (BytesRead = SizeOf(Utf8Test))
            and (Ord(Utf8Test[0]) = $EF)
            and (Ord(Utf8Test[1]) = $BB)
            and (Ord(Utf8Test[2]) = $BF) then
            FileCharset := fcsUtf8
          else
            // Not a BOM, these bytes belong to the text
            Stream.Seek(-BytesRead, soFromCurrent);
        end;
      end;
    end;
  except
    // Do not leak the file handle if detection fails half way
    Stream.Free;
    Stream := nil;
    raise;
  end;
 end;
 {**
  Read one chunk from an opened text stream and return it as WideString,
  decoding it according to the given charset.

  @param Stream       Stream to read from, starting at its current position.
                      Presumably obtained from OpenTextFile, i.e. already
                      positioned behind any byte order mark.
  @param FileCharset  Encoding of the stream contents.
  @param ChunkSize    Number of bytes to read. 0 (the default) or a value
                      larger than the remaining data means "everything up to
                      the end of the stream".
  @return             The decoded text; empty if nothing is left to read.

  The number of bytes consumed can differ slightly from ChunkSize so that no
  character is cut in half: UTF-16 reads are rounded down to whole code units,
  UTF-8 reads are extended by up to three bytes to complete the last sequence.
  Multi-byte ANSI code pages are not handled specially.
 }
 function ReadTextfileChunk(Stream: TFileStream; FileCharset: TFileCharset; ChunkSize: Cardinal = 0): WideString;
 var
  SA: AnsiString;
  P: PWord;
  Remaining: Int64;
  BytesRead, CharCount, Trail, Need, i: Integer;
 begin
  Result := '';
  Remaining := Stream.Size - Stream.Position;
  if Remaining <= 0 then
    Exit;
  // Never size the buffer beyond what the stream can deliver
  if (ChunkSize = 0) or (ChunkSize > Remaining) then
    ChunkSize := Remaining;

  if FileCharset in [fcsUnicode, fcsUnicodeSwapped] then begin
    // UTF-16: read whole code units only, so the read never exceeds the
    // buffer that SetLength allocated
    CharCount := ChunkSize div SizeOf(WideChar);
    if CharCount > 0 then begin
      SetLength(Result, CharCount);
      BytesRead := Stream.Read(PWideChar(Result)^, CharCount * SizeOf(WideChar));
      CharCount := BytesRead div SizeOf(WideChar);
      SetLength(Result, CharCount);
      if FileCharset = fcsUnicodeSwapped then begin
        // Swap the two bytes of every code unit. Iterate by count rather
        // than until a zero word, so embedded #0 characters do not stop it.
        P := PWord(PWideChar(Result));
        for i := 1 to CharCount do begin
          P^ := MakeWord(HiByte(P^), LoByte(P^));
          Inc(P);
        end;
      end;
    end;
    // A single dangling byte at the end of the file can never form a
    // character; skip it so that loops reading chunk by chunk terminate.
    if (Stream.Size - Stream.Position) = 1 then
      Stream.Position := Stream.Size;
  end else begin
    // UTF-8 and ANSI are both read as raw bytes first
    SetLength(SA, ChunkSize);
    BytesRead := Stream.Read(PAnsiChar(SA)^, ChunkSize);
    if BytesRead < 0 then
      BytesRead := 0;
    SetLength(SA, BytesRead);

    if FileCharset = fcsUtf8 then begin
      // Make sure the chunk does not end in the middle of a multi-byte
      // sequence, so UTF8Decode is never handed a truncated character.
      // Step back over trailing continuation bytes (10xxxxxx) ...
      i := Length(SA);
      Trail := 0;
      while (i > 0) and (Trail < 3) and ((Ord(SA[i]) and $C0) = $80) do begin
        Dec(i);
        Inc(Trail);
      end;
      if i > 0 then begin
        // ... look at the lead byte to see how many continuation bytes it
        // announces ...
        case Ord(SA[i]) of
          $C0..$DF: Need := 1;
          $E0..$EF: Need := 2;
          $F0..$F7: Need := 3;
        else
          Need := 0;
        end;
        // ... and fetch the ones that are still missing.
        Need := Need - Trail;
        if Need > 0 then begin
          BytesRead := Length(SA);
          SetLength(SA, BytesRead + Need);
          Need := Stream.Read(SA[BytesRead + 1], Need);
          if Need < 0 then
            Need := 0;
          SetLength(SA, BytesRead + Need);
        end;
      end;
      Result := UTF8Decode(SA);
    end else
      // Without byte order mark it is assumed that we are loading ANSI text
      Result := SA;
  end;
 end;
 {**
  Read a complete Unicode or ANSI text file into memory.

  Opens the file with OpenTextfile, which detects the charset, and reads
  everything behind the byte order mark in one go with ReadTextfileChunk.

  @param Filename  Path of the file to read.
  @return          The file contents as WideString.

  Exceptions raised while opening or reading are passed on to the caller;
  the stream is freed in any case.
 }
 function ReadTextfile(Filename: String): WideString;
 var
  Stream: TFileStream;
  FileCharset: TFileCharset;
 begin
  // Must be nil before the try block: if OpenTextfile raises before it has
  // assigned the stream, the finally block would otherwise free garbage.
  Stream := nil;
  try
    OpenTextfile(Filename, Stream, FileCharset);
    // ChunkSize is left at its default of 0, which reads up to the end
    Result := ReadTextfileChunk(Stream, FileCharset);
  finally
    Stream.Free;
  end;
 end;