From 70e79f51dc7efe480e2bae716cd29f0486c8006f Mon Sep 17 00:00:00 2001 From: Ansgar Becker Date: Sat, 27 Jul 2013 08:06:40 +0000 Subject: [PATCH] * Ensure ReadTextfileChunk reads a multiple of the encoding's maximum byte count per char. See http://www.heidisql.com/forum.php?t=13044 * Log error message when TEncoding.Convert returns an empty TBytes array. * Documentation --- source/helpers.pas | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/source/helpers.pas b/source/helpers.pas index 5491e860..11e4c8bb 100644 --- a/source/helpers.pas +++ b/source/helpers.pas @@ -1280,6 +1280,7 @@ end; ANSI Aimed to work better than WideStrUtils.IsUTF8String() which didn't work in any test case here. @see http://en.wikipedia.org/wiki/Byte_Order_Mark + Could also do that with TEncoding.GetBufferEncoding, but that relies on the file having a BOM } function DetectEncoding(Stream: TStream): TEncoding; var @@ -1432,12 +1433,19 @@ var DataLeft: Int64; LBuffer: TBytes; begin + // Be sure to read a multiple of the encoding's max byte count per char + if ChunkSize mod 4 > 0 then + Inc(ChunkSize, ChunkSize mod 4); DataLeft := Stream.Size - Stream.Position; if (ChunkSize = 0) or (ChunkSize > DataLeft) then ChunkSize := DataLeft; SetLength(LBuffer, ChunkSize); Stream.ReadBuffer(Pointer(LBuffer)^, ChunkSize); - LBuffer := Encoding.Convert(Encoding, TEncoding.Unicode, LBuffer, 0, Length(LBuffer)); + // Now, TEncoding.Convert returns an empty TBytes array in files with Russian characters + // See http://www.heidisql.com/forum.php?t=13044 + LBuffer := Encoding.Convert(Encoding, TEncoding.Unicode, LBuffer); + if Length(LBuffer) = 0 then + MainForm.LogSQL('Error when converting chunk from encoding '+Encoding.EncodingName+' to '+TEncoding.Unicode.EncodingName+' in '+ExtractFileName(Stream.FileName)+' at position '+FormatByteNumber(Stream.Position)); Result := TEncoding.Unicode.GetString(LBuffer); end;