Fix encoding detection and exception on empty files (#2195)

The encoding detection code was trying to catch encoding-related
exceptions when the file was opened. This doesn't make sense: at that
point no data has been read, so no encoding errors can be detected.
Instead, catch encoding-related exceptions when the file contents are
read.

Also avoid bailing out with `Exception('Unknown encoding')` on empty
files.
This commit is contained in:
Dimitri Papadopoulos Orfanos
2022-10-12 19:17:12 +02:00
committed by GitHub
parent ad644529d2
commit 900f18654b
2 changed files with 32 additions and 22 deletions

View File

@@ -272,6 +272,19 @@ def test_encoding(tmpdir, capsys):
with open(f.name, 'ab') as f:
f.write(u'naieve\n'.encode('utf-8'))
assert cs.main(f.name) == 1
# Encoding detection (only try ISO 8859-1 because UTF-8 is the default)
with open(f.name, 'wb') as f:
f.write(b'Speling error, non-ASCII: h\xe9t\xe9rog\xe9n\xe9it\xe9\n')
# check warnings about wrong encoding are enabled with "-q 0"
code, stdout, stderr = cs.main('-q', '0', f.name, std=True, count=True)
assert code == 1
assert 'Speling' in stdout
assert 'iso-8859-1' in stderr
# check warnings about wrong encoding are disabled with "-q 1"
code, stdout, stderr = cs.main('-q', '1', f.name, std=True, count=True)
assert code == 1
assert 'Speling' in stdout
assert 'iso-8859-1' not in stderr
# Binary file warning
with open(f.name, 'wb') as f:
f.write(b'\x00\x00naiive\x00\x00')