From 03bf17f2b0b1b119168ee1801681bcf54dca5a84 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 27 Apr 2016 11:18:14 -0700 Subject: [PATCH] Add a --remove option to fix-encoding-pragma --- README.md | 3 +- pre_commit_hooks/fix_encoding_pragma.py | 101 +++++++++++++++++------- tests/fix_encoding_pragma_test.py | 64 ++++++++++----- 3 files changed, 120 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 091a8a4..6c3a3ec 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,8 @@ Add this to your `.pre-commit-config.yaml` - `double-quote-string-fixer` - This hook replaces double quoted strings with single quoted strings. - `end-of-file-fixer` - Makes sure files end in a newline and only a newline. -- `fix-encoding-pragma` - Add `# -*- coding: utf-8 -*-` to the top of python files +- `fix-encoding-pragma` - Add `# -*- coding: utf-8 -*-` to the top of python files. + - To remove the coding pragma pass `--remove` (useful in a python3-only codebase) - `flake8` - Run flake8 on your python files. - `name-tests-test` - Assert that files in tests/ end in `_test.py`. - Use `args: ['--django']` to match `test*.py` instead. diff --git a/pre_commit_hooks/fix_encoding_pragma.py b/pre_commit_hooks/fix_encoding_pragma.py index 48fc9c7..8586937 100644 --- a/pre_commit_hooks/fix_encoding_pragma.py +++ b/pre_commit_hooks/fix_encoding_pragma.py @@ -3,7 +3,7 @@ from __future__ import print_function from __future__ import unicode_literals import argparse -import io +import collections expected_pragma = b'# -*- coding: utf-8 -*-\n' @@ -21,34 +21,72 @@ def has_coding(line): ) -def fix_encoding_pragma(f): - first_line = f.readline() - second_line = f.readline() - old = f.read() - f.seek(0) +class ExpectedContents(collections.namedtuple( + 'ExpectedContents', ('shebang', 'rest', 'pragma_status'), +)): + """ + pragma_status: + - True: has exactly the coding pragma expected + - False: missing coding pragma entirely + - None: has a coding pragma, but it does not match + """ + __slots__ = () - # Ok case: the file is empty - if not (first_line + second_line + old).strip(): - return 0 + @property + def has_any_pragma(self): + return self.pragma_status is not False - # Ok case: we specify pragma as the first line - if first_line == expected_pragma: - return 0 + def is_expected_pragma(self, remove): + expected_pragma_status = not remove + return self.pragma_status is expected_pragma_status - # OK case: we have a shebang as first line and pragma on second line - if first_line.startswith(b'#!') and second_line == expected_pragma: - return 0 - # Otherwise we need to rewrite stuff! +def _get_expected_contents(first_line, second_line, rest): if first_line.startswith(b'#!'): - if has_coding(second_line): - f.write(first_line + expected_pragma + old) - else: - f.write(first_line + expected_pragma + second_line + old) - elif has_coding(first_line): - f.write(expected_pragma + second_line + old) + shebang = first_line + potential_coding = second_line else: - f.write(expected_pragma + first_line + second_line + old) + shebang = b'' + potential_coding = first_line + rest = second_line + rest + + if potential_coding == expected_pragma: + pragma_status = True + elif has_coding(potential_coding): + pragma_status = None + else: + pragma_status = False + rest = potential_coding + rest + + return ExpectedContents( + shebang=shebang, rest=rest, pragma_status=pragma_status, + ) + + +def fix_encoding_pragma(f, remove=False): + expected = _get_expected_contents(f.readline(), f.readline(), f.read()) + + # Special cases for empty files + if not expected.rest.strip(): + # If a file only has a shebang or a coding pragma, remove it + if expected.has_any_pragma or expected.shebang: + f.seek(0) + f.truncate() + f.write(b'') + return 1 + else: + return 0 + + if expected.is_expected_pragma(remove): + return 0 + + # Otherwise, write out the new file + f.seek(0) + f.truncate() + f.write(expected.shebang) + if not remove: + f.write(expected_pragma) + f.write(expected.rest) return 1 @@ -56,18 +94,25 @@ def fix_encoding_pragma(f): def main(argv=None): parser = argparse.ArgumentParser('Fixes the encoding pragma of python files') parser.add_argument('filenames', nargs='*', help='Filenames to fix') + parser.add_argument( + '--remove', action='store_true', + help='Remove the encoding pragma (Useful in a python3-only codebase)', + ) args = parser.parse_args(argv) retv = 0 + if args.remove: + fmt = 'Removed encoding pragma from {filename}' + else: + fmt = 'Added `{pragma}` to {filename}' + for filename in args.filenames: - with io.open(filename, 'r+b') as f: - file_ret = fix_encoding_pragma(f) + with open(filename, 'r+b') as f: + file_ret = fix_encoding_pragma(f, remove=args.remove) retv |= file_ret if file_ret: - print('Added `{0}` to {1}'.format( - expected_pragma.strip(), filename, - )) + print(fmt.format(pragma=expected_pragma, filename=filename)) return retv diff --git a/tests/fix_encoding_pragma_test.py b/tests/fix_encoding_pragma_test.py index e000a33..a9502a2 100644 --- a/tests/fix_encoding_pragma_test.py +++ b/tests/fix_encoding_pragma_test.py @@ -10,32 +10,46 @@ from pre_commit_hooks.fix_encoding_pragma import main def test_integration_inserting_pragma(tmpdir): - file_path = tmpdir.join('foo.py').strpath + path = tmpdir.join('foo.py') + path.write_binary(b'import httplib\n') - with open(file_path, 'wb') as file_obj: - file_obj.write(b'import httplib\n') + assert main((path.strpath,)) == 1 - assert main([file_path]) == 1 - - with open(file_path, 'rb') as file_obj: - assert file_obj.read() == ( - b'# -*- coding: utf-8 -*-\n' - b'import httplib\n' - ) + assert path.read_binary() == ( + b'# -*- coding: utf-8 -*-\n' + b'import httplib\n' + ) def test_integration_ok(tmpdir): - file_path = tmpdir.join('foo.py').strpath - with open(file_path, 'wb') as file_obj: - file_obj.write(b'# -*- coding: utf-8 -*-\nx = 1\n') - assert main([file_path]) == 0 + path = tmpdir.join('foo.py') + path.write_binary(b'# -*- coding: utf-8 -*-\nx = 1\n') + assert main((path.strpath,)) == 0 + + +def test_integration_remove(tmpdir): + path = tmpdir.join('foo.py') + path.write_binary(b'# -*- coding: utf-8 -*-\nx = 1\n') + + assert main((path.strpath, '--remove')) == 1 + + assert path.read_binary() == b'x = 1\n' + + +def test_integration_remove_ok(tmpdir): + path = tmpdir.join('foo.py') + path.write_binary(b'x = 1\n') + assert main((path.strpath, '--remove')) == 0 @pytest.mark.parametrize( 'input_str', ( b'', - b'# -*- coding: utf-8 -*-\n', + ( + b'# -*- coding: utf-8 -*-\n' + b'x = 1\n' + ), ( b'#!/usr/bin/env python\n' b'# -*- coding: utf-8 -*-\n' @@ -59,20 +73,32 @@ def test_ok_inputs(input_str): b'import httplib\n', ), ( - b'#!/usr/bin/env python\n', + b'#!/usr/bin/env python\n' + b'x = 1\n', b'#!/usr/bin/env python\n' b'# -*- coding: utf-8 -*-\n' + b'x = 1\n', ), ( - b'#coding=utf-8\n', + b'#coding=utf-8\n' + b'x = 1\n', b'# -*- coding: utf-8 -*-\n' + b'x = 1\n', ), ( b'#!/usr/bin/env python\n' - b'#coding=utf8\n', + b'#coding=utf8\n' + b'x = 1\n', b'#!/usr/bin/env python\n' - b'# -*- coding: utf-8 -*-\n', + b'# -*- coding: utf-8 -*-\n' + b'x = 1\n', ), + # These should each get truncated + (b'#coding: utf-8\n', b''), + (b'# -*- coding: utf-8 -*-\n', b''), + (b'#!/usr/bin/env python\n', b''), + (b'#!/usr/bin/env python\n#coding: utf8\n', b''), + (b'#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n', b''), ) ) def test_not_ok_inputs(input_str, output):