pre-commit-hooks/pre_commit_hooks/fix_encoding_pragma.py

from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import collections

expected_pragma = b'# -*- coding: utf-8 -*-\n'


def has_coding(line):
    if not line.strip():
        return False
    return (
        line.lstrip()[0:1] == b'#' and (
            b'unicode' in line or
            b'encoding' in line or
            b'coding:' in line or
            b'coding=' in line
        )
    )


class ExpectedContents(collections.namedtuple(
        'ExpectedContents', ('shebang', 'rest', 'pragma_status'),
)):
    """
    pragma_status:
    - True: has exactly the coding pragma expected
    - False: missing coding pragma entirely
    - None: has a coding pragma, but it does not match
    """
    __slots__ = ()

    @property
    def has_any_pragma(self):
        return self.pragma_status is not False

    def is_expected_pragma(self, remove):
        expected_pragma_status = not remove
        return self.pragma_status is expected_pragma_status


def _get_expected_contents(first_line, second_line, rest):
    if first_line.startswith(b'#!'):
        shebang = first_line
        potential_coding = second_line
    else:
        shebang = b''
        potential_coding = first_line
        rest = second_line + rest

    if potential_coding == expected_pragma:
        pragma_status = True
    elif has_coding(potential_coding):
        pragma_status = None
    else:
        pragma_status = False
        rest = potential_coding + rest

    return ExpectedContents(
        shebang=shebang, rest=rest, pragma_status=pragma_status,
    )


def fix_encoding_pragma(f, remove=False):
    expected = _get_expected_contents(f.readline(), f.readline(), f.read())

    # Special cases for empty files
    if not expected.rest.strip():
        # If a file only has a shebang or a coding pragma, remove it
        if expected.has_any_pragma or expected.shebang:
            f.seek(0)
            f.truncate()
            f.write(b'')
            return 1
        else:
            return 0

    if expected.is_expected_pragma(remove):
        return 0

    # Otherwise, write out the new file
    f.seek(0)
    f.truncate()
    f.write(expected.shebang)
    if not remove:
        f.write(expected_pragma)
    f.write(expected.rest)

    return 1


def main(argv=None):
    parser = argparse.ArgumentParser('Fixes the encoding pragma of python files')
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    parser.add_argument(
        '--remove', action='store_true',
        help='Remove the encoding pragma (Useful in a python3-only codebase)',
    )
    args = parser.parse_args(argv)

    retv = 0

    if args.remove:
        fmt = 'Removed encoding pragma from {filename}'
    else:
        fmt = 'Added `{pragma}` to {filename}'

    for filename in args.filenames:
        with open(filename, 'r+b') as f:
            file_ret = fix_encoding_pragma(f, remove=args.remove)
            retv |= file_ret
            if file_ret:
                print(fmt.format(pragma=expected_pragma, filename=filename))

    return retv

if __name__ == "__main__":
    exit(main())