Files
pre-commit-hooks/pre_commit_hooks/check_executables_have_shebangs.py
Timothée Mazzucotelli 4faed34fbc Fix parsing of git output with unusual characters
On Windows, all files are "executable".
Therefore, to know if a file is supposed to be executed,
we check how its attributes were recorded by git:
we run a `git ls-files` command in a subprocess.

By default, this command outputs information
on multiple lines (file and their data separated by newlines).
When a file contains an unusual character,
the character is escaped with an integer sequence
(such as `\303\261`), and git wraps the whole filename
in double-quotes because of the backslashes.
It breaks the current code because we try to open
the filename containing the double-quotes:
it doesn't exist, of course.

Instead of trying to fix this special case by removing
the double-quotes, and breaking other cases
(a double-quote is a valid filename character on Linux),
we tell git to separate each item with the null character `\0`
instead of a new line `\n`, with the option `-z`.
With this option, git doesn't escape unusual characters
with integer sequence, so the output is fixed, and we
parse it by splitting on `\0` instead of `\n`.

Fixes #508.
2020-07-30 11:58:24 -07:00

79 lines
2.0 KiB
Python

"""Check that executable text files have a shebang."""
import argparse
import shlex
import sys
from typing import List
from typing import Optional
from typing import Sequence
from typing import Set
from pre_commit_hooks.util import cmd_output
EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))
def zsplit(s: str) -> List[str]:
s = s.strip('\0')
if s:
return s.split('\0')
else:
return []
def check_executables(paths: List[str]) -> int:
if sys.platform == 'win32': # pragma: win32 cover
return _check_git_filemode(paths)
else: # pragma: win32 no cover
retv = 0
for path in paths:
if not _check_has_shebang(path):
_message(path)
retv = 1
return retv
def _check_git_filemode(paths: Sequence[str]) -> int:
outs = cmd_output('git', 'ls-files', '-z', '--stage', '--', *paths)
seen: Set[str] = set()
for out in zsplit(outs):
metadata, path = out.split('\t')
tagmode = metadata.split(' ', 1)[0]
is_executable = any(b in EXECUTABLE_VALUES for b in tagmode[-3:])
has_shebang = _check_has_shebang(path)
if is_executable and not has_shebang:
_message(path)
seen.add(path)
return int(bool(seen))
def _check_has_shebang(path: str) -> int:
with open(path, 'rb') as f:
first_bytes = f.read(2)
return first_bytes == b'#!'
def _message(path: str) -> None:
print(
f'{path}: marked executable but has no (or invalid) shebang!\n'
f" If it isn't supposed to be executable, try: "
f'`chmod -x {shlex.quote(path)}`\n'
f' If it is supposed to be executable, double-check its shebang.',
file=sys.stderr,
)
def main(argv: Optional[Sequence[str]] = None) -> int:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('filenames', nargs='*')
args = parser.parse_args(argv)
return check_executables(args.filenames)
if __name__ == '__main__':
exit(main())