mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2025-08-14 17:31:28 +08:00

On Windows, all files are "executable". Therefore, to know whether a file is supposed to be executable, we check how its mode was recorded by git, by running a `git ls-files` command in a subprocess. By default, this command prints one entry per line (each file and its metadata, separated by newlines). When a filename contains an unusual character, git escapes it as an octal byte sequence (such as `\303\261`) and, because of the backslashes, wraps the whole filename in double quotes. This breaks the current code because we then try to open a filename that includes the double quotes, which of course doesn't exist. Instead of fixing this special case by stripping the double quotes, which would break other cases (a double quote is a valid filename character on Linux), we pass the `-z` option to tell git to terminate each entry with the null character `\0` instead of a newline `\n`. With this option, git doesn't escape unusual characters, so filenames are output verbatim, and we parse the output by splitting on `\0` instead of `\n`. Fixes #508.
79 lines
2.0 KiB
Python
79 lines
2.0 KiB
Python
"""Check that executable text files have a shebang."""
|
|
import argparse
|
|
import shlex
|
|
import sys
|
|
from typing import List
|
|
from typing import Optional
|
|
from typing import Sequence
|
|
from typing import Set
|
|
|
|
from pre_commit_hooks.util import cmd_output
|
|
|
|
# Octal permission digits that have the execute bit (0o1) set.  Each of the
# last three characters of a git file mode is tested for membership here.
EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))
|
|
|
|
|
|
def zsplit(s: str) -> List[str]:
    """Split NUL-delimited text into a list of fields.

    Leading and trailing NUL characters are stripped first, so an empty or
    all-NUL string produces ``[]`` rather than ``['']``.
    """
    trimmed = s.strip('\0')
    return trimmed.split('\0') if trimmed else []
|
|
|
|
|
|
def check_executables(paths: List[str]) -> int:
    """Report files that lack a shebang; return 1 if any were found, else 0.

    On Windows the decision is delegated to ``_check_git_filemode``, which
    consults the file modes recorded by git.  On every other platform each
    given path is checked directly for a shebang.
    """
    if sys.platform == 'win32':  # pragma: win32 cover
        return _check_git_filemode(paths)
    else:  # pragma: win32 no cover
        status = 0
        for filename in paths:
            if _check_has_shebang(filename):
                continue
            _message(filename)
            status = 1

        return status
|
|
|
|
|
|
def _check_git_filemode(paths: Sequence[str]) -> int:
|
|
outs = cmd_output('git', 'ls-files', '-z', '--stage', '--', *paths)
|
|
seen: Set[str] = set()
|
|
for out in zsplit(outs):
|
|
metadata, path = out.split('\t')
|
|
tagmode = metadata.split(' ', 1)[0]
|
|
|
|
is_executable = any(b in EXECUTABLE_VALUES for b in tagmode[-3:])
|
|
has_shebang = _check_has_shebang(path)
|
|
if is_executable and not has_shebang:
|
|
_message(path)
|
|
seen.add(path)
|
|
|
|
return int(bool(seen))
|
|
|
|
|
|
def _check_has_shebang(path: str) -> int:
|
|
with open(path, 'rb') as f:
|
|
first_bytes = f.read(2)
|
|
|
|
return first_bytes == b'#!'
|
|
|
|
|
|
def _message(path: str) -> None:
|
|
print(
|
|
f'{path}: marked executable but has no (or invalid) shebang!\n'
|
|
f" If it isn't supposed to be executable, try: "
|
|
f'`chmod -x {shlex.quote(path)}`\n'
|
|
f' If it is supposed to be executable, double-check its shebang.',
|
|
file=sys.stderr,
|
|
)
|
|
|
|
|
|
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command-line entry point.

    Parses the positional ``filenames`` from *argv* and returns the result
    of ``check_executables`` for them, suitable for use as an exit status.
    """
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument('filenames', nargs='*')
    options = arg_parser.parse_args(argv)
    return check_executables(options.filenames)
|
|
|
|
|
|
if __name__ == '__main__':
    # The ``exit`` builtin is injected by the ``site`` module and is not
    # guaranteed to exist (e.g. under ``python -S``); raising SystemExit
    # always works and passes main()'s return value on as the exit status.
    raise SystemExit(main())
|