mirror of
https://github.com/codespell-project/codespell.git
synced 2025-05-17 07:27:03 +08:00
267 lines
8.2 KiB
Python
Executable File
267 lines
8.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; version 2 of the License.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, see
|
|
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
|
|
"""
|
|
Copyright (C) 2010 Lucas De Marchi <lucas.de.marchi@gmail.com>
|
|
"""
|
|
|
|
import sys
|
|
import re
|
|
from optparse import OptionParser
|
|
import os
|
|
|
|
# Usage text handed to OptionParser; optparse expands %prog to the program name.
USAGE = """
\t%prog [OPTIONS] dict_filename [file1 file2 ... fileN]
"""
# Version string handed to OptionParser.
VERSION = '1.0.3'

# Maps a misspelled word to its Mispell entry; filled in by build_dict().
misspellings = {}
# Parsed command-line options; assigned by main().
options = None
# Encodings tried, in this order, when reading a file to check.
encodings = [ 'utf-8', 'iso-8859-1' ]
|
|
|
|
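# OPTIONS:
#   -d, --disable-colors  Disable colors even when printing to terminal
#   -r, -R                Parse directories recursively
#   -w, --write-changes   Write changes in place if possible
#
# ARGUMENTS:
#   dict_filename     The file containing the dictionary of misspellings.
#                     NOTE: '-' is not treated as stdin here; build_dict()
#                     opens the name as a regular file.
#   file1 .. fileN    Files to check spelling. A file given as '-' is read
#                     from stdin, which is also the default when no file
#                     is given.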
|
|
|
|
class Mispell:
    """One dictionary entry describing how to handle a misspelled word.

    The three constructor arguments are stored unchanged as attributes:
      data   -- the suggested replacement text
      fix    -- truthy when the replacement may be applied automatically
      reason -- optional explanation printed next to the suggestion
    """

    def __init__(self, data, fix, reason):
        self.data, self.fix, self.reason = data, fix, reason
|
|
|
|
class TermColors:
    """Terminal escape sequences used to colorize the report.

    FILE wraps file names, line numbers and reasons, WWORD wraps the wrong
    word, FWORD wraps the suggested word and DISABLE ends a colored span.
    """

    def __init__(self):
        self.FILE, self.WWORD = '\033[33m', '\033[31m'
        self.FWORD, self.DISABLE = '\033[32m', '\033[0m'

    def disable(self):
        """Replace every escape sequence with '' so output is plain text."""
        for attr in ('FILE', 'WWORD', 'FWORD', 'DISABLE'):
            setattr(self, attr, '')
|
|
|
|
# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-
|
|
|
|
def parse_options(args):
    """Parse the command line and return ``(options, positional_args)``.

    ``args`` is an argv-style sequence whose first element is the program
    name, which is how main() forwards ``sys.argv``.  When ``args`` is
    empty, ``sys.argv`` is parsed instead.

    The first positional argument is the dictionary file.  If no file to
    check follows it, '-' is appended so that stdin gets checked.

    Prints an error plus the help text and exits with status 1 when no
    dictionary is given.
    """
    parser = OptionParser(usage=USAGE, version=VERSION)

    parser.add_option('-d', '--disable-colors',
                      action='store_true', default=False,
                      help='Disable colors even when printing to terminal')
    parser.add_option('-r', '-R',
                      action='store_true', default=False,
                      dest='recursive',
                      help='parse directories recursively')
    parser.add_option('-w', '--write-changes',
                      action='store_true', default=False,
                      help='write changes in place if possible')

    # Honor the arguments we were given instead of silently re-reading
    # sys.argv.  Passing None makes optparse fall back to sys.argv[1:].
    argv = list(args[1:]) if args else None
    (o, positional) = parser.parse_args(argv)

    if not positional:
        print('ERROR: you need to specify a dictionary!', file=sys.stderr)
        parser.print_help()
        sys.exit(1)
    if len(positional) == 1:
        # Only the dictionary was given: check stdin.
        positional.append('-')

    return o, positional
|
|
|
|
|
|
def build_dict(filename):
    """Load the misspelling dictionary *filename* into ``misspellings``.

    Each non-blank line has the form ``key->data``.  The text after the
    last comma in ``data`` (if any) is the reason:

      key->word            one suggestion, may be fixed automatically
      key->word,           trailing comma: never fixed automatically
      key->a, b, reason    never fixed automatically, reason is reported

    Raises ValueError for a non-blank line without '->'.
    """
    # Decode explicitly so that the result does not depend on the locale.
    with open(filename, 'r', encoding='utf-8') as f:
        for lineno, line in enumerate(f, start=1):
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            key, arrow, data = line.partition('->')
            if not arrow:
                raise ValueError(
                    "%s:%d: expected 'misspelling->correction', got %r"
                    % (filename, lineno, line))

            data = data.strip()
            head, comma, tail = data.rpartition(',')
            if not comma:
                # No comma at all: a single, unambiguous suggestion.
                fix, reason = True, ''
            else:
                # Everything before the last comma is the suggestion text;
                # what follows it (possibly nothing) is the reason.
                data, reason, fix = head, tail.strip(), False

            # Looked-up words never carry whitespace, so strip the key.
            misspellings[key.strip()] = Mispell(data, fix, reason)
|
|
|
|
def ishidden(filename):
    """Return True when the last path component of *filename* is hidden.

    A component is hidden when it starts with a dot; the special names
    '.' and '..' and an empty basename are not considered hidden.
    """
    name = os.path.basename(filename)
    if name in ('', '.', '..'):
        return False
    return name.startswith('.')
|
|
|
|
|
|
def istextfile(filename):
    """Return False when the first 1024 bytes of *filename* contain a NUL.

    Files without a NUL byte in that leading chunk (including empty files)
    are treated as text and yield True.
    """
    with open(filename, mode='rb') as stream:
        head = stream.read(1024)
    return b'\x00' not in head
|
|
|
|
def _read_lines(filename):
    """Read *filename*, trying each entry of ``encodings`` in order.

    Returns ``(lines, encoding)`` for the first encoding that decodes the
    whole file, or ``(None, None)`` when every candidate fails.
    """
    for pos, encoding in enumerate(encodings):
        try:
            # The context manager closes the file on success and failure,
            # and nothing is closed if open() itself raises.
            with open(filename, 'r', encoding=encoding) as f:
                return f.readlines(), encoding
        except UnicodeDecodeError:
            print('WARNING: Decoding file %s' % filename, file=sys.stderr)
            print('WARNING: using encoding=%s failed. '
                  % encoding, file=sys.stderr)
            if pos + 1 < len(encodings):
                print('WARNING: Trying next encoding: %s'
                      % encodings[pos + 1], file=sys.stderr)
    return None, None


def _match_case(word, replacement):
    """Return *replacement* capitalized or upper-cased the way *word* is."""
    if word == word.capitalize():
        return replacement.capitalize()
    if word == word.upper():
        return replacement.upper()
    # Lower case already, or mixed case we cannot sensibly mirror.
    return replacement


def _report(filename, lineno, line, word, fixword, reason, colors):
    """Print one "wrong ==> right" suggestion to stdout."""
    cfilename = "%s%s%s" % (colors.FILE, filename, colors.DISABLE)
    cline = "%s%d%s" % (colors.FILE, lineno, colors.DISABLE)
    cwrongword = "%s%s%s" % (colors.WWORD, word, colors.DISABLE)
    crightword = "%s%s%s" % (colors.FWORD, fixword, colors.DISABLE)

    if reason:
        creason = " | %s%s%s" % (colors.FILE, reason, colors.DISABLE)
    else:
        creason = ''

    if filename != '-':
        print("%(FILENAME)s:%(LINE)s: %(WRONGWORD)s "
              " ==> %(RIGHTWORD)s%(REASON)s"
              % {'FILENAME': cfilename, 'LINE': cline,
                 'WRONGWORD': cwrongword,
                 'RIGHTWORD': crightword, 'REASON': creason})
    else:
        # For stdin there is no file to look at, so echo the line as well.
        print('%(LINE)s: %(STRLINE)s\n\t%(WRONGWORD)s '
              '==> %(RIGHTWORD)s%(REASON)s'
              % {'LINE': cline, 'STRLINE': line.strip(),
                 'WRONGWORD': cwrongword,
                 'RIGHTWORD': crightword, 'REASON': creason})


def parse_file(filename, colors):
    """Check *filename* ('-' means stdin) against ``misspellings``.

    Every misspelled word is either reported on stdout or, when
    ``options.write_changes`` is set and the dictionary entry allows an
    automatic fix, replaced in the line.  If anything was replaced, the
    result is written back to the file (or printed after a '---' marker
    for stdin).

    Binary files and files that cannot be decoded with any of the
    configured encodings are skipped with a message on stderr.

    colors -- a TermColors instance providing the escape sequences.
    """
    encoding = None

    if filename == '-':
        lines = sys.stdin.readlines()
    else:
        # ignore binary files
        if not istextfile(filename):
            print("WARNING: Binary file: %s " % filename, file=sys.stderr)
            return

        lines, encoding = _read_lines(filename)
        # Compare against None: an empty file decodes fine to [] and must
        # not be reported as an encoding failure.
        if lines is None:
            print('ERROR: Could not detect encoding: %s' % filename,
                  file=sys.stderr)
            return

    changed = False
    for i, line in enumerate(lines, start=1):
        # Exact spellings already replaced on this line.  Keyed on the
        # word as written because the substitution below is case-exact;
        # other casings of the same word still need their own pass.
        fixed_words = set()
        for word in re.findall(r'\w+', line):
            if word in fixed_words:
                continue

            lword = word.lower()
            if lword not in misspellings:
                continue

            entry = misspellings[lword]
            fixword = _match_case(word, entry.data)

            if options.write_changes and entry.fix:
                changed = True
                # Escape backslashes so the replacement is taken literally.
                lines[i - 1] = re.sub(r'\b%s\b' % re.escape(word),
                                      fixword.replace('\\', '\\\\'),
                                      lines[i - 1])
                fixed_words.add(word)
                continue

            _report(filename, i, line, word, fixword, entry.reason, colors)

    if not changed:
        return

    if filename == '-':
        print("---")
        for line in lines:
            print(line, end='')
    else:
        print("%sFIXED:%s %s" % (colors.FWORD, colors.DISABLE, filename),
              file=sys.stderr)
        # Write back with the encoding the file was decoded with;
        # otherwise non-UTF-8 files would be corrupted or fail to encode.
        with open(filename, 'w', encoding=encoding) as f:
            f.writelines(lines)
|
|
|
|
|
|
def main(*args):
    """Program entry point; *args* is the argv-style argument list.

    Parses the options, loads the dictionary given as first positional
    argument and checks every remaining argument.  Hidden names given on
    the command line are skipped.  Directories are skipped unless the
    recursive option is set, in which case they are walked without
    descending into hidden sub-directories and without following
    symbolic links to files.
    """
    global options
    (options, args) = parse_options(args)

    build_dict(args[0])
    colors = TermColors()
    if options.disable_colors:
        colors.disable()

    for filename in args[1:]:
        # ignore hidden files
        if ishidden(filename):
            continue

        if not os.path.isdir(filename):
            parse_file(filename, colors)
            continue

        if not options.recursive:
            continue

        for root, dirs, files in os.walk(filename):
            # Prune hidden directories in place so os.walk() does not
            # descend into them.  Slice assignment avoids deleting from
            # the list while iterating it, which used to skip the entry
            # following each removed one.
            dirs[:] = [d for d in dirs if not ishidden(d)]

            for name in files:
                # os.walk() yields bare names; test the real path rather
                # than a name relative to the current directory.
                path = os.path.join(root, name)
                if os.path.islink(path):
                    continue

                parse_file(path, colors)
|
|
|
|
# Run the checker only when executed as a script, and hand whatever main()
# returns to the interpreter as the exit status.
if __name__ == '__main__':
    exit_status = main(*sys.argv)
    sys.exit(exit_status)
|