# -*- coding: utf-8 -*-
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
"""
Copyright (C) 2010-2011  Lucas De Marchi <lucas.de.marchi@gmail.com>
Copyright (C) 2011  ProFUSION embedded systems
"""

import argparse
import codecs
import configparser
import fnmatch
import os
import re
import sys
import textwrap

word_regex_def = u"[\\w\\-'’`]+"
# While we want to treat characters like ( or " as okay for a starting break,
# these may occur unescaped in URIs, and so we are more restrictive on the
# endpoint.  Emails are more restrictive, so the endpoint remains flexible.
uri_regex_def = (u"(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
                 u"\\b[\\w.%+-]+@[\\w.-]+\\b)")
encodings = ('utf-8', 'iso-8859-1')
USAGE = """
\t%prog [OPTIONS] [file1 file2 ... fileN]
"""
VERSION = '2.3.0.dev0'

supported_languages_en = ('en', 'en_GB', 'en_US', 'en_CA', 'en_AU')
supported_languages = supported_languages_en

# Users might want to link this file into /usr/local/bin, so we resolve the
# symbolic link path to the real path if necessary.
_data_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
_builtin_dictionaries = (
    # name, desc, name, err in aspell, correction in aspell, \
    # err dictionary array, rep dictionary array
    # The arrays must contain the names of aspell dictionaries
    # The aspell tests here aren't the ideal state, but the None's are
    # realistic for obscure words
    ('clear', 'for unambiguous errors', '',
        False, None, supported_languages_en, None),
    ('rare', 'for rare (but valid) words that are likely to be errors', '_rare',  # noqa: E501
        None, None, None, None),
    ('informal', 'for making informal words more formal', '_informal',
        True, True, supported_languages_en, supported_languages_en),
    ('usage', 'for replacing phrasing with recommended terms', '_usage',
        None, None, None, None),
    ('code', 'for words from code and/or mathematics that are likely to be typos in other contexts (such as uint)', '_code',  # noqa: E501
        None, None, None, None,),
    ('names', 'for valid proper names that might be typos', '_names',
        None, None, None, None,),
    ('en-GB_to_en-US', 'for corrections from en-GB to en-US', '_en-GB_to_en-US',  # noqa: E501
        True, True, ('en_GB',), ('en_US',)),
)
_builtin_default = 'clear,rare'

# docs say os.EX_USAGE et al. are only available on Unix systems, so to be safe
# we protect and just use the values they are on macOS and Linux
EX_OK = 0
EX_USAGE = 64
EX_DATAERR = 65

# OPTIONS:
#
# ARGUMENTS:
#    dict_filename       The file containing the dictionary of misspellings.
#                        If set to '-', it will be read from stdin
#    file1 .. fileN      Files to check spelling


class QuietLevels(object):
    NONE = 0
    ENCODING = 1
    BINARY_FILE = 2
    DISABLED_FIXES = 4
    NON_AUTOMATIC_FIXES = 8
    FIXES = 16


class GlobMatch(object):
    def __init__(self, pattern):
        if pattern:
            # Pattern might be a list of comma-delimited strings
            self.pattern_list = ','.join(pattern).split(',')
        else:
            self.pattern_list = None

    def match(self, filename):
        if self.pattern_list is None:
            return False

        for p in self.pattern_list:
            if fnmatch.fnmatch(filename, p):
                return True

        return False


class Misspelling(object):
    def __init__(self, data, fix, reason):
        self.data = data
        self.fix = fix
        self.reason = reason


class TermColors(object):
    def __init__(self):
        self.FILE = '\033[33m'
        self.WWORD = '\033[31m'
        self.FWORD = '\033[32m'
        self.DISABLE = '\033[0m'

    def disable(self):
        self.FILE = ''
        self.WWORD = ''
        self.FWORD = ''
        self.DISABLE = ''


class Summary(object):
    def __init__(self):
        self.summary = {}

    def update(self, wrongword):
        if wrongword in self.summary:
            self.summary[wrongword] += 1
        else:
            self.summary[wrongword] = 1

    def __str__(self):
        keys = list(self.summary.keys())
        keys.sort()

        return "\n".join(["{0}{1:{width}}".format(
                         key,
                         self.summary.get(key),
                         width=15 - len(key)) for key in keys])


class FileOpener(object):
    def __init__(self, use_chardet, quiet_level):
        self.use_chardet = use_chardet
        if use_chardet:
            self.init_chardet()
        self.quiet_level = quiet_level

    def init_chardet(self):
        try:
            from chardet.universaldetector import UniversalDetector
        except ImportError:
            raise ImportError("There's no chardet installed to import from. "
                              "Please, install it and check your PYTHONPATH "
                              "environment variable")

        self.encdetector = UniversalDetector()

    def open(self, filename):
        if self.use_chardet:
            return self.open_with_chardet(filename)
        else:
            return self.open_with_internal(filename)

    def open_with_chardet(self, filename):
        self.encdetector.reset()
        with codecs.open(filename, 'rb') as f:
            for line in f:
                self.encdetector.feed(line)
                if self.encdetector.done:
                    break
        self.encdetector.close()
        encoding = self.encdetector.result['encoding']

        try:
            f = codecs.open(filename, 'r', encoding=encoding)
        except UnicodeDecodeError:
            print("ERROR: Could not detect encoding: %s" % filename,
                  file=sys.stderr)
            raise
        except LookupError:
            print("ERROR: Don't know how to handle encoding %s: %s"
                  % (encoding, filename,), file=sys.stderr)
            raise
        else:
            lines = f.readlines()
            f.close()

        return lines, encoding

    def open_with_internal(self, filename):
        curr = 0
        while True:
            try:
                f = codecs.open(filename, 'r', encoding=encodings[curr])
            except UnicodeDecodeError:
                if not self.quiet_level & QuietLevels.ENCODING:
                    print("WARNING: Decoding file using encoding=%s failed: %s"
                          % (encodings[curr], filename,), file=sys.stderr)
                    try:
                        print("WARNING: Trying next encoding %s"
                              % encodings[curr + 1], file=sys.stderr)
                    except IndexError:
                        pass

                curr += 1
            else:
                lines = f.readlines()
                f.close()
                break
        if not lines:
            raise Exception('Unknown encoding')

        encoding = encodings[curr]

        return lines, encoding

# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-


# If someday this breaks, we can just switch to using RawTextHelpFormatter,
# but it has the disadvantage of not wrapping our long lines.

class NewlineHelpFormatter(argparse.HelpFormatter):
    """Help formatter that preserves newlines and deals with lists."""

    def _split_lines(self, text, width):
        parts = text.split('\n')
        out = list()
        for pi, part in enumerate(parts):
            # Eventually we could allow others...
            indent_start = '- '
            if part.startswith(indent_start):
                offset = len(indent_start)
            else:
                offset = 0
            part = part[offset:]
            part = self._whitespace_matcher.sub(' ', part).strip()
            parts = textwrap.wrap(part, width - offset)
            parts = [' ' * offset + p for p in parts]
            if offset:
                parts[0] = indent_start + parts[0][offset:]
            out.extend(parts)
        return out


def parse_options(args):
    parser = argparse.ArgumentParser(formatter_class=NewlineHelpFormatter)

    parser.set_defaults(colors=sys.stdout.isatty())
    parser.add_argument('--version', action='version', version=VERSION)

    parser.add_argument('-d', '--disable-colors',
                        action='store_false', dest='colors',
                        help='disable colors, even when printing to terminal '
                             '(always set for Windows)')
    parser.add_argument('-c', '--enable-colors',
                        action='store_true', dest='colors',
                        help='enable colors, even when not printing to '
                             'terminal')

    parser.add_argument('-w', '--write-changes',
                        action='store_true', default=False,
                        help='write changes in place if possible')

    parser.add_argument('-D', '--dictionary',
                        action='append',
                        help='custom dictionary file that contains spelling '
                             'corrections. If this flag is not specified or '
                             'equals "-" then the default dictionary is used. '
                             'This option can be specified multiple times.')
    builtin_opts = '\n- '.join([''] + [
        '%r %s' % (d[0], d[1]) for d in _builtin_dictionaries])
    parser.add_argument('--builtin',
                        dest='builtin', default=_builtin_default,
                        metavar='BUILTIN-LIST',
                        help='comma-separated list of builtin dictionaries '
                        'to include (when "-D -" or no "-D" is passed). '
                        'Current options are:' + builtin_opts + '\n'
                        'The default is %(default)r.')
    parser.add_argument('--ignore-regex',
                        action='store', type=str,
                        help='regular expression that is used to find '
                             'patterns to ignore by treating as whitespace. '
                             'When writing regular expressions, consider '
                             'ensuring there are boundary non-word chars, '
                             'e.g., "\\bmatch\\b". Defaults to '
                             'empty/disabled.')
    parser.add_argument('-I', '--ignore-words',
                        action='append', metavar='FILE',
                        help='file that contains words that will be ignored '
                             'by codespell. File must contain 1 word per line.'
                             ' Words are case sensitive based on how they are '
                             'written in the dictionary file')
    parser.add_argument('-L', '--ignore-words-list',
                        action='append', metavar='WORDS',
                        help='comma separated list of words to be ignored '
                             'by codespell. Words are case sensitive based on '
                             'how they are written in the dictionary file')
    parser.add_argument('--uri-ignore-words-list',
                        action='append', metavar='WORDS',
                        help='comma separated list of words to be ignored '
                             'by codespell in URIs and emails only. Words are '
                             'case sensitive based on how they are written in '
                             'the dictionary file. If set to "*", all '
                             'misspelling in URIs and emails will be ignored.')
    parser.add_argument('-r', '--regex',
                        action='store', type=str,
                        help='regular expression that is used to find words. '
                             'By default any alphanumeric character, the '
                             'underscore, the hyphen, and the apostrophe is '
                             'used to build words. This option cannot be '
                             'specified together with --write-changes.')
    parser.add_argument('--uri-regex',
                        action='store', type=str,
                        help='regular expression that is used to find URIs '
                             'and emails. A default expression is provided.')
    parser.add_argument('-s', '--summary',
                        action='store_true', default=False,
                        help='print summary of fixes')

    parser.add_argument('--count',
                        action='store_true', default=False,
                        help='print the number of errors as the last line of '
                             'stderr')

    parser.add_argument('-S', '--skip',
                        action='append',
                        help='comma-separated list of files to skip. It '
                             'accepts globs as well. E.g.: if you want '
                             'codespell to skip .eps and .txt files, '
                             'you\'d give "*.eps,*.txt" to this option.')

    parser.add_argument('-x', '--exclude-file', type=str, metavar='FILE',
                        help='ignore whole lines that match those '
                             'in the file FILE. The lines in FILE '
                             'should match the to-be-excluded lines exactly')

    parser.add_argument('-i', '--interactive',
                        action='store', type=int, default=0,
                        help='set interactive mode when writing changes:\n'
                             '- 0: no interactivity.\n'
                             '- 1: ask for confirmation.\n'
                             '- 2: ask user to choose one fix when more than one is available.\n'  # noqa: E501
                             '- 3: both 1 and 2')

    parser.add_argument('-q', '--quiet-level',
                        action='store', type=int, default=2,
                        help='bitmask that allows suppressing messages:\n'
                             '- 0: print all messages.\n'
                             '- 1: disable warnings about wrong encoding.\n'
                             '- 2: disable warnings about binary files.\n'
                             '- 4: omit warnings about automatic fixes that were disabled in the dictionary.\n'  # noqa: E501
                             '- 8: don\'t print anything for non-automatic fixes.\n'  # noqa: E501
                             '- 16: don\'t print the list of fixed files.\n'
                             'As usual with bitmasks, these levels can be '
                             'combined; e.g. use 3 for levels 1+2, 7 for '
                             '1+2+4, 23 for 1+2+4+16, etc. '
                             'The default mask is %(default)s.')

    parser.add_argument('-e', '--hard-encoding-detection',
                        action='store_true', default=False,
                        help='use chardet to detect the encoding of each '
                             'file. This can slow down codespell, but is more '
                             'reliable in detecting encodings other than '
                             'utf-8, iso8859-1, and ascii.')

    parser.add_argument('-f', '--check-filenames',
                        action='store_true', default=False,
                        help='check file names as well')

    parser.add_argument('-H', '--check-hidden',
                        action='store_true', default=False,
                        help='check hidden files and directories (those '
                             'starting with ".") as well.')
    parser.add_argument('-A', '--after-context', type=int, metavar='LINES',
                        help='print LINES of trailing context')
    parser.add_argument('-B', '--before-context', type=int, metavar='LINES',
                        help='print LINES of leading context')
    parser.add_argument('-C', '--context', type=int, metavar='LINES',
                        help='print LINES of surrounding context')
    parser.add_argument('--config', type=str,
                        help='path to config file.')
    parser.add_argument('--toml', type=str,
                        help='path to a pyproject.toml file.')
    parser.add_argument('files', nargs='*',
                        help='files or directories to check')

    # Parse command line options.
    options = parser.parse_args(list(args))

    # Load config files and look for ``codespell`` options.
    cfg_files = ['setup.cfg', '.codespellrc']
    if options.config:
        cfg_files.append(options.config)
    config = configparser.ConfigParser()

    # Read toml before other config files.
    toml_files_errors = list()
    if os.path.isfile('pyproject.toml'):
        toml_files_errors.append(('pyproject.toml', False))
    if options.toml:
        toml_files_errors.append((options.toml, True))
    for toml_file, raise_error in toml_files_errors:
        try:
            import tomli
        except Exception as exc:
            if raise_error:
                raise ImportError(
                    f'tomli is required to read pyproject.toml but could not '
                    f'be imported, got: {exc}') from None
            else:
                continue
        with open(toml_file, 'rb') as f:
            data = tomli.load(f).get('tool', {})
        config.read_dict(data)
    config.read(cfg_files)

    if config.has_section('codespell'):
        # Build a "fake" argv list using option name and value.
        cfg_args = []
        for key in config['codespell']:
            # Add option as arg.
            cfg_args.append("--%s" % key)
            # If value is blank, skip.
            val = config['codespell'][key]
            if val != "":
                cfg_args.append(val)

        # Parse config file options.
        options = parser.parse_args(cfg_args)

        # Re-parse command line options to override config.
        options = parser.parse_args(list(args), namespace=options)

    if not options.files:
        options.files.append('.')

    return options, parser


def parse_ignore_words_option(ignore_words_option):
    ignore_words = set()
    if ignore_words_option:
        for comma_separated_words in ignore_words_option:
            for word in comma_separated_words.split(','):
                ignore_words.add(word.strip())
    return ignore_words


def build_exclude_hashes(filename, exclude_lines):
    with codecs.open(filename, 'r') as f:
        for line in f:
            exclude_lines.add(line)


def build_ignore_words(filename, ignore_words):
    with codecs.open(filename, mode='r', encoding='utf-8') as f:
        for line in f:
            ignore_words.add(line.strip())


def build_dict(filename, misspellings, ignore_words):
    with codecs.open(filename, mode='r', encoding='utf-8') as f:
        for line in f:
            [key, data] = line.split('->')
            # TODO for now, convert both to lower. Someday we can maybe add
            # support for fixing caps.
            key = key.lower()
            data = data.lower()
            if key in ignore_words:
                continue
            data = data.strip()
            fix = data.rfind(',')

            if fix < 0:
                fix = True
                reason = ''
            elif fix == (len(data) - 1):
                data = data[:fix]
                reason = ''
                fix = False
            else:
                reason = data[fix + 1:].strip()
                data = data[:fix]
                fix = False

            misspellings[key] = Misspelling(data, fix, reason)


def is_hidden(filename, check_hidden):
    bfilename = os.path.basename(filename)

    return bfilename not in ('', '.', '..') and \
        (not check_hidden and bfilename[0] == '.')


def is_text_file(filename):
    with open(filename, mode='rb') as f:
        s = f.read(1024)
    if b'\x00' in s:
        return False
    return True


def fix_case(word, fixword):
    if word == word.capitalize():
        return ', '.join(w.strip().capitalize() for w in fixword.split(','))
    elif word == word.upper():
        return fixword.upper()
    # they are both lower case
    # or we don't have any idea
    return fixword


def ask_for_word_fix(line, wrongword, misspelling, interactivity):
    if interactivity <= 0:
        return misspelling.fix, fix_case(wrongword, misspelling.data)

    if misspelling.fix and interactivity & 1:
        r = ''
        fixword = fix_case(wrongword, misspelling.data)
        while not r:
            print("%s\t%s ==> %s (Y/n) " % (line, wrongword, fixword), end='')
            r = sys.stdin.readline().strip().upper()
            if not r:
                r = 'Y'
            if r != 'Y' and r != 'N':
                print("Say 'y' or 'n'")
                r = ''

        if r == 'N':
            misspelling.fix = False
            misspelling.fixword = ''

    elif (interactivity & 2) and not misspelling.reason:
        # if it is not disabled, i.e. it just has more than one possible fix,
        # we ask the user which word to use

        r = ''
        opt = [w.strip() for w in misspelling.data.split(',')]
        while not r:
            print("%s Choose an option (blank for none): " % line, end='')
            for i in range(len(opt)):
                fixword = fix_case(wrongword, opt[i])
                print(" %d) %s" % (i, fixword), end='')
            print(": ", end='')
            sys.stdout.flush()

            n = sys.stdin.readline().strip()
            if not n:
                break

            try:
                n = int(n)
                r = opt[n]
            except (ValueError, IndexError):
                print("Not a valid option\n")

        if r:
            misspelling.fix = True
            misspelling.data = r

    return misspelling.fix, fix_case(wrongword, misspelling.data)


def print_context(lines, index, context):
    # context = (context_before, context_after)
    for i in range(index - context[0], index + context[1] + 1):
        if 0 <= i < len(lines):
            print('%s %s' % ('>' if i == index else ':', lines[i].rstrip()))


def extract_words(text, word_regex, ignore_word_regex):
    if ignore_word_regex:
        text = ignore_word_regex.sub(' ', text)
    return word_regex.findall(text)


def apply_uri_ignore_words(check_words, line, word_regex, ignore_word_regex,
                           uri_regex, uri_ignore_words):
    if not uri_ignore_words:
        return
    for uri in re.findall(uri_regex, line):
        for uri_word in extract_words(uri, word_regex,
                                      ignore_word_regex):
            if uri_word in uri_ignore_words:
                check_words.remove(uri_word)


def parse_file(filename, colors, summary, misspellings, exclude_lines,
               file_opener, word_regex, ignore_word_regex, uri_regex,
               uri_ignore_words, context, options):
    bad_count = 0
    lines = None
    changed = False
    encoding = encodings[0]  # if not defined, use UTF-8

    if filename == '-':
        f = sys.stdin
        lines = f.readlines()
    else:
        if options.check_filenames:
            for word in extract_words(filename, word_regex, ignore_word_regex):
                lword = word.lower()
                if lword not in misspellings:
                    continue
                fix = misspellings[lword].fix
                fixword = fix_case(word, misspellings[lword].data)

                if summary and fix:
                    summary.update(lword)

                cfilename = "%s%s%s" % (colors.FILE, filename, colors.DISABLE)
                cwrongword = "%s%s%s" % (colors.WWORD, word, colors.DISABLE)
                crightword = "%s%s%s" % (colors.FWORD, fixword, colors.DISABLE)

                if misspellings[lword].reason:
                    if options.quiet_level & QuietLevels.DISABLED_FIXES:
                        continue
                    creason = "  | %s%s%s" % (colors.FILE,
                                              misspellings[lword].reason,
                                              colors.DISABLE)
                else:
                    if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
                        continue
                    creason = ''

                bad_count += 1

                print("%(FILENAME)s: %(WRONGWORD)s"
                      " ==> %(RIGHTWORD)s%(REASON)s"
                      % {'FILENAME': cfilename,
                         'WRONGWORD': cwrongword,
                         'RIGHTWORD': crightword, 'REASON': creason})

        # ignore irregular files
        if not os.path.isfile(filename):
            return bad_count

        text = is_text_file(filename)
        if not text:
            if not options.quiet_level & QuietLevels.BINARY_FILE:
                print("WARNING: Binary file: %s" % filename, file=sys.stderr)
            return bad_count
        try:
            lines, encoding = file_opener.open(filename)
        except Exception:
            return bad_count

    for i, line in enumerate(lines):
        if line in exclude_lines:
            continue

        fixed_words = set()
        asked_for = set()

        # If all URI spelling errors will be ignored, erase any URI before
        # extracting words. Otherwise, apply ignores after extracting words.
        # This ensures that if a URI ignore word occurs both inside a URI and
        # outside, it will still be a spelling error.
        if "*" in uri_ignore_words:
            line = uri_regex.sub(' ', line)
        check_words = extract_words(line, word_regex, ignore_word_regex)
        if "*" not in uri_ignore_words:
            apply_uri_ignore_words(check_words, line, word_regex,
                                   ignore_word_regex, uri_regex,
                                   uri_ignore_words)

        for word in check_words:
            lword = word.lower()
            if lword in misspellings:
                context_shown = False
                fix = misspellings[lword].fix
                fixword = fix_case(word, misspellings[lword].data)

                if options.interactive and lword not in asked_for:
                    if context is not None:
                        context_shown = True
                        print_context(lines, i, context)
                    fix, fixword = ask_for_word_fix(
                        lines[i], word, misspellings[lword],
                        options.interactive)
                    asked_for.add(lword)

                if summary and fix:
                    summary.update(lword)

                if word in fixed_words:  # can skip because of re.sub below
                    continue

                if options.write_changes and fix:
                    changed = True
                    lines[i] = re.sub(r'\b%s\b' % word, fixword, lines[i])
                    fixed_words.add(word)
                    continue

                # otherwise warning was explicitly set by interactive mode
                if (options.interactive & 2 and not fix and not
                        misspellings[lword].reason):
                    continue

                cfilename = "%s%s%s" % (colors.FILE, filename, colors.DISABLE)
                cline = "%s%d%s" % (colors.FILE, i + 1, colors.DISABLE)
                cwrongword = "%s%s%s" % (colors.WWORD, word, colors.DISABLE)
                crightword = "%s%s%s" % (colors.FWORD, fixword, colors.DISABLE)

                if misspellings[lword].reason:
                    if options.quiet_level & QuietLevels.DISABLED_FIXES:
                        continue

                    creason = "  | %s%s%s" % (colors.FILE,
                                              misspellings[lword].reason,
                                              colors.DISABLE)
                else:
                    if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
                        continue

                    creason = ''

                # If we get to this point (uncorrected error) we should change
                # our bad_count and thus return value
                bad_count += 1

                if (not context_shown) and (context is not None):
                    print_context(lines, i, context)
                if filename != '-':
                    print("%(FILENAME)s:%(LINE)s: %(WRONGWORD)s "
                          "==> %(RIGHTWORD)s%(REASON)s"
                          % {'FILENAME': cfilename, 'LINE': cline,
                             'WRONGWORD': cwrongword,
                             'RIGHTWORD': crightword, 'REASON': creason})
                else:
                    print("%(LINE)s: %(STRLINE)s\n\t%(WRONGWORD)s "
                          "==> %(RIGHTWORD)s%(REASON)s"
                          % {'LINE': cline, 'STRLINE': line.strip(),
                             'WRONGWORD': cwrongword,
                             'RIGHTWORD': crightword, 'REASON': creason})

    if changed:
        if filename == '-':
            print("---")
            for line in lines:
                print(line, end='')
        else:
            if not options.quiet_level & QuietLevels.FIXES:
                print("%sFIXED:%s %s"
                      % (colors.FWORD, colors.DISABLE, filename),
                      file=sys.stderr)
            with codecs.open(filename, 'w', encoding=encoding) as f:
                f.writelines(lines)
    return bad_count


def _script_main():
    """Wrap to main() for setuptools."""
    return main(*sys.argv[1:])


def main(*args):
    """Contains flow control"""
    options, parser = parse_options(args)

    if options.regex and options.write_changes:
        print("ERROR: --write-changes cannot be used together with "
              "--regex", file=sys.stderr)
        parser.print_help()
        return EX_USAGE
    word_regex = options.regex or word_regex_def
    try:
        word_regex = re.compile(word_regex)
    except re.error as err:
        print("ERROR: invalid --regex \"%s\" (%s)" %
              (word_regex, err), file=sys.stderr)
        parser.print_help()
        return EX_USAGE

    if options.ignore_regex:
        try:
            ignore_word_regex = re.compile(options.ignore_regex)
        except re.error as err:
            print("ERROR: invalid --ignore-regex \"%s\" (%s)" %
                  (options.ignore_regex, err), file=sys.stderr)
            parser.print_help()
            return EX_USAGE
    else:
        ignore_word_regex = None

    ignore_words_files = options.ignore_words or []
    ignore_words = parse_ignore_words_option(options.ignore_words_list)
    for ignore_words_file in ignore_words_files:
        if not os.path.isfile(ignore_words_file):
            print("ERROR: cannot find ignore-words file: %s" %
                  ignore_words_file, file=sys.stderr)
            parser.print_help()
            return EX_USAGE
        build_ignore_words(ignore_words_file, ignore_words)

    uri_regex = options.uri_regex or uri_regex_def
    try:
        uri_regex = re.compile(uri_regex)
    except re.error as err:
        print("ERROR: invalid --uri-regex \"%s\" (%s)" %
              (uri_regex, err), file=sys.stderr)
        parser.print_help()
        return EX_USAGE
    uri_ignore_words = parse_ignore_words_option(options.uri_ignore_words_list)

    if options.dictionary:
        dictionaries = options.dictionary
    else:
        dictionaries = ['-']
    use_dictionaries = list()
    for dictionary in dictionaries:
        if dictionary == "-":
            # figure out which builtin dictionaries to use
            use = sorted(set(options.builtin.split(',')))
            for u in use:
                for builtin in _builtin_dictionaries:
                    if builtin[0] == u:
                        use_dictionaries.append(
                            os.path.join(_data_root, 'dictionary%s.txt'
                                         % (builtin[2],)))
                        break
                else:
                    print("ERROR: Unknown builtin dictionary: %s" % (u,),
                          file=sys.stderr)
                    parser.print_help()
                    return EX_USAGE
        else:
            if not os.path.isfile(dictionary):
                print("ERROR: cannot find dictionary file: %s" % dictionary,
                      file=sys.stderr)
                parser.print_help()
                return EX_USAGE
            use_dictionaries.append(dictionary)
    misspellings = dict()
    for dictionary in use_dictionaries:
        build_dict(dictionary, misspellings, ignore_words)
    colors = TermColors()
    if not options.colors or sys.platform == 'win32':
        colors.disable()

    if options.summary:
        summary = Summary()
    else:
        summary = None

    context = None
    if options.context is not None:
        if (options.before_context is not None) or \
                (options.after_context is not None):
            print("ERROR: --context/-C cannot be used together with "
                  "--context-before/-B or --context-after/-A",
                  file=sys.stderr)
            parser.print_help()
            return EX_USAGE
        context_both = max(0, options.context)
        context = (context_both, context_both)
    elif (options.before_context is not None) or \
            (options.after_context is not None):
        context_before = 0
        context_after = 0
        if options.before_context is not None:
            context_before = max(0, options.before_context)
        if options.after_context is not None:
            context_after = max(0, options.after_context)
        context = (context_before, context_after)

    exclude_lines = set()
    if options.exclude_file:
        build_exclude_hashes(options.exclude_file, exclude_lines)

    file_opener = FileOpener(options.hard_encoding_detection,
                             options.quiet_level)
    glob_match = GlobMatch(options.skip)

    bad_count = 0
    for filename in options.files:
        # ignore hidden files
        if is_hidden(filename, options.check_hidden):
            continue

        if os.path.isdir(filename):
            for root, dirs, files in os.walk(filename):
                if glob_match.match(root):  # skip (absolute) directories
                    del dirs[:]
                    continue
                if is_hidden(root, options.check_hidden):  # dir itself hidden
                    continue
                for file_ in files:
                    # ignore hidden files in directories
                    if is_hidden(file_, options.check_hidden):
                        continue
                    if glob_match.match(file_):  # skip files
                        continue
                    fname = os.path.join(root, file_)
                    if glob_match.match(fname):  # skip paths
                        continue
                    bad_count += parse_file(
                        fname, colors, summary, misspellings, exclude_lines,
                        file_opener, word_regex, ignore_word_regex, uri_regex,
                        uri_ignore_words, context, options)

                # skip (relative) directories
                dirs[:] = [dir_ for dir_ in dirs if not glob_match.match(dir_)]

        elif not glob_match.match(filename):  # skip files
            bad_count += parse_file(
                filename, colors, summary, misspellings, exclude_lines,
                file_opener, word_regex, ignore_word_regex, uri_regex,
                uri_ignore_words, context, options)

    if summary:
        print("\n-------8<-------\nSUMMARY:")
        print(summary)
    if options.count:
        print(bad_count, file=sys.stderr)
    return EX_DATAERR if bad_count else EX_OK