import os

import regex as re

from sphinxlint import rst
from sphinxlint.utils import (
    clean_paragraph,
    escape2null,
    hide_non_rst_blocks,
    looks_like_glued,
    match_size,
    paragraphs,
)

# Registry of every checker function, keyed by its dashed name.
# Populated as a side effect of applying the `checker` decorator.
all_checkers = {}


def checker(*suffixes, **kwds):
    """Decorator to register a function as a checker.

    `suffixes` are stored on the function as `func.suffixes`.  The keyword
    arguments `enabled` and `rst_only` (both defaulting to True) are stored
    as attributes of the same name; other keywords are ignored.

    The decorated function must be named `check_<something>`; it is
    registered in `all_checkers` under `<something>` with underscores
    replaced by dashes, and that name is also stored as `func.name`.

    Raises ValueError if the function name does not start with `check_`.
    """
    checker_props = {"enabled": True, "rst_only": True}

    def deco(func):
        if not func.__name__.startswith("check_"):
            raise ValueError("Checker names should start with 'check_'.")
        for prop, default_value in checker_props.items():
            setattr(func, prop, kwds.get(prop, default_value))
        func.suffixes = suffixes
        func.name = func.__name__[len("check_") :].replace("_", "-")
        all_checkers[func.name] = func
        return func

    return deco


@checker(".py", rst_only=False)
def check_python_syntax(file, lines, options=None):
    """Search invalid syntax in Python examples.

    Yields `(lineno, message)` tuples.  Carriage returns are reported
    (except on Windows) with line number 0, then stripped before the code
    is handed to `compile()`.
    """
    code = "".join(lines)
    if "\r" in code:
        if os.name != "nt":
            yield 0, "\\r in code file"
        code = code.replace("\r", "")
    try:
        compile(code, file, "exec")
    except SyntaxError as err:
        yield err.lineno, f"not compilable: {err}"


@checker(".rst", ".po")
def check_missing_backtick_after_role(file, lines, options=None):
    """Search for roles missing their closing backticks.

    Bad:  :fct:`foo
    Good: :fct:`foo`
    """
    for paragraph_lno, paragraph in paragraphs(lines):
        if paragraph.count("|") > 4:
            return  # we don't handle tables yet.
        for error in rst.ROLE_MISSING_CLOSING_BACKTICK_RE.finditer(paragraph):
            # Convert the match offset into a line offset inside the paragraph.
            error_offset = paragraph[: error.start()].count("\n")
            yield (
                paragraph_lno + error_offset,
                f"role missing closing backtick: {error.group(0)!r}",
            )


# An inline literal plus the single character that follows it.
_RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL)
_END_STRING_SUFFIX_RE = re.compile(rst.END_STRING_SUFFIX)


@checker(".rst", ".po")
def check_missing_space_after_literal(file, lines, options=None):
    r"""Search for inline literals immediately followed by a character.

    Bad:  ``items``s
    Good: ``items``\ s
    """
    for paragraph_lno, paragraph in paragraphs(lines):
        if paragraph.count("|") > 4:
            return  # we don't handle tables yet.
        paragraph = clean_paragraph(paragraph)
        for role in _RST_ROLE_RE.finditer(paragraph):
            # The last matched character is the one right after the literal.
            if not _END_STRING_SUFFIX_RE.match(role[0][-1]):
                error_offset = paragraph[: role.start()].count("\n")
                yield (
                    paragraph_lno + error_offset,
                    "inline literal missing "
                    f"(escaped) space after literal: {role.group(0)!r}",
                )


# NOTE(review): in the reviewed text everything between the "<" of this
# pattern and the ">" of the table guard below had been lost (markup
# stripping).  The pattern tail, decorator, function name, docstring and
# loop header were reconstructed to mirror the sibling checkers -- confirm
# against version control before merging.
_LONE_DOUBLE_BACKTICK_RE = re.compile("(?<!`)``(?!`)")


@checker(".rst", ".po")
def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
    r"""Search for unbalanced inline literals delimiters.

    Bad:  ``hello`` world``
    Good: ``hello`` world
    """
    for paragraph_lno, paragraph in paragraphs(lines):
        if paragraph.count("|") > 4:
            return  # we don't handle tables yet.
        paragraph = clean_paragraph(paragraph)
        for lone_double_backtick in _LONE_DOUBLE_BACKTICK_RE.finditer(paragraph):
            error_offset = paragraph[: lone_double_backtick.start()].count("\n")
            yield (
                paragraph_lno + error_offset,
                "found an unbalanced inline literal markup.",
            )


_ends_with_role_tag = re.compile(rst.ROLE_TAG + "$").search
_starts_with_role_tag = re.compile("^" + rst.ROLE_TAG).search


@checker(".rst", ".po", enabled=False)
def check_default_role(file, lines, options=None):
    """Search for default roles (but they are allowed in many projects).

    Bad:  `print`
    Good: ``print``
    """
    for lno, line in enumerate(lines, start=1):
        line = clean_paragraph(line)
        line = escape2null(line)
        for match in rst.INTERPRETED_TEXT_RE.finditer(line):
            before_match = line[: match.start()]
            after_match = line[match.end() :]
            stripped_line = line.strip()
            if (
                stripped_line.startswith("|")
                and stripped_line.endswith("|")
                and stripped_line.count("|") >= 4
                and "|" in match.group(0)
            ):
                return  # we don't handle tables yet.
            if _ends_with_role_tag(before_match):
                # It's not a default role: it ends with a tag.
                continue
            if _starts_with_role_tag(after_match):
                # It's not a default role: it starts with a tag.
                continue
            if match.group(0).startswith("``") and match.group(0).endswith("``"):
                # It's not a default role: it's an inline literal.
                continue
            yield (
                lno,
                "default role used (hint: for inline literals, use double backticks)",
            )


@checker(".rst", ".po")
def check_directive_with_three_dots(file, lines, options=None):
    """Search for directives with three dots instead of two.

    Bad:  ... versionchanged:: 3.6
    Good:  .. versionchanged:: 3.6
    """
    for lno, line in enumerate(lines, start=1):
        if rst.THREE_DOT_DIRECTIVE_RE.search(line):
            yield lno, "directive should start with two dots, not three."


@checker(".rst", ".po")
def check_directive_missing_colons(file, lines, options=None):
    """Search for directive wrongly typed as comments.

    Bad:  .. versionchanged 3.6.
    Good: .. versionchanged:: 3.6
    """
    for lno, line in enumerate(lines, start=1):
        if rst.SEEMS_DIRECTIVE_RE.search(line):
            yield lno, "comment seems to be intended as a directive"


# The difficulty here is that the following is valid:
#    The :literal:`:exc:`Exceptions``
# While this is not:
#    The :literal:`:exc:`Exceptions``s
_ROLE_BODY = rf"([^`]|\s`+|\\`|:{rst.SIMPLENAME}:`([^`]|\s`+|\\`)+`)+"
_ALLOWED_AFTER_ROLE = (
    rst.ASCII_ALLOWED_AFTER_INLINE_MARKUP
    + rst.UNICODE_ALLOWED_AFTER_INLINE_MARKUP
    + r"|\s"
)
_SUSPICIOUS_ROLE = re.compile(
    f":{rst.SIMPLENAME}:`{_ROLE_BODY}`[^{_ALLOWED_AFTER_ROLE}]"
)


@checker(".rst", ".po")
def check_missing_space_after_role(file, lines, options=None):
    r"""Search for roles immediately followed by a character.

    Bad:  :exc:`Exception`s.
    Good: :exc:`Exceptions`\ s
    """
    for lno, line in enumerate(lines, start=1):
        line = clean_paragraph(line)
        for role in _SUSPICIOUS_ROLE.finditer(line):
            yield lno, f"role missing (escaped) space after role: {role.group(0)!r}"


@checker(".rst", ".po")
def check_role_without_backticks(file, lines, options=None):
    """Search roles without backticks.

    Bad:  :func:pdb.main
    Good: :func:`pdb.main`
    """
    for lno, line in enumerate(lines, start=1):
        for no_backticks in rst.ROLE_WITH_NO_BACKTICKS_RE.finditer(line):
            yield lno, f"role with no backticks: {no_backticks.group(0)!r}"


@checker(".rst", ".po")
def check_backtick_before_role(file, lines, options=None):
    """Search for roles preceded by a backtick.

    Bad:  `:fct:`sum`
    Good: :fct:`sum`
    """
    for lno, line in enumerate(lines, start=1):
        if "`" not in line:
            continue  # cheap pre-filter before running the regex
        if rst.BACKTICK_IN_FRONT_OF_ROLE_RE.search(line):
            yield lno, "superfluous backtick in front of role"


@checker(".rst", ".po")
def check_missing_space_in_hyperlink(file, lines, options=None):
    """Search for hyperlinks missing a space.

    Bad:  `Link text<https://example.com>`_
    Good: `Link text <https://example.com>`_
    """
    # NOTE(review): the "<...>" targets in the examples above were missing
    # from the reviewed text and have been restored -- confirm.
    for lno, line in enumerate(lines, start=1):
        if "`" not in line:
            continue
        for match in rst.SEEMS_HYPERLINK_RE.finditer(line):
            if not match.group(1):
                yield lno, "missing space before < in hyperlink"


@checker(".rst", ".po")
def check_missing_underscore_after_hyperlink(file, lines, options=None):
    """Search for hyperlinks missing underscore after their closing backtick.

    Bad:  `Link text <https://example.com>`
    Good: `Link text <https://example.com>`_
    """
    # NOTE(review): the "<...>" targets in the examples above were missing
    # from the reviewed text and have been restored -- confirm.
    for lno, line in enumerate(lines, start=1):
        if "`" not in line:
            continue
        for match in rst.SEEMS_HYPERLINK_RE.finditer(line):
            if not match.group(2):
                yield lno, "missing underscore after closing backtick in hyperlink"


@checker(".rst", ".po")
def check_role_with_double_backticks(file, lines, options=None):
    """Search for roles with double backticks.

    Bad:  :fct:``sum``
    Good: :fct:`sum`

    The hard thing is that :fct:``sum`` is a legitimate
    restructuredtext construction:

    :fct: is just plain text.
    ``sum`` is an inline literal.

    So to properly detect this one we're searching for actual inline
    literals that have a role tag.
    """
    for paragraph_lno, paragraph in paragraphs(lines):
        if "`" not in paragraph:
            continue
        if paragraph.count("|") > 4:
            return  # we don't handle tables yet.
        paragraph = escape2null(paragraph)
        while True:
            # Pick the smallest (per `match_size`) of all, possibly
            # overlapping, inline-literal candidates, inspect it, then cut
            # it out of the paragraph and start over until none is left.
            inline_literal = min(
                rst.INLINE_LITERAL_RE.finditer(paragraph, overlapped=True),
                key=match_size,
                default=None,
            )
            if inline_literal is None:
                break
            before = paragraph[: inline_literal.start()]
            if _ends_with_role_tag(before):
                error_offset = before.count("\n")
                yield (
                    paragraph_lno + error_offset,
                    "role use a single backtick, double backtick found.",
                )
            paragraph = before + paragraph[inline_literal.end() :]


@checker(".rst", ".po")
def check_missing_space_before_role(file, lines, options=None):
    """Search for missing spaces before roles.

    Bad:  the:fct:`sum`, issue:`123`, c:func:`foo`
    Good: the :fct:`sum`, :issue:`123`, :c:func:`foo`
    """
    for paragraph_lno, paragraph in paragraphs(lines):
        if paragraph.count("|") > 4:
            return  # we don't handle tables yet.
        paragraph = clean_paragraph(paragraph)
        for match in rst.ROLE_GLUED_WITH_WORD_RE.finditer(paragraph):
            error_offset = paragraph[: match.start()].count("\n")
            # `looks_like_glued` decides which of the two messages applies.
            if looks_like_glued(match):
                yield (
                    paragraph_lno + error_offset,
                    f"missing space before role ({match.group(0)}).",
                )
            else:
                yield (
                    paragraph_lno + error_offset,
                    f"role missing opening tag colon ({match.group(0)}).",
                )


@checker(".rst", ".po")
def check_missing_space_before_default_role(file, lines, options=None):
    """Search for missing spaces before default role.

    Bad:  the`sum`
    Good: the `sum`
    """
    # Loop-invariant: build the pattern once instead of once per paragraph.
    glued_default_role = rst.inline_markup_gen(
        "`", "`", extra_allowed_before="[^_]"
    )
    for paragraph_lno, paragraph in paragraphs(lines):
        if paragraph.count("|") > 4:
            return  # we don't handle tables yet.
        paragraph = clean_paragraph(paragraph)
        # Drop well-formed interpreted text; what still matches is suspicious.
        paragraph = rst.INTERPRETED_TEXT_RE.sub("", paragraph)
        for role in glued_default_role.finditer(paragraph):
            error_offset = paragraph[: role.start()].count("\n")
            # Show up to three characters of leading context.  Clamp at 0:
            # a negative start index would be read from the end of the
            # paragraph and produce a wrong (usually empty) context.
            context_start = max(0, role.start() - 3)
            context = paragraph[context_start : role.end()]
            yield (
                paragraph_lno + error_offset,
                f"missing space before default role: {context!r}.",
            )


# NOTE(review): the reviewed text had this pattern as r"\S* `_", i.e. with
# the "<...>" target stripped like in the docstrings; it could not match the
# "Bad" example documented below.  Restored -- confirm against version control.
_HYPERLINK_REFERENCE_RE = re.compile(r"\S* <https?://[^ ]+>`_")


@checker(".rst", ".po")
def check_hyperlink_reference_missing_backtick(file, lines, options=None):
    """Search for missing backticks in front of hyperlink references.

    Bad:  Misc/NEWS <https://example.com/Misc/NEWS>`_
    Good: `Misc/NEWS <https://example.com/Misc/NEWS>`_
    """
    for paragraph_lno, paragraph in paragraphs(lines):
        if paragraph.count("|") > 4:
            return  # we don't handle tables yet.
        paragraph = clean_paragraph(paragraph)
        # Well-formed references are removed here, so only the ones lacking
        # their opening backtick can still match below.
        paragraph = rst.INTERPRETED_TEXT_RE.sub("", paragraph)
        for hyperlink_reference in _HYPERLINK_REFERENCE_RE.finditer(paragraph):
            error_offset = paragraph[: hyperlink_reference.start()].count("\n")
            context = hyperlink_reference.group(0)
            yield (
                paragraph_lno + error_offset,
                f"missing backtick before hyperlink reference: {context!r}.",
            )


@checker(".rst", ".po")
def check_missing_colon_in_role(file, lines, options=None):
    """Search for missing colons in roles.

    Bad:  :issue`123`
    Good: :issue:`123`
    """
    for lno, line in enumerate(lines, start=1):
        for match in rst.ROLE_MISSING_RIGHT_COLON_RE.finditer(line):
            yield lno, f"role missing colon before first backtick ({match.group(0)})."


@checker(".py", ".rst", ".po", rst_only=False)
def check_carriage_return(file, lines, options=None):
    r"""Check for carriage returns (\r) in lines."""
    for lno, line in enumerate(lines, start=1):
        if "\r" in line:
            yield lno, "\\r in line"


@checker(".py", ".rst", ".po", rst_only=False)
def check_horizontal_tab(file, lines, options=None):
    r"""Check for horizontal tabs (\t) in lines."""
    for lno, line in enumerate(lines, start=1):
        if "\t" in line:
            yield lno, "OMG TABS!!!1"


@checker(".py", ".rst", ".po", rst_only=False)
def check_trailing_whitespace(file, lines, options=None):
    """Check for trailing whitespaces at end of lines."""
    for lno, line in enumerate(lines, start=1):
        # Ignore the line terminator itself; only spaces/tabs before it count.
        stripped_line = line.rstrip("\n")
        if stripped_line.rstrip(" \t") != stripped_line:
            yield lno, "trailing whitespace"


@checker(".py", ".rst", ".po", rst_only=False)
def check_missing_final_newline(file, lines, options=None):
    """Check that the last line of the file ends with a newline."""
    if lines and not lines[-1].endswith("\n"):
        yield len(lines), "No newline at end of file."


_is_long_interpreted_text = re.compile(r"^\s*\W*(:(\w+:)+)?`.*`\W*$").match
_starts_with_directive_or_hyperlink = re.compile(r"^\s*\.\. ").match
_starts_with_anonymous_hyperlink = re.compile(r"^\s*__ ").match
_is_very_long_string_literal = re.compile(r"^\s*``[^`]+``$").match


@checker(".rst", ".po", enabled=False, rst_only=True)
def check_line_too_long(file, lines, options=None):
    """Check for line length; this checker is not run by default.

    `options` must provide a `max_line_length` attribute.  Wide tables,
    long interpreted text, directives, hyperlink targets and long inline
    literals are exempt because they cannot be wrapped.
    """
    for lno, line in enumerate(lines, start=1):
        # Measure the text without its line terminator.  Stripping it (rather
        # than subtracting 1) stays correct for a final line that has none.
        length = len(line.rstrip("\n"))
        if length <= options.max_line_length:
            continue
        # `startswith` instead of indexing `[0]`: a whitespace-only line
        # strips down to "" and indexing it would raise IndexError.
        if line.lstrip().startswith(("+", "|")):
            continue  # ignore wide tables
        if _is_long_interpreted_text(line):
            continue  # ignore long interpreted text
        if _starts_with_directive_or_hyperlink(line):
            continue  # ignore directives and hyperlink targets
        if _starts_with_anonymous_hyperlink(line):
            continue  # ignore anonymous hyperlink targets
        if _is_very_long_string_literal(line):
            continue  # ignore a very long literal string
        yield lno, f"Line too long ({length}/{options.max_line_length})"


@checker(".html", enabled=False, rst_only=False)
def check_leaked_markup(file, lines, options=None):
    """Check HTML files for leaked reST markup.

    This only works if the HTML files have been built.
    """
    for lno, line in enumerate(lines, start=1):
        if rst.LEAKED_MARKUP_RE.search(line):
            yield lno, f"possibly leaked markup: {line}"


@checker(".rst", ".po", enabled=False)
def check_triple_backticks(file, lines, options=None):
    """Check for triple backticks, like ```Point``` (but it's a valid syntax).

    Bad:  ```Point```
    Good: ``Point``

    In reality, triple backticks are valid: ```foo``` gets
    rendered as `foo`, it's at least used by Sphinx to document rst
    syntax, but it's really uncommon.
    """
    for lno, line in enumerate(lines, start=1):
        # One report per occurrence, so a line may be reported several times.
        for _ in rst.TRIPLE_BACKTICKS_RE.finditer(line):
            yield lno, "There's no rst syntax using triple backticks"


_has_bad_dedent = re.compile(" [^ ].*::$").match


@checker(".rst", ".po", rst_only=False)
def check_bad_dedent(file, lines, options=None):
    """Check for mis-alignment in indentation in code blocks.

    |A 5 lines block::
    |
    |    Hello!
    |
    | Looks like another block::
    |
    |    But in fact it's not due to the leading space.
    """
    errors = []

    def check_block(block_lineno, block):
        # Callback handed to `hide_non_rst_blocks`; collects errors in the
        # enclosing list because a callback cannot yield for the checker.
        for lineno, line in enumerate(block.splitlines()):
            if _has_bad_dedent(line):
                errors.append((block_lineno + lineno, "Bad dedent in block"))

    # Exhaust the generator purely for its callback side effect.
    list(hide_non_rst_blocks(lines, hidden_block_cb=check_block))
    yield from errors


_has_dangling_hyphen = re.compile(r".*[a-z]-$").match


@checker(".rst", rst_only=True)
def check_dangling_hyphen(file, lines, options=None):
    """Check for lines ending in a hyphen."""
    for lno, line in enumerate(lines, start=1):
        stripped_line = line.rstrip("\n")
        if _has_dangling_hyphen(stripped_line):
            yield lno, "Line ends with dangling hyphen"


@checker(".rst", ".po", rst_only=False, enabled=True)
def check_unnecessary_parentheses(filename, lines, options=None):
    """Check for unnecessary parentheses in :func: and :meth: roles.

    Bad:  :func:`test()`
    Good: :func:`test`
    """
    for lno, line in enumerate(lines, start=1):
        for match in rst.ROLE_WITH_UNNECESSARY_PARENTHESES_RE.finditer(line):
            yield lno, f"Unnecessary parentheses in {match.group(0).strip()!r}"