#!/usr/bin/env python3
#
# markdown-preprocess - filter *.md.in files, convert to .md
#
"""
Simpleminded include mechanism for podman man pages.
"""

import filecmp
import glob
import os
import re
import sys

# One '<< ... >>' template token; group 1 is the text between the delimiters.
# DOTALL lets a token span more than one line.
_TOKEN_RE = re.compile(r"<<(.*?)>>", re.DOTALL)

# A '<<lhs|rhs>>' choice token, as resolved by Preprocessor.replace_type()
_CHOICE_RE = re.compile(r'<<[^\|>]*\|[^\|>]*>>')

# 'pod' followed by a non-"m" character or end-of-string (so not 'podman')
_POD_RE = re.compile('.*pod([^m]|$)', re.IGNORECASE)


class Preprocessor():
    """
    Doesn't really merit a whole OO approach, except we have a lot
    of state variables to pass around, and self is a convenient
    way to do that. Better than globals, anyway.
    """
    def __init__(self):
        # Name of the *.md.in file currently being processed
        self.infile = ''
        # 'pod' or 'container', derived from the name of infile
        self.pod_or_container = ''
        # option file path -> list of subcommands whose man pages include it
        self.used_by = {}

    def render(self, text: str, context: dict) -> str:
        """
        Renders the `text` handling the following extra formatting features:

        ```
        << if variable >>
        ...
        << endif >>

        << if not variable >>
        ...
        << else >>
        ...
        << endif >>

        << "foo" if variable else "bar" >>
        ```

        Any other `<<...>>` token (e.g. `<<lhs|rhs>>`) is not a template
        directive; it is copied to the output unchanged so that later
        processing stages can still see it.

        Returns the rendered text.

        Raises ValueError on an undefined variable, on `else`/`endif`
        without a matching `if`, on a repeated `else`, on an unclosed `if`,
        or on a malformed inline if/else.
        """
        out = []
        pos = 0
        stack = []  # each frame: {"active": bool, "seen_else": bool}

        def is_active():
            # Text is emitted only if every enclosing block is active
            return all(f["active"] for f in stack)

        def get_variable(name: str):
            v = context.get(name, None)
            if v is None:
                raise ValueError(f"undefined variable: {name}")
            return v

        def truthy(name: str) -> bool:
            name = name.strip()
            if name.startswith("not "):
                v = get_variable(name[4:].strip())
                return not bool(v)
            return bool(get_variable(name))

        for m in _TOKEN_RE.finditer(text):
            # write literal up to token
            if is_active():
                out.append(text[pos:m.start()])
            pos = m.end()

            inner = m.group(1).strip()

            # control blocks: 'if var' or 'if not var'
            if inner.startswith("if ") and len(inner[3:].strip().split(" ")) in [1, 2]:
                cond = inner[3:].strip()
                # The condition is only evaluated when the parent is active
                stack.append({"active": is_active() and truthy(cond), "seen_else": False})
                continue
            if inner == "else":
                if not stack:
                    raise ValueError("`else` without `if`")
                frame = stack[-1]
                if frame["seen_else"]:
                    raise ValueError("multiple `else` in the same `if`")
                frame["seen_else"] = True
                parent_active = all(f["active"] for f in stack[:-1])
                frame["active"] = parent_active and not frame["active"]
                continue
            if inner == "endif":
                if not stack:
                    raise ValueError("`endif` without `if`")
                stack.pop()
                continue

            # inline "X if cond else Y"
            if " if " in inner and " else " in inner:
                try:
                    # split by " if " then " else "
                    then_part, rest = inner.split(" if ", 1)
                    cond, else_part = rest.split(" else ", 1)
                    cond = cond.strip()
                    chosen = then_part if truthy(cond) else else_part
                    if is_active():
                        # Surrounding quotes on the chosen operand are dropped
                        out.append(chosen.strip().strip("'\""))
                except Exception as e:
                    raise ValueError(f"Invalid inline if/else syntax: {inner}") from e
                continue

            # Not one of our directives: keep the token verbatim. Dropping it
            # here would prevent replace_type() and the subcommand
            # substitution in insert_file() from ever seeing it.
            if is_active():
                out.append(m.group(0))

        # trailing literal
        if is_active():
            out.append(text[pos:])

        if stack:
            raise ValueError("unclosed `if` block(s)")

        return "".join(out)

    def process(self, infile:str):
        """
        Main calling point: preprocesses one file

        Copies infile to a temporary file, expanding '@@option NAME' and
        '@@include PATH' lines via insert_file(), then makes the result
        read-only and moves it to the name of infile minus its last
        extension (foo.md.in -> foo.md).

        Raises Exception (chained to the underlying error) naming the file
        and the offending line if any line cannot be processed.
        """
        self.infile = infile
        # Some options are the same between containers and pods; determine
        # which description to use from the name of the source man page.
        self.pod_or_container = 'container'
        if '-pod-' in infile or '-kube-' in infile:
            self.pod_or_container = 'pod'

        # foo.md.in -> foo.md -- but always write to a tmpfile
        outfile = os.path.splitext(infile)[0]
        outfile_tmp = outfile + '.tmp.' + str(os.getpid())

        try:
            with open(infile, 'r', encoding='utf-8') as fh_in, open(outfile_tmp, 'w', encoding='utf-8', newline='\n') as fh_out:
                for line in fh_in:
                    try:
                        # '@@option foo' -> include file options/foo.md
                        if line.startswith('@@option '):
                            _, optionname = line.strip().split(" ")
                            is_quadlet = optionname.startswith("quadlet:")
                            if is_quadlet:
                                optionname = optionname[len("quadlet:"):]
                            optionfile = os.path.join("options", optionname + '.md')
                            self.track_optionfile(optionfile)
                            self.insert_file(fh_out, optionfile, is_quadlet)
                        # '@@include relative-path/must-exist.md'
                        elif line.startswith('@@include '):
                            _, path = line.strip().split(" ")
                            self.insert_file(fh_out, path)
                        else:
                            fh_out.write(line)
                    except Exception as ex:
                        # Strip only the newline: the last line of a file
                        # may not have one.
                        shown = line.rstrip('\n')
                        raise Exception(f"Error while processing {infile} line '{shown}'") from ex

            os.chmod(outfile_tmp, 0o444)
            os.replace(outfile_tmp, outfile)
        except BaseException:
            # Do not leave a stray tmpfile behind on failure
            try:
                os.unlink(outfile_tmp)
            except OSError:
                pass
            raise

    def track_optionfile(self, optionfile: str):
        """
        Keep track of which man pages use which option files
        """
        self.used_by.setdefault(optionfile, []).append(self.podman_subcommand('full'))

    def rewrite_optionfiles(self):
        """
        Rewrite all option files, such that they include header comments
        cross-referencing all the man pages in which they're used.
        """
        for optionfile in self.used_by:
            tmpfile = optionfile + '.tmp.' + str(os.getpid())
            with open(optionfile, 'r', encoding='utf-8') as fh_in, open(tmpfile, 'w', encoding='utf-8', newline='\n') as fh_out:
                fh_out.write("####> This option file is used in:\n")
                used_by = ', '.join(self.used_by[optionfile])
                fh_out.write(f"####>   podman {used_by}\n")
                fh_out.write("####> If file is edited, make sure the changes\n")
                fh_out.write("####> are applicable to all of those.\n")
                # Copy the body, dropping any previous generated header
                for line in fh_in:
                    if not line.startswith('####>'):
                        fh_out.write(line)
            # Compare files; only rewrite if the new one differs.
            # shallow=False forces a content comparison; the default would
            # accept matching size+mtime as "equal" without reading.
            if filecmp.cmp(optionfile, tmpfile, shallow=False):
                os.unlink(tmpfile)
            else:
                # Single-step replacement: the option file never goes missing
                os.replace(tmpfile, optionfile)

    def insert_file(self, fh_out, path: str, is_quadlet=False):
        """
        Reads one option file, writes it out to the given output filehandle

        The file is first passed through render() with `is_quadlet` as the
        only template variable. '####>' header lines are skipped; every
        other line has its '<<lhs|rhs>>' choices and subcommand placeholders
        resolved before being written.
        """
        # Comment intended to help someone viewing the .md file.
        # Leading newline is important because if two lines are
        # consecutive without a break, sphinx (but not go-md2man)
        # treats them as one line and will unwantedly render the
        # comment in its output.
        fh_out.write("\n[//]: # (BEGIN included file " + path + ")\n")
        with open(path, 'r', encoding='utf-8') as fh_included:
            rendered = self.render(fh_included.read(), {"is_quadlet": is_quadlet})
            for opt_line in rendered.splitlines():
                if opt_line.startswith('####>'):
                    continue
                opt_line = self.replace_type(opt_line)
                # NOTE(review): both calls previously replaced the same
                # literal '<>', which made the second one unreachable. The
                # placeholder names below are restored by inference from the
                # two calls; confirm against the option files.
                opt_line = opt_line.replace('<<subcommand>>', self.podman_subcommand())
                opt_line = opt_line.replace('<<fullsubcommand>>', self.podman_subcommand('full'))
                fh_out.write(opt_line + '\n')
            fh_out.write("\n[//]: # (END   included file " + path + ")\n")

    def podman_subcommand(self, full=None) -> str:
        """
        Returns the string form of the podman command, based on man page name;
        e.g., 'foo bar' for podman-foo-bar.1.md.in

        If `full` is false, a leading 'podman-pod-' is also removed, so
        podman-pod-start.1.md.in yields 'start' rather than 'pod start'.
        """
        subcommand = self.infile
        # Special case: 'podman-pod-start' becomes just 'start'
        if not full:
            if subcommand.startswith("podman-pod-"):
                subcommand = subcommand[len("podman-pod-"):]
        # For quadlet man-pages, the subcommand is simply the full manpage name.
        if subcommand.endswith(".unit.5.md.in"):
            return subcommand
        if subcommand.startswith("podman-"):
            subcommand = subcommand[len("podman-"):]
        if subcommand.endswith(".1.md.in"):
            subcommand = subcommand[:-len(".1.md.in")]
        return subcommand.replace("-", " ")

    def replace_type(self, line: str) -> str:
        """
        Replace instances of '<<lhs|rhs>>' with the appropriate side, based
        on whether this is a pod-related man page or not: the side that
        mentions 'pod' is used when self.pod_or_container is 'pod', the
        other side otherwise.

        Raises Exception if 'pod' appears in both sides or in neither.
        """
        # Internal helper function: determines the desired half of the <a|b> string
        def replwith(matchobj):
            lhs, rhs = matchobj[0].split('|')
            # Strip off '<<' and '>>'
            lhs = lhs[2:]
            rhs = rhs[:len(rhs)-2]

            # Check both sides for 'pod' followed by (non-"m" or end-of-string).
            # The non-m prevents us from triggering on 'podman', which could
            # conceivably be present in both sides. And we check for 'pod',
            # not 'container', because the pod-related side may itself
            # contain the word 'container'.
            if _POD_RE.match(lhs):
                if _POD_RE.match(rhs):
                    raise Exception(f"'{matchobj[0]}' matches 'pod' in both left and right sides")
                # Only left-hand side has "pod"
                if self.pod_or_container == 'pod':
                    return lhs
                return rhs

            # 'pod' not in lhs, must be in rhs
            if not _POD_RE.match(rhs):
                raise Exception(f"'{matchobj[0]}' does not match 'pod' in either side")
            if self.pod_or_container == 'pod':
                return rhs
            return lhs

        return _CHOICE_RE.sub(replwith, line)


def main():
    "script entry point"
    # Work from the man page source dir, located relative to this script
    script_dir = os.path.abspath(os.path.dirname(__file__))
    man_dir = os.path.join(script_dir,"../docs/source/markdown")

    try:
        os.chdir(man_dir)
    except FileNotFoundError as ex:
        raise Exception("Please invoke me from the base repo dir") from ex

    # No longer possible to invoke us with args: reject any such invocation
    if len(sys.argv) > 1:
        raise Exception("This script accepts no arguments")

    preprocessor = Preprocessor()
    for infile in sorted(glob.glob('*.md.in')):
        preprocessor.process(infile)

    # Now rewrite all option files
    preprocessor.rewrite_optionfiles()


if __name__ == "__main__":
    main()