Update MTex

2025-08-03 04:04:36 +08:00 · 2022-01-26 23:46:13 +08:00
parent 240f5020b4
commit 942a7e71b8
1 changed files with 120 additions and 96 deletions
--- a/manimlib/mobject/svg/mtex_mobject.py
+++ b/manimlib/mobject/svg/mtex_mobject.py
@ -1,5 +1,6 @@
 import itertools as it
 import re
+import sys
 from types import MethodType

 from manimlib.mobject.svg.svg_mobject import SVGMobject
@ -10,6 +11,7 @@ from manimlib.utils.iterables import remove_list_redundancies
 from manimlib.utils.tex_file_writing import tex_to_svg_file
 from manimlib.utils.tex_file_writing import get_tex_config
 from manimlib.utils.tex_file_writing import display_during_execution
+from manimlib.logger import log


 SCALE_FACTOR_PER_FONT_POINT = 0.001
@ -37,6 +39,11 @@ class _PlainTex(SVGMobject):


 class _LabelledTex(_PlainTex):
+    def __init__(self, file_name=None, **kwargs):
+        super().__init__(file_name, **kwargs)
+        for glyph in self:
+            glyph.glyph_label = _LabelledTex.color_str_to_label(glyph.fill_color)
+
    @staticmethod
    def color_str_to_label(color_str):
        if len(color_str) == 4:
@ -44,21 +51,6 @@ class _LabelledTex(_PlainTex):
            color_str = "#" + "".join([c * 2 for c in color_str[1:]])
        return int(color_str[1:], 16)

-    def get_mobjects_from(self, element):
-        result = super().get_mobjects_from(element)
-        for mob in result:
-            if not hasattr(mob, "glyph_label"):
-                mob.glyph_label = 0
-        try:
-            color_str = element.getAttribute("fill")
-            if color_str:
-                glyph_label = _LabelledTex.color_str_to_label(color_str)
-                for mob in result:
-                    mob.glyph_label = glyph_label
-        except:
-            pass
-        return result
-

 class _TexSpan(object):
    def __init__(self, script_type, label):
@ -76,16 +68,19 @@ class _TexSpan(object):


 class _TexParser(object):
-    def __init__(self, mtex):
-        self.tex_string = mtex.tex_string
+    def __init__(self, tex_string, additional_substrings):
+        self.tex_string = tex_string
        self.tex_spans_dict = {}
+        self.specified_substrings = []
        self.current_label = 0
-        self.add_tex_span((0, len(self.tex_string)))
-        self.break_up_by_braces()
+        self.brace_index_pairs = self.get_brace_index_pairs()
+        self.add_tex_span((0, len(tex_string)))
+        self.break_up_by_double_braces()
        self.break_up_by_scripts()
-        self.break_up_by_additional_strings(mtex.strings_to_break_up)
-        self.merge_unbreakable_commands(mtex.unbreakable_commands)
+        self.break_up_by_additional_substrings(additional_substrings)
+        self.check_if_overlap()
        self.analyse_containing_labels()
+        self.specified_substrings = remove_list_redundancies(self.specified_substrings)

    @staticmethod
    def label_to_color_tuple(rgb):
@ -96,6 +91,9 @@ class _TexParser(object):
        return r, g, b

    def add_tex_span(self, span_tuple, script_type=0, label=-1):
+        if span_tuple in self.tex_spans_dict:
+            return
+
        if script_type == 0:
            # Should be additionally labelled.
            label = self.current_label
@ -104,11 +102,14 @@ class _TexParser(object):
        tex_span = _TexSpan(script_type, label)
        self.tex_spans_dict[span_tuple] = tex_span

-    def break_up_by_braces(self):
-        tex_string = self.tex_string
-        span_tuples = []
+    def add_specified_substring(self, span_tuple):
+        substring = self.tex_string[slice(*span_tuple)]
+        self.specified_substrings.append(substring)
+
+    def get_brace_index_pairs(self):
+        result = []
        left_brace_indices = []
-        for match_obj in re.finditer(r"(\\*)(\{|\})", tex_string):
+        for match_obj in re.finditer(r"(\\*)(\{|\})", self.tex_string):
            # Braces following even numbers of backslashes are counted.
            if len(match_obj.group(1)) % 2 == 1:
                continue
@ -118,17 +119,32 @@ class _TexParser(object):
            else:
                left_brace_index = left_brace_indices.pop()
                right_brace_index = match_obj.span(2)[1]
-                span_tuples.append((left_brace_index, right_brace_index))
+                result.append((left_brace_index, right_brace_index))
        if left_brace_indices:
-            self.raise_tex_parsing_error()
+            self.raise_tex_parsing_error("unmatched braces")
+        return result

-        self.paired_braces_tuples = span_tuples
-        for span_tuple in span_tuples:
-            self.add_tex_span(span_tuple)
+    def break_up_by_double_braces(self):
+        # Match paired double braces (`{{...}}`).
+        skip_pair = False
+        for prev_span_tuple, span_tuple in _get_neighbouring_pairs(
+            self.brace_index_pairs
+        ):
+            if skip_pair:
+                skip_pair = False
+                continue
+            if all([
+                span_tuple[0] == prev_span_tuple[0] - 1,
+                span_tuple[1] == prev_span_tuple[1] + 1
+            ]):
+                self.add_tex_span(span_tuple)
+                self.add_specified_substring(span_tuple)
+                skip_pair = True

    def break_up_by_scripts(self):
+        # Match subscripts & superscripts.
        tex_string = self.tex_string
-        brace_indices_dict = dict(self.tex_spans_dict.keys())
+        brace_indices_dict = dict(self.brace_index_pairs)
        for match_obj in re.finditer(r"((?<!\\)(_|\^)\s*)|(\s+(_|\^)\s*)", tex_string):
            script_type = 1 if "_" in match_obj.group() else 2
            token_begin, token_end = match_obj.span()
@ -137,11 +153,11 @@ class _TexParser(object):
            else:
                content_match_obj = re.match(r"\w|\\[a-zA-Z]+", tex_string[token_end:])
                if not content_match_obj:
-                    self.raise_tex_parsing_error()
+                    self.raise_tex_parsing_error("unclear subscript/superscript")
                content_span = tuple([
                    index + token_end for index in content_match_obj.span()
                ])
-                self.add_tex_span(content_span)
+            self.add_tex_span(content_span)
            label = self.tex_spans_dict[content_span].label
            self.add_tex_span(
                (token_begin, content_span[1]),
@ -149,11 +165,11 @@ class _TexParser(object):
                label=label
            )

-    def break_up_by_additional_strings(self, strings_to_break_up):
+    def break_up_by_additional_substrings(self, additional_substrings):
        tex_string = self.tex_string
        all_span_tuples = []
-        for string in strings_to_break_up:
-            # Only matches non-crossing strings.
+        for string in additional_substrings:
+            # Only match non-crossing strings.
            for match_obj in re.finditer(re.escape(string), tex_string):
                all_span_tuples.append(match_obj.span())

@ -170,32 +186,30 @@ class _TexParser(object):
                if span_begin >= span_end:
                    continue
            span_tuple = (span_begin, span_end)
-            if span_tuple not in self.tex_spans_dict:
-                self.add_tex_span(span_tuple)
+            self.add_tex_span(span_tuple)
+            self.add_specified_substring(span_tuple)

-    def merge_unbreakable_commands(self, unbreakable_commands):
-        tex_string = self.tex_string
-        command_merge_spans = []
-        brace_indices_dict = dict(self.paired_braces_tuples)
-        # Braces leading by `unbreakable_commands` shouldn't be marked.
-        for command in unbreakable_commands:
-            for match_obj in re.finditer(re.escape(command), tex_string):
-                merge_begin_index = match_obj.span()[1]
-                merge_end_index = merge_begin_index
-                if merge_end_index not in brace_indices_dict:
+    def check_if_overlap(self):
+        span_tuples = sorted(
+            self.tex_spans_dict.keys(),
+            key=lambda t: (t[0], -t[1])
+        )
+        overlapping_span_pairs = []
+        for i, span_0 in enumerate(span_tuples):
+            for span_1 in span_tuples[i + 1 :]:
+                if span_0[1] <= span_1[0]:
                    continue
-                while merge_end_index in brace_indices_dict:
-                    merge_end_index = brace_indices_dict[merge_end_index]
-                command_merge_spans.append((merge_begin_index, merge_end_index))
-
-        self.tex_spans_dict = {
-            span_tuple: tex_span
-            for span_tuple, tex_span in self.tex_spans_dict.items()
-            if all([
-                not _contains(merge_span, span_tuple)
-                for merge_span in command_merge_spans
-            ])
-        }
+                if span_0[1] < span_1[1]:
+                    overlapping_span_pairs.append((span_0, span_1))
+        if overlapping_span_pairs:
+            tex_string = self.tex_string
+            log.error("Overlapping substring pairs occur in MTex:")
+            for span_tuple_pair in overlapping_span_pairs:
+                log.error(", ".join(
+                    f"\"{tex_string[slice(*span_tuple)]}\""
+                    for span_tuple in span_tuple_pair
+                ))
+            sys.exit(2)

    def analyse_containing_labels(self):
        for span_0, tex_span_0 in self.tex_spans_dict.items():
@ -219,8 +233,9 @@ class _TexParser(object):
        ], key=lambda t: (t[0], -t[1], -t[2]))[1:]

        result = tex_string[: indices_with_labels[0][0]]
-        index_with_label_pairs = _get_neighbouring_pairs(indices_with_labels)
-        for index_with_label, next_index_with_label in index_with_label_pairs:
+        for index_with_label, next_index_with_label in _get_neighbouring_pairs(
+            indices_with_labels
+        ):
            index, flag, _, label = index_with_label
            next_index, *_ = next_index_with_label
            # Adding one more pair of braces will help maintain the glyghs of tex file...
@ -238,8 +253,8 @@ class _TexParser(object):
            result += tex_string[index : next_index]
        return result

-    def raise_tex_parsing_error(self):
-        raise ValueError(f"Failed to parse tex: \"{self.tex_string}\"")
+    def raise_tex_parsing_error(self, message):
+        raise ValueError(f"Failed to parse tex ({message}): \"{self.tex_string}\"")


 class MTex(VMobject):
@ -251,7 +266,6 @@ class MTex(VMobject):
        "tex_environment": "align*",
        "isolate": [],
        "tex_to_color_map": {},
-        "unbreakable_commands": ["\\begin", "\\end"],
        "generate_plain_tex_file": False,
    }

@ -260,30 +274,33 @@ class MTex(VMobject):
        tex_string = tex_string.strip()
        # Prevent from passing an empty string.
        if not tex_string:
-            tex_string = " "
+            tex_string = "\\quad"
        self.tex_string = tex_string
-        self.set_strings_to_break_up()

        self.generate_mobject()

        self.init_colors()
        self.set_color_by_tex_to_color_map(self.tex_to_color_map)
-        scale_factor = SCALE_FACTOR_PER_FONT_POINT * self.font_size
-        self.scale(scale_factor)
+        self.scale(SCALE_FACTOR_PER_FONT_POINT * self.font_size)

-    def set_strings_to_break_up(self):
-        strings_to_break_up = remove_list_redundancies([
-            *self.isolate, *self.tex_to_color_map.keys()
+    def get_additional_substrings_to_break_up(self):
+        result = remove_list_redundancies([
+            *self.tex_to_color_map.keys(), *self.isolate
        ])
-        if "" in strings_to_break_up:
-            strings_to_break_up.remove("")
-        self.strings_to_break_up = strings_to_break_up
+        if "" in result:
+            result.remove("")
+        return result
+
+    def get_parser(self):
+        return _TexParser(self.tex_string, self.get_additional_substrings_to_break_up())

    def generate_mobject(self):
-        tex_parser = _TexParser(self)
+        tex_string = self.tex_string
+        tex_parser = self.get_parser()
        self.tex_spans_dict = tex_parser.tex_spans_dict
+        self.specified_substrings = tex_parser.specified_substrings

-        plain_full_tex = self.get_tex_file_body(self.tex_string)
+        plain_full_tex = self.get_tex_file_body(tex_string)
        plain_hash_val = hash(plain_full_tex)
        if plain_hash_val in TEX_HASH_TO_MOB_MAP:
            self.add(*TEX_HASH_TO_MOB_MAP[plain_hash_val].copy())
@ -296,7 +313,7 @@ class MTex(VMobject):
            self.add(*TEX_HASH_TO_MOB_MAP[hash_val].copy())
            return self

-        with display_during_execution(f"Writing \"{self.tex_string}\""):
+        with display_during_execution(f"Writing \"{tex_string}\""):
            filename = tex_to_svg_file(full_tex)
            svg_mob = _LabelledTex(filename)
            self.add(*svg_mob.copy())
@ -305,7 +322,7 @@ class MTex(VMobject):
        if not self.generate_plain_tex_file:
            return self

-        with display_during_execution(f"Writing \"{self.tex_string}\""):
+        with display_during_execution(f"Writing \"{tex_string}\""):
            filename = tex_to_svg_file(plain_full_tex)
            plain_svg_mob = _PlainTex(filename)
            svg_mob = TEX_HASH_TO_MOB_MAP[hash_val]
@ -363,23 +380,25 @@ class MTex(VMobject):
    def sort_scripts_in_tex_order(self):
        # LaTeX always puts superscripts before subscripts.
        # This function sorts the submobjects of scripts in the order of tex given.
+        tex_spans_dict = self.tex_spans_dict
        index_and_span_list = sorted([
            (index, span_tuple)
-            for span_tuple, tex_span in self.tex_spans_dict.items()
+            for span_tuple, tex_span in tex_spans_dict.items()
            if tex_span.script_type != 0
            for index in span_tuple
        ])
-        index_and_span_pairs = _get_neighbouring_pairs(index_and_span_list)

        switch_range_pairs = []
-        for index_and_span_0, index_and_span_1 in index_and_span_pairs:
+        for index_and_span_0, index_and_span_1 in _get_neighbouring_pairs(
+            index_and_span_list
+        ):
            index_0, span_tuple_0 = index_and_span_0
            index_1, span_tuple_1 = index_and_span_1
            if index_0 != index_1:
                continue
            if not all([
-                self.tex_spans_dict[span_tuple_0].script_type == 1,
-                self.tex_spans_dict[span_tuple_1].script_type == 2
+                tex_spans_dict[span_tuple_0].script_type == 1,
+                tex_spans_dict[span_tuple_1].script_type == 2
            ]):
                continue
            submob_range_0 = self.range_of_part(
@ -408,9 +427,10 @@ class MTex(VMobject):
        # Not sure whether this is the best practice...
        # This temporarily supports `TransformMatchingTex`.
        tex_string = self.tex_string
+        tex_spans_dict = self.tex_spans_dict
        # Use tex strings including "_", "^".
        label_dict = {}
-        for span_tuple, tex_span in self.tex_spans_dict.items():
+        for span_tuple, tex_span in tex_spans_dict.items():
            if tex_span.script_type != 0:
                label_dict[tex_span.label] = span_tuple
            else:
@ -427,7 +447,7 @@ class MTex(VMobject):
            curr_span_tuple = label_dict[curr_label]
            prev_span_tuple = label_dict[prev_label]
            next_span_tuple = label_dict[next_label]
-            containing_labels = self.tex_spans_dict[curr_span_tuple].containing_labels
+            containing_labels = tex_spans_dict[curr_span_tuple].containing_labels
            tex_string_spans.append([
                prev_span_tuple[1] if prev_label in containing_labels else curr_span_tuple[0],
                next_span_tuple[0] if next_label in containing_labels else curr_span_tuple[1]
@ -440,10 +460,11 @@ class MTex(VMobject):
            submob.get_tex = MethodType(lambda inst: inst.tex_string, submob)

    def get_part_by_span_tuples(self, span_tuples):
-        labels = remove_list_redundancies(list(it.chain(*[
-            self.tex_spans_dict[span_tuple].containing_labels
+        tex_spans_dict = self.tex_spans_dict
+        labels = set(it.chain(*[
+            tex_spans_dict[span_tuple].containing_labels
            for span_tuple in span_tuples
-        ])))
+        ]))
        return VGroup(*filter(
            lambda submob: submob.submob_label in labels,
            self.submobjects
@ -508,6 +529,12 @@ class MTex(VMobject):
        part = self.get_part_by_tex(tex, index=index)
        return self.indices_of_part(part)

+    def indices_of_all_parts_by_tex(self, tex, index=0):
+        all_parts = self.get_parts_by_tex(tex)
+        return list(it.chain(*[
+            self.indices_of_part(part) for part in all_parts
+        ]))
+
    def range_of_part(self, part):
        indices = self.indices_of_part(part)
        return range(indices[0], indices[-1] + 1)
@ -533,14 +560,11 @@ class MTex(VMobject):
            for span_tuple in self.tex_spans_dict.keys()
        ])

-    def print_tex_strings_of_submobjects(self):
-        # For debugging
-        # Work with `index_labels()`
-        print("\n")
-        print(f"Submobjects of \"{self.get_tex()}\":")
+    def list_tex_strings_of_submobjects(self):
+        # Work with `index_labels()`.
+        log.debug(f"Submobjects of \"{self.get_tex()}\":")
        for i, submob in enumerate(self.submobjects):
-            print(f"{i}: \"{submob.get_tex()}\"")
-        print("\n")
+            log.debug(f"{i}: \"{submob.get_tex()}\"")


 class MTexText(MTex):