Merge branch 'main' of github.com:helblazer811/ManimML

2025-07-03 23:02:00 +08:00 · 2022-12-30 22:38:19 -05:00
parent 6ba45c163c 0bc3ad561b
commit 313d1aa650
5 changed files with 87 additions and 205 deletions
--- a/examples/cnn/cnn.py
+++ b/examples/cnn/cnn.py
@ -1,25 +1,42 @@
+from pathlib import Path
+
 from manim import *
 from PIL import Image

+<<<<<<< HEAD
 from manim_ml.neural_network.layers.convolutional3d import Convolutional3DLayer
+=======
+from manim_ml.neural_network.layers import Convolutional3DLayer
+>>>>>>> 0bc3ad561ba224f3d33e9f843665c1d50d64a68b
 from manim_ml.neural_network.layers.feed_forward import FeedForwardLayer
 from manim_ml.neural_network.layers.image import ImageLayer
 from manim_ml.neural_network.neural_network import NeuralNetwork

+# Directory two levels above this file's folder; asset paths below are built from it
+ROOT_DIR = Path(__file__).parents[2]
+
 # Make the specific scene
 config.pixel_height = 1200
 config.pixel_width = 1900
 config.frame_height = 7.0
 config.frame_width = 7.0

 def make_code_snippet():
    code_str = """
        # Make nn
        nn = NeuralNetwork([
+<<<<<<< HEAD
            ImageLayer(numpy_image, height=1.5),
            Convolutional3DLayer(1, 7, 7, 3, 3),
            Convolutional3DLayer(3, 5, 5, 3, 3),
            Convolutional3DLayer(5, 3, 3, 1, 1),
+=======
+            ImageLayer(numpy_image),
+            Convolutional3DLayer(3, 3, 3),
+            Convolutional3DLayer(5, 2, 2),
+            Convolutional3DLayer(10, 2, 1),
+>>>>>>> 0bc3ad561ba224f3d33e9f843665c1d50d64a68b
            FeedForwardLayer(3),
            FeedForwardLayer(3),
        ])
@ -43,10 +60,11 @@ def make_code_snippet():

 class CombinedScene(ThreeDScene):
    def construct(self):
-        image = Image.open('../../assets/mnist/digit.jpeg')
+        image = Image.open(ROOT_DIR / 'assets/mnist/digit.jpeg')
        numpy_image = np.asarray(image)
        # Make nn
        nn = NeuralNetwork([
+<<<<<<< HEAD
                ImageLayer(numpy_image, height=1.5),
                Convolutional3DLayer(1, 7, 7, 3, 3, filter_spacing=0.32),
                Convolutional3DLayer(3, 5, 5, 3, 3, filter_spacing=0.32),
@ -57,6 +75,16 @@ class CombinedScene(ThreeDScene):
            layer_spacing=0.25,
        )
        # Center the nn
+=======
+            ImageLayer(numpy_image, height=3.5),
+            Convolutional3DLayer(3, 3, 3, filter_spacing=0.2),
+            Convolutional3DLayer(5, 2, 2, filter_spacing=0.2),
+            Convolutional3DLayer(10, 2, 1, filter_spacing=0.2),
+            FeedForwardLayer(3, rectangle_stroke_width=4, node_stroke_width=4).scale(2),
+            FeedForwardLayer(1, rectangle_stroke_width=4, node_stroke_width=4).scale(2)
+        ], layer_spacing=0.2)
+        nn.scale(0.9)
+>>>>>>> 0bc3ad561ba224f3d33e9f843665c1d50d64a68b
        nn.move_to(ORIGIN)
        self.add(nn)
        # Make code snippet
--- a/examples/disentanglement/disentanglement.py
+++ b/examples/disentanglement/disentanglement.py
@ -1,38 +1,42 @@
 """This module is dedicated to visualizing VAE disentanglement"""
-import sys
-import os
-sys.path.append(os.environ["PROJECT_ROOT"])
+from pathlib import Path
+
 from manim import *
-from manim_ml.neural_network import NeuralNetwork
-import manim_ml.util as util
+
+from manim_ml.neural_network.layers import FeedForwardLayer
+from manim_ml.neural_network.neural_network import NeuralNetwork
 import pickle

-class VAEDecoder(VGroup):
-    """Just shows the VAE encoder"""
+ROOT_DIR = Path(__file__).parents[2]

-    def __init__(self):
-        super(VGroup, self).__init__()
-        # Setup the Neural Network
-        node_counts = [3, 5]
-        self.neural_network = NeuralNetwork(node_counts, layer_spacing=0.55)
-        self.add(self.neural_network)

-    def make_encoding_animation(self):
-        pass     
+def construct_image_mobject(input_image, height=2.3):
+    """Constructs an ImageMobject of the given height from a numpy image; 2D grayscale input is expanded to RGB"""
+    # A 2D array of shape (H, W) becomes (H, W, 3) by copying it into three identical channels
+    if len(input_image.shape) == 2:
+        # Stack on a new trailing axis; np.rollaxis(..., start=3) is out of range for a 2D array
+        input_image = np.stack([input_image] * 3, axis=-1)
+    #  Make the ImageMobject
+    image_mobject = ImageMobject(input_image, image_mode="RGB")
+    image_mobject.set_resampling_algorithm(RESAMPLING_ALGORITHMS["nearest"])
+    image_mobject.height = height
+
+    return image_mobject

 class DisentanglementVisualization(VGroup):

-    def __init__(self, model_path=os.path.join(os.environ["PROJECT_ROOT"], "examples/variational_autoencoder/autoencoder_models/saved_models/model_dim2.pth"), image_height=0.35):
+    def __init__(self, model_path=ROOT_DIR / "examples/variational_autoencoder/autoencoder_models/saved_models/model_dim2.pth", image_height=0.35):
        self.model_path = model_path
        self.image_height = image_height
        # Load disentanglement image objects
-        with open(os.path.join(os.environ["PROJECT_ROOT"], "examples/variational_autoencoder/autoencoder_models/disentanglement.pkl"), "rb") as f:
+        with open(ROOT_DIR/ "examples/variational_autoencoder/autoencoder_models/disentanglement.pkl", "rb") as f:
            self.image_handler = pickle.load(f)

+
    def make_disentanglement_generation_animation(self):
        animation_list = []
        for image_index, image in enumerate(self.image_handler["images"]):
-            image_mobject = util.construct_image_mobject(image, height=self.image_height)
+            image_mobject = construct_image_mobject(image, height=self.image_height)
            r, c = self.image_handler["bin_indices"][image_index]
            # Move the image to the correct location
            r_offset = -1.2
@ -80,7 +84,11 @@ class DisentanglementScene(Scene):

    def construct(self):
        # Make the VAE decoder
-        vae_decoder = VAEDecoder()
+        vae_decoder =  NeuralNetwork([
+            FeedForwardLayer(3),
+            FeedForwardLayer(5),
+        ], layer_spacing=0.55)
+
        vae_decoder.shift([-0.55, 0, 0])
        self.play(Create(vae_decoder), run_time=1)
        # Make the embedding
--- a/examples/gan/gan.py
+++ b/examples/gan/gan.py
@ -1,4 +1,6 @@
 import random
+from pathlib import Path
+
 from PIL import Image
 from manim import *
 from manim_ml.neural_network.layers.embedding import EmbeddingLayer
@ -8,6 +10,8 @@ from manim_ml.neural_network.layers.vector import VectorLayer

 from manim_ml.neural_network.neural_network import NeuralNetwork

+ROOT_DIR = Path(__file__).parents[2]
+
 config.pixel_height = 1080
 config.pixel_width = 1080
 config.frame_height = 8.3
@ -25,7 +29,7 @@ class GAN(Mobject):
    def make_entities(self, image_height=1.2):
        """Makes all of the network entities"""
        # Make the fake image layer
-        default_image = Image.open('../../assets/gan/fake_image.png')
+        default_image = Image.open(ROOT_DIR / 'assets/gan/fake_image.png')
        numpy_image = np.asarray(default_image)
        self.fake_image_layer = ImageLayer(numpy_image, height=image_height, show_image_on_create=False) 
        # Make the Generator Network
@ -45,7 +49,7 @@ class GAN(Mobject):
        ], layer_spacing=0.1)
        self.add(self.discriminator)
        # Make Ground Truth Dataset
-        default_image = Image.open('../../assets/gan/real_image.jpg')
+        default_image = Image.open(ROOT_DIR / 'assets/gan/real_image.jpg')
        numpy_image = np.asarray(default_image)
        self.ground_truth_layer = ImageLayer(numpy_image, height=image_height)
        self.add(self.ground_truth_layer)
--- a/examples/paper_visualizations/oracle_guidance/oracle_guidance.py
+++ b/examples/paper_visualizations/oracle_guidance/oracle_guidance.py
@ -1,6 +1,8 @@
 """
    Here is a animated explanatory figure for the "Oracle Guided Image Synthesis with Relative Queries" paper. 
 """
+from pathlib import Path
+
 from manim import *
 from manim_ml.neural_network.layers import triplet
 from manim_ml.neural_network.layers.image import ImageLayer
@ -19,6 +21,8 @@ config.pixel_width = 1900
 config.frame_height = 6.0
 config.frame_width = 6.0

+ROOT_DIR = Path(__file__).parents[3]
+
 class Localizer():
    """
        Holds the localizer object, which contains the queries, images, etc.
@ -30,8 +34,8 @@ class Localizer():
        self.index = -1
        self.axes = axes
        self.num_queries = 3
-        self.assets_path = "../../../assets/oracle_guidance"
-        self.ground_truth_image_path = os.path.join(self.assets_path, "ground_truth.jpg")
+        self.assets_path = ROOT_DIR / "assets/oracle_guidance"
+        self.ground_truth_image_path = self.assets_path  / "ground_truth.jpg"
        self.ground_truth_location = np.array([2, 3])
        # Prior distribution
        print("initial gaussian")
@ -119,7 +123,7 @@ class OracleGuidanceVisualization(Scene):
        self.title = None
        # Set image paths
        # VAE embedding animation image paths
-        self.assets_path = "../../../assets/oracle_guidance"
+        self.assets_path = ROOT_DIR / "assets/oracle_guidance"
        self.input_embed_image_path = os.path.join(self.assets_path, "input_image.jpg")
        self.output_embed_image_path = os.path.join(self.assets_path, "output_image.jpg")

--- a/examples/variational_autoencoder/variational_autoencoder.py
+++ b/examples/variational_autoencoder/variational_autoencoder.py
@ -4,123 +4,20 @@ In this module I define Manim visualizations for Variational Autoencoders
 and Traditional Autoencoders.

 """
+from pathlib import Path
+
 from manim import *
-import pickle
 import numpy as np
-import os
 from PIL import Image
-import manim_ml.neural_network as neural_network
-from manim_ml.neural_network.embedding import EmbeddingLayer
-from manim_ml.neural_network.feed_forward import FeedForwardLayer
-from manim_ml.neural_network.image import ImageLayer
+from manim_ml.neural_network.layers import EmbeddingLayer
+from manim_ml.neural_network.layers import FeedForwardLayer
+from manim_ml.neural_network.layers import ImageLayer
 from manim_ml.neural_network.neural_network import NeuralNetwork

-class VariationalAutoencoder(VGroup):
-    """Variational Autoencoder Manim Visualization"""
+ROOT_DIR = Path(__file__).parents[2]

-    def __init__(
-        self, encoder_nodes_per_layer=[5, 3], decoder_nodes_per_layer=[3, 5], point_color=BLUE, 
-        dot_radius=0.05, ellipse_stroke_width=2.0, layer_spacing=0.5
-    ):
-        super(VGroup, self).__init__()
-        self.encoder_nodes_per_layer = encoder_nodes_per_layer
-        self.decoder_nodes_per_layer = decoder_nodes_per_layer
-        self.point_color = point_color
-        self.dot_radius = dot_radius
-        self.layer_spacing = layer_spacing
-        self.ellipse_stroke_width = ellipse_stroke_width
-        # Make the VMobjects
-        self.encoder, self.decoder = self._construct_encoder_and_decoder()
-        self.embedding = self._construct_embedding()
-        # Setup the relative locations
-        self.embedding.move_to(self.encoder)
-        self.embedding.shift([1.4 * self.encoder.width, 0, 0])
-        self.decoder.move_to(self.embedding)
-        self.decoder.shift([self.decoder.width * 1.4, 0, 0])
-        # Add the objects to the VAE object
-        self.add(self.encoder)
-        self.add(self.decoder)
-        self.add(self.embedding)
-
-    def _construct_encoder_and_decoder(self):
-        """Makes the VAE encoder and decoder"""
-        # Make the encoder
-        layer_node_count = self.encoder_nodes_per_layer
-        encoder = neural_network.NeuralNetwork(layer_node_count, dot_radius=self.dot_radius, layer_spacing=self.layer_spacing)
-        encoder.scale(1.2)
-        # Make the decoder
-        layer_node_count = self.decoder_nodes_per_layer
-        decoder = neural_network.NeuralNetwork(layer_node_count, dot_radius=self.dot_radius, layer_spacing=self.layer_spacing)
-        decoder.scale(1.2)
-
-        return encoder, decoder
-
-    def _construct_embedding(self):
-        """Makes a Gaussian-like embedding"""
-        embedding = VGroup()
-        # Sample points from a Gaussian
-        num_points = 200
-        standard_deviation = [0.9, 0.9]
-        mean = [0, 0]
-        points = np.random.normal(mean, standard_deviation, size=(num_points, 2))
-        # Make an axes
-        embedding.axes = Axes(
-            x_range=[-3, 3],
-            y_range=[-3, 3],
-            x_length=2.2,
-            y_length=2.2,
-            tips=False,
-        )
-        # Add each point to the axes
-        self.point_dots = VGroup()
-        for point in points:
-            point_location = embedding.axes.coords_to_point(*point)
-            dot = Dot(point_location, color=self.point_color, radius=self.dot_radius/2) 
-            self.point_dots.add(dot)
-
-        embedding.add(self.point_dots)
-        return embedding
-
-    def _construct_image_mobject(self, input_image, height=2.3):
-        """Constructs an ImageMobject from a numpy grayscale image"""
-        # Convert image to rgb
-        input_image = np.repeat(input_image, 3, axis=0)
-        input_image = np.rollaxis(input_image, 0, start=3)
-        #  Make the ImageMobject
-        image_mobject = ImageMobject(input_image, image_mode="RGB")
-        image_mobject.set_resampling_algorithm(RESAMPLING_ALGORITHMS["nearest"])
-        image_mobject.height = height
-
-        return image_mobject
-
-    def _construct_input_output_images(self, image_pair):
-        """Places the input and output images for the AE"""
-        # Takes the image pair
-        # image_pair is assumed to be [2, x, y]
-        input_image = image_pair[0][None, :, :]
-        recon_image = image_pair[1][None, :, :]
-        # Make the image mobjects
-        input_image_object = self._construct_image_mobject(input_image)
-        recon_image_object = self._construct_image_mobject(recon_image)
-
-        return input_image_object, recon_image_object
-
-    def make_dot_convergence_animation(self, location, run_time=1.5):
-        """Makes dots converge on a specific location"""
-        # Move to location
-        animations = []
-        for dot in self.encoder.dots:
-            coords = self.embedding.axes.coords_to_point(*location)
-            animations.append(dot.animate.move_to(coords))
-        move_animations = AnimationGroup(*animations, run_time=1.5)
-        # Follow up with remove animations
-        remove_animations = []
-        for dot in self.encoder.dots:
-            remove_animations.append(FadeOut(dot))
-        remove_animations = AnimationGroup(*remove_animations, run_time=0.2)
-
-        animation_group = Succession(move_animations, remove_animations, lag_ratio=1.0)

+<<<<<<< HEAD
        return animation_group

    def make_dot_divergence_animation(self, location, run_time=3.0):
@ -243,84 +140,25 @@ class VariationalAutoencoder(VGroup):
        )

        return animation_group
+=======
+class VAEScene(Scene):
+    """Scene object for a Variational Autoencoder and Autoencoder"""
+>>>>>>> 0bc3ad561ba224f3d33e9f843665c1d50d64a68b

-class VariationalAutoencoder(VGroup):
+    def construct(self):

-    def __init__(self):
-        embedding_layer = EmbeddingLayer()
-        
-        image = Image.open('images/image.jpeg')
-        numpy_image = np.asarray(image)
-        # Make nn
-        neural_network = NeuralNetwork([
+        numpy_image = np.asarray(Image.open(ROOT_DIR / 'assets/mnist/digit.jpeg'))
+        vae = NeuralNetwork([
            ImageLayer(numpy_image, height=1.4),
            FeedForwardLayer(5),
            FeedForwardLayer(3),
-            embedding_layer,
+            EmbeddingLayer(dist_theme="ellipse").scale(2),
            FeedForwardLayer(3),
            FeedForwardLayer(5),
            ImageLayer(numpy_image, height=1.4),
        ])

-        neural_network.scale(1.3)
-
-        self.play(Create(neural_network))
-        self.play(neural_network.make_forward_pass_animation(run_time=15))
-
-class MNISTImageHandler():
-    """Deals with loading serialized VAE mnist images from "autoencoder_models" """
-
-    def __init__(
-        self, 
-        image_pairs_file_path=os.path.join(os.environ["PROJECT_ROOT"], "examples/variational_autoencoder/autoencoder_models/image_pairs.pkl"), 
-        interpolations_file_path=os.path.join(os.environ["PROJECT_ROOT"], "examples/variational_autoencoder/autoencoder_models/interpolations.pkl")
-    ):
-        self.image_pairs_file_path = image_pairs_file_path
-        self.interpolations_file_path = interpolations_file_path
-
-        self.image_pairs = []
-        self.interpolation_images = []
-        self.interpolation_latent_path = []
-
-        self.load_serialized_data()
-
-    def load_serialized_data(self):
-        with open(self.image_pairs_file_path, "rb") as f:
-            self.image_pairs = pickle.load(f)
-
-        with open(self.interpolations_file_path, "rb") as f:
-            self.interpolation_dict = pickle.load(f)
-            self.interpolation_images = self.interpolation_dict["interpolation_images"]
-            self.interpolation_latent_path = self.interpolation_dict["interpolation_path"]
-
-"""
-    The VAE Scene for the twitter video. 
-"""
-config.pixel_height = 720 
-config.pixel_width = 1280 
-config.frame_height = 5.0
-config.frame_width = 5.0
-# Set random seed so point distribution is constant
-np.random.seed(1)
-
-class VAEScene(Scene):
-    """Scene object for a Variational Autoencoder and Autoencoder"""
-
-    def construct(self):
-        # Set Scene config
-        vae = VariationalAutoencoder()
-        mnist_image_handler = MNISTImageHandler()
-        image_pair = mnist_image_handler.image_pairs[3]
-        vae.move_to(ORIGIN)
        vae.scale(1.3)
-        self.play(Create(vae), run_time=3)
-        # Make a forward pass animation
-        forward_pass_animation = vae.make_forward_pass_animation(image_pair)
-        self.play(forward_pass_animation)
-        # Remove the input and output images
-        reset_animation = vae.make_reset_vae_animation()
-        self.play(reset_animation)
-        # Interpolation animation
-        interpolation_images = mnist_image_handler.interpolation_images
-        interpolation_animation = vae.make_interpolation_animation(interpolation_images)
-        self.play(interpolation_animation)
+
+        self.play(Create(vae))
+        self.play(vae.make_forward_pass_animation(run_time=15))