Working VAE forward pass animation. All I need now is valid images.

2025-06-29 02:07:38 +08:00 · 2022-02-03 01:29:02 -05:00
parent 1ec32f377a
commit 8140aec3be
13 changed files with 346 additions and 32 deletions
--- a/6
+++ b/6
@ -1,6 +1,10 @@
 video:
 	manim -pqh src/vae.py VAEScene --media_dir media
-	cp media/videos/vae/1080p60/VAEScene.mp4 final_videos
+	cp media/videos/vae/720p60/VAEScene.mp4 final_videos
+train:  # one chained command: a `cd` on its own recipe line would not carry over
+	cd src/autoencoder_models && \
+	python vanilla_autoencoder.py && \
+	python variational_autoencoder.py
 checkstyle:
 	pycodestyle src
 	pydocstyle src
--- a/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte
+++ b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte
--- a/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte.gz
+++ b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte.gz
--- a/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte
+++ b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte
--- a/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte.gz
+++ b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte.gz
--- a/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte
+++ b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte
--- a/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte.gz
+++ b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte.gz
--- a/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte
+++ b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte
--- a/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte.gz
+++ b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte.gz
--- a/src/autoencoder_models/vanilla_autoencoder.py
+++ b/src/autoencoder_models/vanilla_autoencoder.py
@ -0,0 +1,112 @@
+import torch
+from torchvision import datasets
+from torchvision import transforms
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+
+# Convert each image to a PyTorch tensor.
+tensor_transform = transforms.ToTensor()
+
+# MNIST training split, stored under ./data and downloaded when missing.
+dataset = datasets.MNIST(
+    root="./data",
+    train=True,
+    download=True,
+    transform=tensor_transform,
+)
+
+# Serve the training set in shuffled mini-batches of 32.
+loader = torch.utils.data.DataLoader(
+    dataset=dataset, batch_size=32, shuffle=True)
+
+# Variational autoencoder as a PyTorch module
+# 28*28 ==> 9 ==> 28*28
+class VAE(torch.nn.Module):
+    """Fully connected variational autoencoder for flattened 28x28 images:
+    784 pixels ==> 9-d latent Gaussian ==> 784 values between 0 and 1."""
+
+    def __init__(self):
+        super().__init__()
+        # 784 ==> 18: Linear layers, each followed by a ReLU
+        self.encoder = torch.nn.Sequential(
+            torch.nn.Linear(28 * 28, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 18),
+            torch.nn.ReLU(),
+        )
+        # 18 ==> 9: separate heads for the latent mean and log-variance
+        self.mean_embedding = torch.nn.Linear(18, 9)
+        self.logvar_embedding = torch.nn.Linear(18, 9)
+        # 9 ==> 784: ends in a Sigmoid so every output lies between 0 and 1
+        self.decoder = torch.nn.Sequential(
+            torch.nn.Linear(9, 18),
+            torch.nn.ReLU(),
+            torch.nn.Linear(18, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 28 * 28),
+            torch.nn.Sigmoid(),
+        )
+
+    def forward(self, x):
+        """Return (mean, logvar, reconstruction, x) for a (batch, 784) input."""
+        encoded = self.encoder(x)
+        mean = self.mean_embedding(encoded)
+        logvar = self.logvar_embedding(encoded)
+        # Reparameterization trick: the decoder takes 9 inputs, so feed it the
+        # sample z = mean + std * eps, not cat(mean, logvar), which is 18 wide.
+        latent = mean + torch.exp(0.5 * logvar) * torch.randn_like(mean)
+        reconstructed = self.decoder(latent)
+        return mean, logvar, reconstructed, x
+
+# Model Initialization
+model = VAE()
+
+def loss_function(mean, log_var, reconstructed, original):
+    """Returns batch-mean KL(q || N(0, I)) plus the MSE reconstruction error"""
+    kl_per_sample = -0.5 * torch.sum(1 + log_var - mean ** 2 - log_var.exp(), dim=1)
+    mse = torch.nn.functional.mse_loss(reconstructed, original)
+    return torch.mean(kl_per_sample, dim=0) + mse
+
+# Using an Adam Optimizer with lr = 0.1
+optimizer = torch.optim.Adam(model.parameters(), lr=1e-1, weight_decay=1e-8)
+
+epochs = 10
+# One (epoch index, inputs, reconstructions) tuple per epoch, taken from the
+# last batch of that epoch
+outputs = []
+# One scalar loss value per optimizer step
+losses = []
+for epoch in tqdm(range(epochs)):
+    for (image, _) in loader:
+        # Flatten each 28x28 image into a 784-vector
+        image = image.reshape(-1, 28 * 28)
+        # Forward pass; the model also hands back the input it was given
+        mean, log_var, reconstructed, image = model(image)
+        loss = loss_function(mean, log_var, reconstructed, image)
+        # Clear stale gradients, backpropagate, then update the parameters
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        # Store a plain Python float for plotting
+        losses.append(loss.item())
+    # Record this epoch's index (not the total epoch count) and detach the
+    # reconstruction so the stored sample holds no gradient history
+    outputs.append((epoch, image, reconstructed.detach()))
+
+# Use the FiveThirtyEight look and label both axes
+plt.style.use('fivethirtyeight')
+plt.xlabel('Iterations')
+plt.ylabel('Loss')
+# Dump the full loss history, then plot only its last 100 entries
+print(losses)
+recent_losses = losses[-100:]
+plt.plot(recent_losses)
+plt.show()
--- a/src/autoencoder_models/variational_autoencoder.py
+++ b/src/autoencoder_models/variational_autoencoder.py
@ -0,0 +1,102 @@
+import torch
+from torchvision import datasets
+from torchvision import transforms
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+
+# Convert each image to a PyTorch tensor.
+tensor_transform = transforms.ToTensor()
+
+# MNIST training split, stored under ./data and downloaded when missing.
+dataset = datasets.MNIST(
+    root="./data",
+    train=True,
+    download=True,
+    transform=tensor_transform,
+)
+
+# Serve the training set in shuffled mini-batches of 32.
+loader = torch.utils.data.DataLoader(
+    dataset=dataset, batch_size=32, shuffle=True)
+
+# Plain (non-variational) autoencoder as a PyTorch module
+# 28*28 ==> 9 ==> 28*28
+class AE(torch.nn.Module):
+    """Plain fully connected autoencoder for flattened 28x28 images.
+
+    The encoder narrows 784 inputs to a 9-value code through Linear layers
+    with ReLU between them; the decoder mirrors it and ends in a Sigmoid.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+        # Layer widths from the input pixels down to the code
+        widths = [28 * 28, 128, 64, 36, 18, 9]
+
+        # 784 ==> 9: the trailing ReLU is dropped, so no activation follows
+        # the final Linear layer
+        encoder_layers = []
+        for fan_in, fan_out in zip(widths, widths[1:]):
+            encoder_layers.append(torch.nn.Linear(fan_in, fan_out))
+            encoder_layers.append(torch.nn.ReLU())
+        self.encoder = torch.nn.Sequential(*encoder_layers[:-1])
+
+        # 9 ==> 784: same widths in reverse; the closing Sigmoid keeps every
+        # output between 0 and 1
+        mirrored = widths[::-1]
+        decoder_layers = []
+        for fan_in, fan_out in zip(mirrored, mirrored[1:]):
+            decoder_layers.append(torch.nn.Linear(fan_in, fan_out))
+            decoder_layers.append(torch.nn.ReLU())
+        decoder_layers[-1] = torch.nn.Sigmoid()
+        self.decoder = torch.nn.Sequential(*decoder_layers)
+
+    def forward(self, x):
+        """Reconstructs x by passing it through the encoder and the decoder
+
+        Args:
+            x: tensor whose last dimension has 28 * 28 entries
+
+        Returns:
+            tensor shaped like x with every value between 0 and 1
+        """
+        return self.decoder(self.encoder(x))
+
+# Model Initialization
+model = AE()
+# Training loss: mean squared error between reconstruction and input
+loss_function = torch.nn.MSELoss()
+# Using an Adam Optimizer with lr = 0.1
+optimizer = torch.optim.Adam(model.parameters(), lr=1e-1, weight_decay=1e-8)
+
+epochs = 10
+# One (epoch index, inputs, reconstructions) tuple per epoch, taken from the
+# last batch of that epoch
+outputs = []
+# One scalar loss value per optimizer step
+losses = []
+for epoch in tqdm(range(epochs)):
+    for (image, _) in loader:
+        # Flatten each 28x28 image into a 784-vector
+        image = image.reshape(-1, 28 * 28)
+        # Forward pass through the encoder and the decoder
+        reconstructed = model(image)
+        loss = loss_function(reconstructed, image)
+        # Clear stale gradients, backpropagate, then update the parameters
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        # Store a plain Python float for plotting
+        losses.append(loss.item())
+    # Record this epoch's index (not the total epoch count) and detach the
+    # reconstruction so the stored sample holds no gradient history
+    outputs.append((epoch, image, reconstructed.detach()))
+  
+# Style the figure and label its axes
+plt.style.use('fivethirtyeight')
+plt.xlabel('Iterations')
+plt.ylabel('Loss')
+# Plot the last 100 values; show() is needed for a script run to display them
+plt.plot(losses[-100:])
+plt.show()
--- a/src/neural_network.py
+++ b/src/neural_network.py
@ -15,9 +15,9 @@ class NeuralNetworkLayer(VGroup):
    """Handles rendering a layer for a neural network"""

    def __init__(
-            self, num_nodes, layer_width=0.3, node_radius=0.2, 
+            self, num_nodes, layer_width=0.2, node_radius=0.12, 
            node_color=BLUE, node_outline_color=WHITE, rectangle_color=WHITE,
-            node_spacing=0.6, rectangle_fill_color=BLACK):
+            node_spacing=0.4, rectangle_fill_color=BLACK):
        super(VGroup, self).__init__()
        self.num_nodes = num_nodes
        self.layer_width = layer_width
@ -53,8 +53,8 @@ class NeuralNetwork(VGroup):

    def __init__(
            self, layer_node_count, layer_width=1.0, node_radius=1.0, 
-            node_color=BLUE, edge_color=WHITE, layer_spacing=1.5,
-            animation_dot_color=ORANGE):
+            node_color=BLUE, edge_color=WHITE, layer_spacing=1.2,
+            animation_dot_color=RED):
        super(VGroup, self).__init__()
        self.layer_node_count = layer_node_count
        self.layer_width = layer_width
@ -64,6 +64,9 @@ class NeuralNetwork(VGroup):
        self.layer_spacing = layer_spacing
        self.animation_dot_color = animation_dot_color

+        # TODO take layer_node_count [0, (1, 2), 0] 
+        # and make it have explicit distinct subspaces
+        
        self.layers = self._construct_layers()
        self.edge_layers = self._construct_edges()

@ -105,19 +108,22 @@ class NeuralNetwork(VGroup):
        """Generates an animation for feed forward propogation"""
        all_animations = []
        per_layer_run_time = run_time / len(self.edge_layers)
+        self.dots = VGroup()
        for edge_layer in self.edge_layers:
            path_animations = []
            for edge in edge_layer:
-                dot = Dot(color=self.animation_dot_color, fill_opacity=1.0)
+                dot = Dot(color=self.animation_dot_color, fill_opacity=1.0, radius=0.06)
                # Handle layering
                dot.set_z_index(1)
+                # Add to dots group
+                self.dots.add(dot)
                # Make the animation
-                anim = MoveAlongPath(dot, edge, run_time=per_layer_run_time, rate_function=linear)
+                anim = MoveAlongPath(dot, edge, run_time=per_layer_run_time, rate_function=sigmoid)
                path_animations.append(anim)
            path_animation_group = AnimationGroup(*path_animations)
            all_animations.append(path_animation_group)

-        animation_group = AnimationGroup(*all_animations, lag_ratio=1)
+        animation_group = AnimationGroup(*all_animations, run_time=run_time, lag_ratio=1)

        return animation_group

@ -132,4 +138,9 @@ class TestNeuralNetworkScene(Scene):
        # Make Animation
        self.add(nn)
        forward_propagation_animation = nn.make_forward_propagation_animation()
+
+        second_nn = NeuralNetwork([3, 4])
+        self.add(second_nn)
+
        self.play(forward_propagation_animation)
+        self.play(second_nn.make_forward_propagation_animation())
--- a/src/vae.py
+++ b/src/vae.py
@ -4,6 +4,7 @@ In this module I define Manim visualizations for Variational Autoencoders
 and Traditional Autoencoders.

 """
+from typing_extensions import runtime
 from manim import *
 import numpy as np
 import neural_network
@ -11,20 +12,21 @@ import neural_network
 class Autoencoder(VGroup):
    """Traditional Autoencoder Manim Visualization"""

-    def __init__(self, encoder_nodes_per_layer=[6, 4], decoder_nodes_per_layer=[4, 6], point_color=BLUE):
+    def __init__(self, encoder_nodes_per_layer=[5, 3], decoder_nodes_per_layer=[3, 5], point_color=BLUE, dot_radius=0.06):
        super(VGroup, self).__init__()
        self.encoder_nodes_per_layer = encoder_nodes_per_layer
        self.decoder_nodes_per_layer = decoder_nodes_per_layer
        self.point_color = point_color
+        self.dot_radius = dot_radius
        # Make the VMobjects
        self.encoder, self.decoder = self._construct_encoder_and_decoder()
        self.embedding = self._construct_embedding()
        # self.input_image, self.output_image = self._construct_input_output_images()
        # Setup the relative locations
        self.embedding.move_to(self.encoder)
-        self.embedding.shift([0.9 * self.embedding.width, 0, 0])
+        self.embedding.shift([1.1 * self.encoder.width, 0, 0])
        self.decoder.move_to(self.embedding)
-        self.decoder.shift([self.embedding.width * 0.9, 0, 0])
+        self.decoder.shift([self.decoder.width * 1.1, 0, 0])
        # self.embedding.shift(self.encoder.width * 1.5)
        # self.decoder.move_to(self.embedding.get_center())
        # Add the objects to the VAE object
@ -50,32 +52,29 @@ class Autoencoder(VGroup):
        embedding = VGroup()
        # Sample points from a Gaussian
        num_points = 200
-        standard_deviation = [1, 1]
+        standard_deviation = [0.7, 0.7]
        mean = [0, 0]
        points = np.random.normal(mean, standard_deviation, size=(num_points, 2))
        # Make an axes
        embedding.axes = Axes(
            x_range=[-3, 3],
            y_range=[-3, 3],
-            x_length = 3,
-            y_length = 3,
+            x_length=2.5,
+            y_length=2.5,
            tips=False,
        )
        # Add each point to the axes
-        point_dots = VGroup()
+        self.point_dots = VGroup()
        for point in points:
            point_location = embedding.axes.coords_to_point(*point)
-            dot = Dot(point_location, color=self.point_color) 
-            point_dots.add(dot)
+            dot = Dot(point_location, color=self.point_color, radius=self.dot_radius / 2) 
+            self.point_dots.add(dot)

-        embedding.add(point_dots)
+        embedding.add(self.point_dots)
        return embedding

-    def _construct_input_output_images(self):
-        pass
-
-    def make_embedding_generation_animation(self):
-        """Animates the embedding getting created"""
+    def _construct_input_output_images(self, input_output_image_pairs):
+        """Places the input and output images for the AE"""
        pass

    def make_forward_pass_animation(self, run_time=2):
@ -84,18 +83,18 @@ class Autoencoder(VGroup):
        # Make encoder forward pass
        encoder_forward_pass = self.encoder.make_forward_propagation_animation(run_time=per_unit_runtime)
        # Make red dot in embedding
-        location = np.random.normal(0, 1, (2))
+        location = [1.0, 1.5]
        location_point = self.embedding.axes.coords_to_point(*location)
-        dot = Dot(location_point, color=RED)
-        create_dot_animation = Create(dot, run_time=per_unit_runtime)
+        # dot = Dot(location_point, color=RED)
+        # create_dot_animation = Create(dot, run_time=per_unit_runtime)
        # Make decoder foward pass
        decoder_forward_pass = self.decoder.make_forward_propagation_animation(run_time=per_unit_runtime)
        # Add the animations to the group
-        animation_group = AnimationGroup(
+        animation_group = Succession(
            encoder_forward_pass,
            create_dot_animation,
            decoder_forward_pass,
-            lag_ratio=1
+            lag_ratio=1,
        )

        return animation_group
@ -108,19 +107,105 @@ class VariationalAutoencoder(Autoencoder):
    """Variational Autoencoder Manim Visualization"""
    
    def __init__(self):
-        super(self, Autoencoder).__init__()
+        super().__init__()

-    def make_forward_pass_animation(self):
+    def make_dot_convergence_animation(self, location, run_time=1.5):
+        """Makes the encoder's dots converge on a location and then fade out
+
+        location is in embedding-axes coordinates; run_time is the duration
+        of the move, and the fade-out that follows always takes 0.2 seconds.
+        NOTE(review): self.encoder.dots appears to be created by the encoder's
+        make_forward_propagation_animation - confirm that it runs first.
+        """
+        # Every dot shares one target, so convert the coordinates only once
+        target = self.embedding.axes.coords_to_point(*location)
+        dots = self.encoder.dots
+
+        # Honor run_time instead of a hard-coded 1.5 second move
+        move = AnimationGroup(*(dot.animate.move_to(target) for dot in dots), run_time=run_time)
+        fade = AnimationGroup(*(FadeOut(dot) for dot in dots), run_time=0.2)
+
+        return Succession(move, fade, lag_ratio=1.0)
+
+    def make_dot_divergence_animation(self, location, run_time=3.0):
+        """Makes dots diverge from the given location and move to the decoder
+
+        One red dot per node of the decoder's first layer is created at
+        location and then moved to that node's center.
+        NOTE(review): run_time is accepted but never used - confirm intent.
+        """
+        def spawn_and_send(node):
+            new_dot = Dot(location, radius=self.dot_radius, color=RED)
+            return Succession(Create(new_dot), new_dot.animate.move_to(node.get_center()))
+
+        first_layer_nodes = self.decoder.layers[0].node_group
+        return AnimationGroup(*[spawn_and_send(node) for node in first_layer_nodes])
+
+    def make_forward_pass_animation(self, run_time=1.5):
        """Overriden forward pass animation specific to a VAE"""
-        return super().make_forward_pass_animation()
+        per_unit_runtime = run_time
+        # Make encoder forward pass
+        encoder_forward_pass = self.encoder.make_forward_propagation_animation(run_time=per_unit_runtime)
+        # Make red dot in embedding
+        mean = [1.0, 1.5]
+        mean_point = self.embedding.axes.coords_to_point(*mean)
+        std = [0.8, 1.2]
+        # Make the dot convergence animation
+        dot_convergence_animation = self.make_dot_convergence_animation(mean, run_time=per_unit_runtime)
+        encoding_succesion = Succession(
+            encoder_forward_pass, 
+            dot_convergence_animation
+        )
+        # Make an ellipse centered at mean_point with std as its width and height
+        center_dot = Dot(mean_point, radius=self.dot_radius, color=GREEN)
+        ellipse = Ellipse(width=std[0], height=std[1], color=RED, fill_opacity=0.5)
+        ellipse.move_to(mean_point)
+        ellipse_animation = AnimationGroup(
+            GrowFromCenter(center_dot), 
+            GrowFromCenter(ellipse),
+        )
+        # Make the dot divergence animation
+        dot_divergence_animation = self.make_dot_divergence_animation(mean_point, run_time=per_unit_runtime)
+        # Make decoder foward pass
+        decoder_forward_pass = self.decoder.make_forward_propagation_animation(run_time=per_unit_runtime)
+        # Add the animations to the group
+        animation_group = AnimationGroup(
+            encoding_succesion,
+            ellipse_animation,
+            dot_divergence_animation,
+            decoder_forward_pass,
+            lag_ratio=1,
+        )
+
+        return animation_group
+
+"""
+    The VAE Scene for the twitter video. 
+"""
+
+config.pixel_height = 720 
+config.pixel_width = 720 
+config.frame_height = 10.0
+config.frame_width = 10.0
+# Set random seed so point distribution is constant
+np.random.seed(1)

 class VAEScene(Scene):
    """Scene object for a Variational Autoencoder and Autoencoder"""

    def construct(self):
+        # Build the VAE, center and scale it, then play its forward pass
+        vae = VariationalAutoencoder()
+        vae.move_to(ORIGIN)
+        vae.scale(1.2)
+        self.add(vae)
+        forward_pass_animation = vae.make_forward_pass_animation()
+        self.play(forward_pass_animation)
+        """
        autoencoder = Autoencoder()
        autoencoder.move_to(ORIGIN)
        # Make a forward pass animation
        self.add(autoencoder)
        forward_pass_animation = autoencoder.make_forward_pass_animation(run_time=1.5)
        self.play(forward_pass_animation)
+        """