diff --git a/Makefile b/Makefile
index 36df903..e70d25d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,10 @@
 video:
	manim -pqh src/vae.py VAEScene --media_dir media
-	cp media/videos/vae/1080p60/VAEScene.mp4 final_videos
+	cp media/videos/vae/720p60/VAEScene.mp4 final_videos
+# Each recipe line runs in its own shell, so cd must be chained with &&
+train:
+	cd src/autoencoder_models && python vanilla_autoencoder.py
+	cd src/autoencoder_models && python variational_autoencoder.py
 checkstyle:
	pycodestyle src
	pydocstyle src
\ No newline at end of file
diff --git a/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte
new file mode 100644
index 0000000..1170b2c
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte differ
diff --git a/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte.gz b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte.gz
new file mode 100644
index 0000000..5ace8ea
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte.gz differ
diff --git a/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte
new file mode 100644
index 0000000..d1c3a97
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte differ
diff --git a/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte.gz b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte.gz
new file mode 100644
index 0000000..a7e1415
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte.gz differ
diff --git a/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte
new file mode 100644
index 0000000..bbce276
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte differ
diff --git a/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte.gz b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte.gz
new file mode 100644
index 0000000..b50e4b6
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte.gz differ
diff --git a/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte
new file mode 100644
index 0000000..d6b4c5d
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte differ
diff --git a/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte.gz b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte.gz
new file mode 100644
index 0000000..707a576
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte.gz differ
diff --git a/src/autoencoder_models/vanilla_autoencoder.py b/src/autoencoder_models/vanilla_autoencoder.py
new file mode 100644
index 0000000..892f823
--- /dev/null
+++ b/src/autoencoder_models/vanilla_autoencoder.py
@@ -0,0 +1,106 @@
+import torch
+from torchvision import datasets
+from torchvision import transforms
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+
+# Transforms images to a PyTorch Tensor
+tensor_transform = transforms.ToTensor()
+
+# Download the MNIST Dataset
+dataset = datasets.MNIST(root="./data",
+                         train=True,
+                         download=True,
+                         transform=tensor_transform)
+
+# DataLoader is used to load the dataset
+# for training
+loader = torch.utils.data.DataLoader(dataset=dataset,
+                                     batch_size=32,
+                                     shuffle=True)
+
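+# An undercomplete autoencoder: the encoder compresses each 784-pixel
+# image to a 9-dimensional code, the decoder reconstructs the image from
+# that code alone, and training minimizes reconstruction error only.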
+# Creating a PyTorch class
+# 28*28 ==> 9 ==> 28*28
+class AE(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+        # Building a linear encoder with Linear
+        # layers followed by ReLU activation functions
+        # 784 ==> 9
+        self.encoder = torch.nn.Sequential(
+            torch.nn.Linear(28 * 28, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 18),
+            torch.nn.ReLU(),
+            torch.nn.Linear(18, 9)
+        )
+
+        # Building a linear decoder with Linear
+        # layers followed by ReLU activation functions
+        # The Sigmoid activation function
+        # outputs values between 0 and 1
+        # 9 ==> 784
+        self.decoder = torch.nn.Sequential(
+            torch.nn.Linear(9, 18),
+            torch.nn.ReLU(),
+            torch.nn.Linear(18, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 28 * 28),
+            torch.nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        encoded = self.encoder(x)
+        decoded = self.decoder(encoded)
+        return decoded
+
+# Model Initialization
+model = AE()
+# Reconstruction error is measured with MSE loss
+loss_function = torch.nn.MSELoss()
+# Using an Adam Optimizer with lr = 0.1
+optimizer = torch.optim.Adam(model.parameters(),
+                             lr=1e-1,
+                             weight_decay=1e-8)
+
+epochs = 10
+outputs = []
+losses = []
+for epoch in tqdm(range(epochs)):
+    for (image, _) in loader:
+        # Reshaping the image to (-1, 784)
+        image = image.reshape(-1, 28 * 28)
+        # Output of Autoencoder
+        reconstructed = model(image)
+        # Calculating the loss function
+        loss = loss_function(reconstructed, image)
+        # The gradients are set to zero,
+        # then the gradient is computed and stored,
+        # and .step() performs the parameter update
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        # Storing the losses in a list for plotting
+        losses.append(loss.detach().cpu())
+    outputs.append((epoch, image, reconstructed))
+
+# Defining the Plot Style
+plt.style.use('fivethirtyeight')
+plt.xlabel('Iterations')
+plt.ylabel('Loss')
+
+# Plotting the last 100 values
+plt.plot(losses[-100:])
+plt.show()
\ No newline at end of file
diff --git a/src/autoencoder_models/variational_autoencoder.py b/src/autoencoder_models/variational_autoencoder.py
new file mode 100644
index 0000000..338a64e
--- /dev/null
+++ b/src/autoencoder_models/variational_autoencoder.py
@@ -0,0 +1,131 @@
+import torch
+from torchvision import datasets
+from torchvision import transforms
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+
+# Transforms images to a PyTorch Tensor
+tensor_transform = transforms.ToTensor()
+
+# Download the MNIST Dataset
+dataset = datasets.MNIST(root="./data",
+                         train=True,
+                         download=True,
+                         transform=tensor_transform)
+
+# DataLoader is used to load the dataset
+# for training
+loader = torch.utils.data.DataLoader(dataset=dataset,
+                                     batch_size=32,
+                                     shuffle=True)
+
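+# Unlike the vanilla autoencoder, the encoder does not emit a latent
+# code directly: it produces the mean and log-variance of a 9-dim
+# Gaussian, and the decoder is trained on samples drawn from that
+# distribution via the reparameterization trick below.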
+# Creating a PyTorch class
+# 28*28 ==> 9 ==> 28*28
+class VAE(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        # Building a linear encoder with Linear
+        # layers followed by ReLU activation functions
+        # 784 ==> 18
+        self.encoder = torch.nn.Sequential(
+            torch.nn.Linear(28 * 28, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 18),
+            torch.nn.ReLU(),
+        )
+        self.mean_embedding = torch.nn.Linear(18, 9)
+        self.logvar_embedding = torch.nn.Linear(18, 9)
+
+        # Building a linear decoder with Linear
+        # layers followed by ReLU activation functions
+        # The Sigmoid activation function
+        # outputs values between 0 and 1
+        # 9 ==> 784
+        self.decoder = torch.nn.Sequential(
+            torch.nn.Linear(9, 18),
+            torch.nn.ReLU(),
+            torch.nn.Linear(18, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 28 * 28),
+            torch.nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        encoded = self.encoder(x)
+        mean = self.mean_embedding(encoded)
+        logvar = self.logvar_embedding(encoded)
+        # Reparameterization trick: z = mean + std * eps with eps ~ N(0, I)
+        # keeps the 9-dim sample differentiable w.r.t. mean and logvar
+        std = torch.exp(0.5 * logvar)
+        eps = torch.randn_like(std)
+        z = mean + std * eps
+        reconstructed = self.decoder(z)
+        return mean, logvar, reconstructed, x
+
+# Model Initialization
+model = VAE()
+# Loss is the KL divergence plus the MSE reconstruction error
+def loss_function(mean, log_var, reconstructed, original):
+    kl = torch.mean(-0.5 * torch.sum(1 + log_var - mean ** 2 - log_var.exp(), dim=1), dim=0)
+    recon = torch.nn.functional.mse_loss(reconstructed, original)
+
+    return kl + recon
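+
+# For a diagonal Gaussian posterior N(mean, exp(log_var)) and a standard
+# normal prior, the KL term above is the closed form
+#     KL = -0.5 * sum(1 + log_var - mean^2 - exp(log_var))
+# summed over the 9 latent dimensions and averaged over the batch.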
+
+# Using an Adam Optimizer with lr = 0.1
+optimizer = torch.optim.Adam(model.parameters(),
+                             lr=1e-1,
+                             weight_decay=1e-8)
+
+epochs = 10
+outputs = []
+losses = []
+for epoch in tqdm(range(epochs)):
+    for (image, _) in loader:
+        # Reshaping the image to (-1, 784)
+        image = image.reshape(-1, 28 * 28)
+        # Output of Autoencoder
+        mean, log_var, reconstructed, image = model(image)
+        # Calculating the loss function
+        loss = loss_function(mean, log_var, reconstructed, image)
+        # The gradients are set to zero,
+        # then the gradient is computed and stored,
+        # and .step() performs the parameter update
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        # Storing the losses in a list for plotting
+        losses.append(loss.detach().cpu())
+    outputs.append((epoch, image, reconstructed))
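+
+# Illustrative sketch: the KL term pulls the latent space toward the
+# N(0, I) prior, so new digits can be generated by decoding samples
+# drawn from the prior, e.g.:
+#     with torch.no_grad():
+#         samples = model.decoder(torch.randn(16, 9))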
""" +from typing_extensions import runtime from manim import * import numpy as np import neural_network @@ -11,20 +12,21 @@ import neural_network class Autoencoder(VGroup): """Traditional Autoencoder Manim Visualization""" - def __init__(self, encoder_nodes_per_layer=[6, 4], decoder_nodes_per_layer=[4, 6], point_color=BLUE): + def __init__(self, encoder_nodes_per_layer=[5, 3], decoder_nodes_per_layer=[3, 5], point_color=BLUE, dot_radius=0.06): super(VGroup, self).__init__() self.encoder_nodes_per_layer = encoder_nodes_per_layer self.decoder_nodes_per_layer = decoder_nodes_per_layer self.point_color = point_color + self.dot_radius = dot_radius # Make the VMobjects self.encoder, self.decoder = self._construct_encoder_and_decoder() self.embedding = self._construct_embedding() # self.input_image, self.output_image = self._construct_input_output_images() # Setup the relative locations self.embedding.move_to(self.encoder) - self.embedding.shift([0.9 * self.embedding.width, 0, 0]) + self.embedding.shift([1.1 * self.encoder.width, 0, 0]) self.decoder.move_to(self.embedding) - self.decoder.shift([self.embedding.width * 0.9, 0, 0]) + self.decoder.shift([self.decoder.width * 1.1, 0, 0]) # self.embedding.shift(self.encoder.width * 1.5) # self.decoder.move_to(self.embedding.get_center()) # Add the objects to the VAE object @@ -50,32 +52,29 @@ class Autoencoder(VGroup): embedding = VGroup() # Sample points from a Gaussian num_points = 200 - standard_deviation = [1, 1] + standard_deviation = [0.7, 0.7] mean = [0, 0] points = np.random.normal(mean, standard_deviation, size=(num_points, 2)) # Make an axes embedding.axes = Axes( x_range=[-3, 3], y_range=[-3, 3], - x_length = 3, - y_length = 3, + x_length=2.5, + y_length=2.5, tips=False, ) # Add each point to the axes - point_dots = VGroup() + self.point_dots = VGroup() for point in points: point_location = embedding.axes.coords_to_point(*point) - dot = Dot(point_location, color=self.point_color) - point_dots.add(dot) + dot = Dot(point_location, color=self.point_color, radius=self.dot_radius / 2) + self.point_dots.add(dot) - embedding.add(point_dots) + embedding.add(self.point_dots) return embedding - def _construct_input_output_images(self): - pass - - def make_embedding_generation_animation(self): - """Animates the embedding getting created""" + def _construct_input_output_images(self, input_output_image_pairs): + """Places the input and output images for the AE""" pass def make_forward_pass_animation(self, run_time=2): @@ -84,18 +83,18 @@ class Autoencoder(VGroup): # Make encoder forward pass encoder_forward_pass = self.encoder.make_forward_propagation_animation(run_time=per_unit_runtime) # Make red dot in embedding - location = np.random.normal(0, 1, (2)) + location = [1.0, 1.5] location_point = self.embedding.axes.coords_to_point(*location) - dot = Dot(location_point, color=RED) - create_dot_animation = Create(dot, run_time=per_unit_runtime) + # dot = Dot(location_point, color=RED) + # create_dot_animation = Create(dot, run_time=per_unit_runtime) # Make decoder foward pass decoder_forward_pass = self.decoder.make_forward_propagation_animation(run_time=per_unit_runtime) # Add the animations to the group - animation_group = AnimationGroup( + animation_group = Succession( encoder_forward_pass, create_dot_animation, decoder_forward_pass, - lag_ratio=1 + lag_ratio=1, ) return animation_group @@ -108,19 +107,105 @@ class VariationalAutoencoder(Autoencoder): """Variational Autoencoder Manim Visualization""" def __init__(self): - super(self, 
-    def make_forward_pass_animation(self):
+    def make_dot_convergence_animation(self, location, run_time=1.5):
+        """Makes dots converge on a specific location"""
+        # Move to location
+        animations = []
+        for dot in self.encoder.dots:
+            coords = self.embedding.axes.coords_to_point(*location)
+            animations.append(dot.animate.move_to(coords))
+        move_animations = AnimationGroup(*animations, run_time=run_time)
+        # Follow up with remove animations
+        remove_animations = []
+        for dot in self.encoder.dots:
+            remove_animations.append(FadeOut(dot))
+        remove_animations = AnimationGroup(*remove_animations, run_time=0.2)
+
+        animation_group = Succession(move_animations, remove_animations, lag_ratio=1.0)
+
+        return animation_group
+
+    def make_dot_divergence_animation(self, location, run_time=3.0):
+        """Makes dots diverge from the given location and move to the decoder"""
+        animations = []
+        for node in self.decoder.layers[0].node_group:
+            new_dot = Dot(location, radius=self.dot_radius, color=RED)
+            per_node_succession = Succession(
+                Create(new_dot),
+                new_dot.animate.move_to(node.get_center()),
+            )
+            animations.append(per_node_succession)
+
+        animation_group = AnimationGroup(*animations, run_time=run_time)
+        return animation_group
+
+    def make_forward_pass_animation(self, run_time=1.5):
         """Overriden forward pass animation specific to a VAE"""
-        return super().make_forward_pass_animation()
+        per_unit_runtime = run_time
+        # Make encoder forward pass
+        encoder_forward_pass = self.encoder.make_forward_propagation_animation(run_time=per_unit_runtime)
+        # Make red dot in embedding
+        mean = [1.0, 1.5]
+        mean_point = self.embedding.axes.coords_to_point(*mean)
+        std = [0.8, 1.2]
+        # Make the dot convergence animation
+        dot_convergence_animation = self.make_dot_convergence_animation(mean, run_time=per_unit_runtime)
+        encoding_succession = Succession(
+            encoder_forward_pass,
+            dot_convergence_animation
+        )
+        # Make an ellipse centered at mean_point with std outline
+        center_dot = Dot(mean_point, radius=self.dot_radius, color=GREEN)
+        ellipse = Ellipse(width=std[0], height=std[1], color=RED, fill_opacity=0.5)
+        ellipse.move_to(mean_point)
+        ellipse_animation = AnimationGroup(
+            GrowFromCenter(center_dot),
+            GrowFromCenter(ellipse),
+        )
+        # Make the dot divergence animation
+        dot_divergence_animation = self.make_dot_divergence_animation(mean_point, run_time=per_unit_runtime)
+        # Make decoder forward pass
+        decoder_forward_pass = self.decoder.make_forward_propagation_animation(run_time=per_unit_runtime)
+        # Add the animations to the group
+        animation_group = AnimationGroup(
+            encoding_succession,
+            ellipse_animation,
+            dot_divergence_animation,
+            decoder_forward_pass,
+            lag_ratio=1,
+        )
+
+        return animation_group
+
+"""
+    The VAE Scene for the twitter video.
+"""
+
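+# Module-level config overrides the CLI quality flags, so the scene
+# renders at 720x720 and manim writes it to media/videos/vae/720p60/,
+# matching the cp path in the Makefile's video target.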
+""" + +config.pixel_height = 720 +config.pixel_width = 720 +config.frame_height = 10.0 +config.frame_width = 10.0 +# Set random seed so point distribution is constant +np.random.seed(1) class VAEScene(Scene): """Scene object for a Variational Autoencoder and Autoencoder""" def construct(self): + # Set Scene config + vae = VariationalAutoencoder() + vae.move_to(ORIGIN) + vae.scale(1.2) + self.add(vae) + forward_pass_animation = vae.make_forward_pass_animation() + self.play(forward_pass_animation) + """ autoencoder = Autoencoder() autoencoder.move_to(ORIGIN) # Make a forward pass animation self.add(autoencoder) forward_pass_animation = autoencoder.make_forward_pass_animation(run_time=1.5) - self.play(forward_pass_animation) \ No newline at end of file + self.play(forward_pass_animation) + """ \ No newline at end of file