diff --git a/Makefile b/Makefile
index 36df903..e70d25d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,10 @@
 video:
	manim -pqh src/vae.py VAEScene --media_dir media
-	cp media/videos/vae/1080p60/VAEScene.mp4 final_videos
+	cp media/videos/vae/720p60/VAEScene.mp4 final_videos
+# Each recipe line runs in its own shell, so cd must be chained with &&
+train:
+	cd src/autoencoder_models && python vanilla_autoencoder.py
+	cd src/autoencoder_models && python variational_autoencoder.py
 checkstyle:
	pycodestyle src
	pydocstyle src
\ No newline at end of file
diff --git a/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte
new file mode 100644
index 0000000..1170b2c
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte differ
diff --git a/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte.gz b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte.gz
new file mode 100644
index 0000000..5ace8ea
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/t10k-images-idx3-ubyte.gz differ
diff --git a/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte
new file mode 100644
index 0000000..d1c3a97
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte differ
diff --git a/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte.gz b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte.gz
new file mode 100644
index 0000000..a7e1415
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/t10k-labels-idx1-ubyte.gz differ
diff --git a/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte
new file mode 100644
index 0000000..bbce276
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte differ
diff --git a/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte.gz b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte.gz
new file mode 100644
index 0000000..b50e4b6
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/train-images-idx3-ubyte.gz differ
diff --git a/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte
new file mode 100644
index 0000000..d6b4c5d
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte differ
diff --git a/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte.gz b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte.gz
new file mode 100644
index 0000000..707a576
Binary files /dev/null and b/src/autoencoder_models/data/MNIST/raw/train-labels-idx1-ubyte.gz differ
diff --git a/src/autoencoder_models/vanilla_autoencoder.py b/src/autoencoder_models/vanilla_autoencoder.py
new file mode 100644
index 0000000..892f823
--- /dev/null
+++ b/src/autoencoder_models/vanilla_autoencoder.py
@@ -0,0 +1,106 @@
+import torch
+from torchvision import datasets
+from torchvision import transforms
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+
+# Transforms images to a PyTorch Tensor
+tensor_transform = transforms.ToTensor()
+
+# Download the MNIST Dataset
+dataset = datasets.MNIST(root="./data",
+                         train=True,
+                         download=True,
+                         transform=tensor_transform)
+
+# DataLoader is used to load the dataset
+# for training
+loader = torch.utils.data.DataLoader(dataset=dataset,
+                                     batch_size=32,
+                                     shuffle=True)
+
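+# An undercomplete autoencoder: the encoder compresses each 784-pixel
+# image to a 9-dimensional code, the decoder reconstructs the image from
+# that code alone, and training minimizes reconstruction error only.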
+# Creating a PyTorch class
+# 28*28 ==> 9 ==> 28*28
+class AE(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+        # Building a linear encoder with Linear
+        # layers followed by ReLU activation functions
+        # 784 ==> 9
+        self.encoder = torch.nn.Sequential(
+            torch.nn.Linear(28 * 28, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 18),
+            torch.nn.ReLU(),
+            torch.nn.Linear(18, 9)
+        )
+
+        # Building a linear decoder with Linear
+        # layers followed by ReLU activation functions
+        # The Sigmoid activation function
+        # outputs values between 0 and 1
+        # 9 ==> 784
+        self.decoder = torch.nn.Sequential(
+            torch.nn.Linear(9, 18),
+            torch.nn.ReLU(),
+            torch.nn.Linear(18, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 28 * 28),
+            torch.nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        encoded = self.encoder(x)
+        decoded = self.decoder(encoded)
+        return decoded
+
+# Model Initialization
+model = AE()
+# Reconstruction error is measured with MSE loss
+loss_function = torch.nn.MSELoss()
+# Using an Adam Optimizer with lr = 0.1
+optimizer = torch.optim.Adam(model.parameters(),
+                             lr=1e-1,
+                             weight_decay=1e-8)
+
+epochs = 10
+outputs = []
+losses = []
+for epoch in tqdm(range(epochs)):
+    for (image, _) in loader:
+        # Reshaping the image to (-1, 784)
+        image = image.reshape(-1, 28 * 28)
+        # Output of Autoencoder
+        reconstructed = model(image)
+        # Calculating the loss function
+        loss = loss_function(reconstructed, image)
+        # The gradients are set to zero,
+        # then the gradient is computed and stored,
+        # and .step() performs the parameter update
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        # Storing the losses in a list for plotting
+        losses.append(loss.detach().cpu())
+    outputs.append((epoch, image, reconstructed))
+
+# Defining the Plot Style
+plt.style.use('fivethirtyeight')
+plt.xlabel('Iterations')
+plt.ylabel('Loss')
+
+# Plotting the last 100 values
+plt.plot(losses[-100:])
+plt.show()
\ No newline at end of file
diff --git a/src/autoencoder_models/variational_autoencoder.py b/src/autoencoder_models/variational_autoencoder.py
new file mode 100644
index 0000000..338a64e
--- /dev/null
+++ b/src/autoencoder_models/variational_autoencoder.py
@@ -0,0 +1,131 @@
+import torch
+from torchvision import datasets
+from torchvision import transforms
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+
+# Transforms images to a PyTorch Tensor
+tensor_transform = transforms.ToTensor()
+
+# Download the MNIST Dataset
+dataset = datasets.MNIST(root="./data",
+                         train=True,
+                         download=True,
+                         transform=tensor_transform)
+
+# DataLoader is used to load the dataset
+# for training
+loader = torch.utils.data.DataLoader(dataset=dataset,
+                                     batch_size=32,
+                                     shuffle=True)
+
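+# Unlike the vanilla autoencoder, the encoder does not emit a latent
+# code directly: it produces the mean and log-variance of a 9-dim
+# Gaussian, and the decoder is trained on samples drawn from that
+# distribution via the reparameterization trick below.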
+# Creating a PyTorch class
+# 28*28 ==> 9 ==> 28*28
+class VAE(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        # Building a linear encoder with Linear
+        # layers followed by ReLU activation functions
+        # 784 ==> 18
+        self.encoder = torch.nn.Sequential(
+            torch.nn.Linear(28 * 28, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 18),
+            torch.nn.ReLU(),
+        )
+        self.mean_embedding = torch.nn.Linear(18, 9)
+        self.logvar_embedding = torch.nn.Linear(18, 9)
+
+        # Building a linear decoder with Linear
+        # layers followed by ReLU activation functions
+        # The Sigmoid activation function
+        # outputs values between 0 and 1
+        # 9 ==> 784
+        self.decoder = torch.nn.Sequential(
+            torch.nn.Linear(9, 18),
+            torch.nn.ReLU(),
+            torch.nn.Linear(18, 36),
+            torch.nn.ReLU(),
+            torch.nn.Linear(36, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, 128),
+            torch.nn.ReLU(),
+            torch.nn.Linear(128, 28 * 28),
+            torch.nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        encoded = self.encoder(x)
+        mean = self.mean_embedding(encoded)
+        logvar = self.logvar_embedding(encoded)
+        # Reparameterization trick: z = mean + std * eps with eps ~ N(0, I)
+        # keeps the 9-dim sample differentiable w.r.t. mean and logvar
+        std = torch.exp(0.5 * logvar)
+        eps = torch.randn_like(std)
+        z = mean + std * eps
+        reconstructed = self.decoder(z)
+        return mean, logvar, reconstructed, x
+
+# Model Initialization
+model = VAE()
+# Loss is the KL divergence plus the MSE reconstruction error
+def loss_function(mean, log_var, reconstructed, original):
+    kl = torch.mean(-0.5 * torch.sum(1 + log_var - mean ** 2 - log_var.exp(), dim=1), dim=0)
+    recon = torch.nn.functional.mse_loss(reconstructed, original)
+
+    return kl + recon
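+
+# For a diagonal Gaussian posterior N(mean, exp(log_var)) and a standard
+# normal prior, the KL term above is the closed form
+#     KL = -0.5 * sum(1 + log_var - mean^2 - exp(log_var))
+# summed over the 9 latent dimensions and averaged over the batch.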
+
+# Using an Adam Optimizer with lr = 0.1
+optimizer = torch.optim.Adam(model.parameters(),
+                             lr=1e-1,
+                             weight_decay=1e-8)
+
+epochs = 10
+outputs = []
+losses = []
+for epoch in tqdm(range(epochs)):
+    for (image, _) in loader:
+        # Reshaping the image to (-1, 784)
+        image = image.reshape(-1, 28 * 28)
+        # Output of Autoencoder
+        mean, log_var, reconstructed, image = model(image)
+        # Calculating the loss function
+        loss = loss_function(mean, log_var, reconstructed, image)
+        # The gradients are set to zero,
+        # then the gradient is computed and stored,
+        # and .step() performs the parameter update
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        # Storing the losses in a list for plotting
+        losses.append(loss.detach().cpu())
+    outputs.append((epoch, image, reconstructed))
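+
+# Illustrative sketch: the KL term pulls the latent space toward the
+# N(0, I) prior, so new digits can be generated by decoding samples
+# drawn from the prior, e.g.:
+#     with torch.no_grad():
+#         samples = model.decoder(torch.randn(16, 9))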
""" +from typing_extensions import runtime from manim import * import numpy as np import neural_network @@ -11,20 +12,21 @@ import neural_network class Autoencoder(VGroup): """Traditional Autoencoder Manim Visualization""" - def __init__(self, encoder_nodes_per_layer=[6, 4], decoder_nodes_per_layer=[4, 6], point_color=BLUE): + def __init__(self, encoder_nodes_per_layer=[5, 3], decoder_nodes_per_layer=[3, 5], point_color=BLUE, dot_radius=0.06): super(VGroup, self).__init__() self.encoder_nodes_per_layer = encoder_nodes_per_layer self.decoder_nodes_per_layer = decoder_nodes_per_layer self.point_color = point_color + self.dot_radius = dot_radius # Make the VMobjects self.encoder, self.decoder = self._construct_encoder_and_decoder() self.embedding = self._construct_embedding() # self.input_image, self.output_image = self._construct_input_output_images() # Setup the relative locations self.embedding.move_to(self.encoder) - self.embedding.shift([0.9 * self.embedding.width, 0, 0]) + self.embedding.shift([1.1 * self.encoder.width, 0, 0]) self.decoder.move_to(self.embedding) - self.decoder.shift([self.embedding.width * 0.9, 0, 0]) + self.decoder.shift([self.decoder.width * 1.1, 0, 0]) # self.embedding.shift(self.encoder.width * 1.5) # self.decoder.move_to(self.embedding.get_center()) # Add the objects to the VAE object @@ -50,32 +52,29 @@ class Autoencoder(VGroup): embedding = VGroup() # Sample points from a Gaussian num_points = 200 - standard_deviation = [1, 1] + standard_deviation = [0.7, 0.7] mean = [0, 0] points = np.random.normal(mean, standard_deviation, size=(num_points, 2)) # Make an axes embedding.axes = Axes( x_range=[-3, 3], y_range=[-3, 3], - x_length = 3, - y_length = 3, + x_length=2.5, + y_length=2.5, tips=False, ) # Add each point to the axes - point_dots = VGroup() + self.point_dots = VGroup() for point in points: point_location = embedding.axes.coords_to_point(*point) - dot = Dot(point_location, color=self.point_color) - point_dots.add(dot) + dot = Dot(point_location, color=self.point_color, radius=self.dot_radius / 2) + self.point_dots.add(dot) - embedding.add(point_dots) + embedding.add(self.point_dots) return embedding - def _construct_input_output_images(self): - pass - - def make_embedding_generation_animation(self): - """Animates the embedding getting created""" + def _construct_input_output_images(self, input_output_image_pairs): + """Places the input and output images for the AE""" pass def make_forward_pass_animation(self, run_time=2): @@ -84,18 +83,18 @@ class Autoencoder(VGroup): # Make encoder forward pass encoder_forward_pass = self.encoder.make_forward_propagation_animation(run_time=per_unit_runtime) # Make red dot in embedding - location = np.random.normal(0, 1, (2)) + location = [1.0, 1.5] location_point = self.embedding.axes.coords_to_point(*location) - dot = Dot(location_point, color=RED) - create_dot_animation = Create(dot, run_time=per_unit_runtime) + # dot = Dot(location_point, color=RED) + # create_dot_animation = Create(dot, run_time=per_unit_runtime) # Make decoder foward pass decoder_forward_pass = self.decoder.make_forward_propagation_animation(run_time=per_unit_runtime) # Add the animations to the group - animation_group = AnimationGroup( + animation_group = Succession( encoder_forward_pass, create_dot_animation, decoder_forward_pass, - lag_ratio=1 + lag_ratio=1, ) return animation_group @@ -108,19 +107,105 @@ class VariationalAutoencoder(Autoencoder): """Variational Autoencoder Manim Visualization""" def __init__(self): - super(self, 
-    def make_forward_pass_animation(self):
+    def make_dot_convergence_animation(self, location, run_time=1.5):
+        """Makes dots converge on a specific location"""
+        # Move to location
+        animations = []
+        for dot in self.encoder.dots:
+            coords = self.embedding.axes.coords_to_point(*location)
+            animations.append(dot.animate.move_to(coords))
+        move_animations = AnimationGroup(*animations, run_time=run_time)
+        # Follow up with remove animations
+        remove_animations = []
+        for dot in self.encoder.dots:
+            remove_animations.append(FadeOut(dot))
+        remove_animations = AnimationGroup(*remove_animations, run_time=0.2)
+
+        animation_group = Succession(move_animations, remove_animations, lag_ratio=1.0)
+
+        return animation_group
+
+    def make_dot_divergence_animation(self, location, run_time=3.0):
+        """Makes dots diverge from the given location and move to the decoder"""
+        animations = []
+        for node in self.decoder.layers[0].node_group:
+            new_dot = Dot(location, radius=self.dot_radius, color=RED)
+            per_node_succession = Succession(
+                Create(new_dot),
+                new_dot.animate.move_to(node.get_center()),
+            )
+            animations.append(per_node_succession)
+
+        animation_group = AnimationGroup(*animations, run_time=run_time)
+        return animation_group
+
+    def make_forward_pass_animation(self, run_time=1.5):
         """Overriden forward pass animation specific to a VAE"""
-        return super().make_forward_pass_animation()
+        per_unit_runtime = run_time
+        # Make encoder forward pass
+        encoder_forward_pass = self.encoder.make_forward_propagation_animation(run_time=per_unit_runtime)
+        # Make red dot in embedding
+        mean = [1.0, 1.5]
+        mean_point = self.embedding.axes.coords_to_point(*mean)
+        std = [0.8, 1.2]
+        # Make the dot convergence animation
+        dot_convergence_animation = self.make_dot_convergence_animation(mean, run_time=per_unit_runtime)
+        encoding_succession = Succession(
+            encoder_forward_pass,
+            dot_convergence_animation
+        )
+        # Make an ellipse centered at mean_point with std outline
+        center_dot = Dot(mean_point, radius=self.dot_radius, color=GREEN)
+        ellipse = Ellipse(width=std[0], height=std[1], color=RED, fill_opacity=0.5)
+        ellipse.move_to(mean_point)
+        ellipse_animation = AnimationGroup(
+            GrowFromCenter(center_dot),
+            GrowFromCenter(ellipse),
+        )
+        # Make the dot divergence animation
+        dot_divergence_animation = self.make_dot_divergence_animation(mean_point, run_time=per_unit_runtime)
+        # Make decoder forward pass
+        decoder_forward_pass = self.decoder.make_forward_propagation_animation(run_time=per_unit_runtime)
+        # Add the animations to the group
+        animation_group = AnimationGroup(
+            encoding_succession,
+            ellipse_animation,
+            dot_divergence_animation,
+            decoder_forward_pass,
+            lag_ratio=1,
+        )
+
+        return animation_group
+
+"""
+    The VAE Scene for the twitter video.
+"""
+
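+# Module-level config overrides the CLI quality flags, so the scene
+# renders at 720x720 and manim writes it to media/videos/vae/720p60/,
+# matching the cp path in the Makefile's video target.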
+""" + +config.pixel_height = 720 +config.pixel_width = 720 +config.frame_height = 10.0 +config.frame_width = 10.0 +# Set random seed so point distribution is constant +np.random.seed(1) class VAEScene(Scene): """Scene object for a Variational Autoencoder and Autoencoder""" def construct(self): + # Set Scene config + vae = VariationalAutoencoder() + vae.move_to(ORIGIN) + vae.scale(1.2) + self.add(vae) + forward_pass_animation = vae.make_forward_pass_animation() + self.play(forward_pass_animation) + """ autoencoder = Autoencoder() autoencoder.move_to(ORIGIN) # Make a forward pass animation self.add(autoencoder) forward_pass_animation = autoencoder.make_forward_pass_animation(run_time=1.5) - self.play(forward_pass_animation) \ No newline at end of file + self.play(forward_pass_animation) + """ \ No newline at end of file