Working VAE forward pass animation. All I need now is valid images.

This commit is contained in:
Alec Helbling
2022-02-03 01:29:02 -05:00
committed by Alec Helbling
parent 1ec32f377a
commit 8140aec3be
13 changed files with 346 additions and 32 deletions

View File

@ -1,6 +1,10 @@
video:
manim -pqh src/vae.py VAEScene --media_dir media
cp media/videos/vae/1080p60/VAEScene.mp4 final_videos
cp media/videos/vae/720p60/VAEScene.mp4 final_videos
train:
cd src/autoencoder_models
python vanilla_autoencoder.py
python variational_autoencoder.py
checkstyle:
pycodestyle src
pydocstyle src

View File

@ -0,0 +1,112 @@
import torch
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from tqdm import tqdm
# Converts each dataset image into a PyTorch tensor.
tensor_transform = transforms.ToTensor()

# MNIST training split, stored under ./data (downloaded if missing).
dataset = datasets.MNIST(
    "./data",
    train=True,
    transform=tensor_transform,
    download=True,
)

# Shuffled mini-batches of 32 images consumed by the training loop.
loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
# Variational autoencoder over flattened 28x28 images:
# 28*28 ==> 9-d latent ==> 28*28
class VAE(torch.nn.Module):
    """Fully connected variational autoencoder with a 9-dimensional latent."""

    def __init__(self):
        super().__init__()
        # Encoder trunk built from Linear layers followed by ReLU
        # activations: 784 ==> 18
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(28 * 28, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 36),
            torch.nn.ReLU(),
            torch.nn.Linear(36, 18),
            torch.nn.ReLU(),
        )
        # Two heads map the shared 18-d features to the latent
        # mean and log-variance: 18 ==> 9 each
        self.mean_embedding = torch.nn.Linear(18, 9)
        self.logvar_embedding = torch.nn.Linear(18, 9)
        # Decoder built from Linear layers followed by ReLU activations.
        # The final Sigmoid keeps every output between 0 and 1.
        # 9 ==> 784
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(9, 18),
            torch.nn.ReLU(),
            torch.nn.Linear(18, 36),
            torch.nn.ReLU(),
            torch.nn.Linear(36, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 28 * 28),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        """Encode x, draw a latent code and decode it.

        Args:
            x: batch of flattened images, shape (batch, 784).

        Returns:
            Tuple (mean, logvar, reconstructed, x): the two (batch, 9)
            posterior parameters, the (batch, 784) reconstruction and the
            unchanged input.
        """
        encoded = self.encoder(x)
        mean = self.mean_embedding(encoded)
        logvar = self.logvar_embedding(encoded)
        if self.training:
            # Reparameterization trick: z = mean + std * eps keeps the
            # sampling step differentiable w.r.t. mean and logvar.
            std = torch.exp(0.5 * logvar)
            latent = mean + std * torch.randn_like(std)
        else:
            # Deterministic decoding outside of training.
            latent = mean
        # The decoder expects a 9-d code; feeding it cat(mean, logvar)
        # (18-d) would fail with a shape mismatch.
        reconstructed = self.decoder(latent)
        return mean, logvar, reconstructed, x


# Model Initialization
model = VAE()
# Training objective: KL divergence plus MSE reconstruction error
def loss_function(mean, log_var, reconstructed, original):
    """Return the KL term plus the mean squared reconstruction error.

    The KL term is summed over the latent dimension (dim 1) and averaged
    over the batch (dim 0); the reconstruction term is
    torch.nn.functional.mse_loss with its default reduction.
    """
    kl_per_sample = -0.5 * torch.sum(
        1 + log_var - mean ** 2 - torch.exp(log_var), dim=1
    )
    kl_term = torch.mean(kl_per_sample, dim=0)
    reconstruction_term = torch.nn.functional.mse_loss(reconstructed, original)
    return kl_term + reconstruction_term
# Using an Adam Optimizer with lr = 0.1
# NOTE(review): 0.1 is a large step size for Adam -- confirm it is intended.
optimizer = torch.optim.Adam(model.parameters(),
                             lr=1e-1,
                             weight_decay=1e-8)
epochs = 10
outputs = []
losses = []
for epoch in tqdm(range(epochs)):
    for (image, _) in loader:
        # Reshaping the image to (-1, 784)
        image = image.reshape(-1, 28 * 28)
        # Output of the variational autoencoder
        mean, log_var, reconstructed, image = model(image)
        # Calculating the loss function
        loss = loss_function(mean, log_var, reconstructed, image)
        # The gradients are set to zero,
        # then the gradient is computed and stored.
        # .step() performs parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Storing the losses in a list for plotting
        losses.append(loss.detach().cpu())
    # Keep the last batch of this epoch, tagged with the epoch index
    # (not the constant total `epochs`). The reconstruction is detached so
    # the stored tensor does not keep the autograd graph alive.
    outputs.append((epoch, image, reconstructed.detach()))
# Plot styling and axis labels
plt.style.use('fivethirtyeight')
plt.ylabel('Loss')
plt.xlabel('Iterations')
# Dump every recorded loss, then plot only the last 100 values
print(losses)
recent_losses = losses[-100:]
plt.plot(recent_losses)
plt.show()

View File

@ -0,0 +1,102 @@
import torch
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from tqdm import tqdm
# Converts each dataset image into a PyTorch tensor.
tensor_transform = transforms.ToTensor()

# MNIST training split, stored under ./data (downloaded if missing).
dataset = datasets.MNIST(
    "./data",
    train=True,
    transform=tensor_transform,
    download=True,
)

# Shuffled mini-batches of 32 images consumed by the training loop.
loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
# Plain autoencoder over flattened 28x28 images:
# 28*28 ==> 9 ==> 28*28
class AE(torch.nn.Module):
    """Fully connected autoencoder with a 9-dimensional bottleneck."""

    def __init__(self):
        super().__init__()

        def linear_relu_stack(widths):
            """Return [Linear, ReLU, Linear, ReLU, ...] for consecutive widths."""
            modules = []
            for fan_in, fan_out in zip(widths, widths[1:]):
                modules.append(torch.nn.Linear(fan_in, fan_out))
                modules.append(torch.nn.ReLU())
            return modules

        # Encoder, 784 ==> 9. The trailing ReLU is dropped so the code is
        # the raw output of the last Linear layer.
        encoder_modules = linear_relu_stack([28 * 28, 128, 64, 36, 18, 9])[:-1]
        self.encoder = torch.nn.Sequential(*encoder_modules)
        # Decoder, 9 ==> 784. The trailing ReLU is replaced by a Sigmoid,
        # which outputs values between 0 and 1.
        decoder_modules = linear_relu_stack([9, 18, 36, 64, 128, 28 * 28])[:-1]
        self.decoder = torch.nn.Sequential(*decoder_modules, torch.nn.Sigmoid())

    def forward(self, x):
        """Encode x to the 9-d code and return its reconstruction."""
        return self.decoder(self.encoder(x))


# Model Initialization
model = AE()
# Reconstruction objective: mean squared error between output and input
loss_function = torch.nn.MSELoss()
# Using an Adam Optimizer with lr = 0.1
# NOTE(review): 0.1 is a large step size for Adam -- confirm it is intended.
optimizer = torch.optim.Adam(model.parameters(),
                             lr=1e-1,
                             weight_decay=1e-8)
epochs = 10
outputs = []
losses = []
for epoch in tqdm(range(epochs)):
    for (image, _) in loader:
        # Reshaping the image to (-1, 784)
        image = image.reshape(-1, 28 * 28)
        # Output of Autoencoder
        reconstructed = model(image)
        # Calculating the loss function
        loss = loss_function(reconstructed, image)
        # The gradients are set to zero,
        # then the gradient is computed and stored.
        # .step() performs parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Storing the losses in a list for plotting
        losses.append(loss.detach().cpu())
    # Keep the last batch of this epoch, tagged with the epoch index
    # (not the constant total `epochs`). The reconstruction is detached so
    # the stored tensor does not keep the autograd graph alive.
    outputs.append((epoch, image, reconstructed.detach()))
# Defining the Plot Style
plt.style.use('fivethirtyeight')
plt.xlabel('Iterations')
plt.ylabel('Loss')
# Plotting the last 100 values
plt.plot(losses[-100:])
# This file is run as a plain script, so the figure has to be shown
# explicitly; without this call the plot is built but never displayed.
plt.show()

View File

@ -15,9 +15,9 @@ class NeuralNetworkLayer(VGroup):
"""Handles rendering a layer for a neural network"""
def __init__(
self, num_nodes, layer_width=0.3, node_radius=0.2,
self, num_nodes, layer_width=0.2, node_radius=0.12,
node_color=BLUE, node_outline_color=WHITE, rectangle_color=WHITE,
node_spacing=0.6, rectangle_fill_color=BLACK):
node_spacing=0.4, rectangle_fill_color=BLACK):
super(VGroup, self).__init__()
self.num_nodes = num_nodes
self.layer_width = layer_width
@ -53,8 +53,8 @@ class NeuralNetwork(VGroup):
def __init__(
self, layer_node_count, layer_width=1.0, node_radius=1.0,
node_color=BLUE, edge_color=WHITE, layer_spacing=1.5,
animation_dot_color=ORANGE):
node_color=BLUE, edge_color=WHITE, layer_spacing=1.2,
animation_dot_color=RED):
super(VGroup, self).__init__()
self.layer_node_count = layer_node_count
self.layer_width = layer_width
@ -64,6 +64,9 @@ class NeuralNetwork(VGroup):
self.layer_spacing = layer_spacing
self.animation_dot_color = animation_dot_color
# TODO take layer_node_count [0, (1, 2), 0]
# and make it have explicit distinct subspaces
self.layers = self._construct_layers()
self.edge_layers = self._construct_edges()
@ -105,19 +108,22 @@ class NeuralNetwork(VGroup):
"""Generates an animation for feed forward propagation"""
all_animations = []
per_layer_run_time = run_time / len(self.edge_layers)
self.dots = VGroup()
for edge_layer in self.edge_layers:
path_animations = []
for edge in edge_layer:
dot = Dot(color=self.animation_dot_color, fill_opacity=1.0)
dot = Dot(color=self.animation_dot_color, fill_opacity=1.0, radius=0.06)
# Handle layering
dot.set_z_index(1)
# Add to dots group
self.dots.add(dot)
# Make the animation
anim = MoveAlongPath(dot, edge, run_time=per_layer_run_time, rate_function=linear)
anim = MoveAlongPath(dot, edge, run_time=per_layer_run_time, rate_function=sigmoid)
path_animations.append(anim)
path_animation_group = AnimationGroup(*path_animations)
all_animations.append(path_animation_group)
animation_group = AnimationGroup(*all_animations, lag_ratio=1)
animation_group = AnimationGroup(*all_animations, run_time=run_time, lag_ratio=1)
return animation_group
@ -132,4 +138,9 @@ class TestNeuralNetworkScene(Scene):
# Make Animation
self.add(nn)
forward_propagation_animation = nn.make_forward_propagation_animation()
second_nn = NeuralNetwork([3, 4])
self.add(second_nn)
self.play(forward_propagation_animation)
self.play(second_nn.make_forward_propagation_animation())

View File

@ -4,6 +4,7 @@ In this module I define Manim visualizations for Variational Autoencoders
and Traditional Autoencoders.
"""
from typing_extensions import runtime
from manim import *
import numpy as np
import neural_network
@ -11,20 +12,21 @@ import neural_network
class Autoencoder(VGroup):
"""Traditional Autoencoder Manim Visualization"""
def __init__(self, encoder_nodes_per_layer=[6, 4], decoder_nodes_per_layer=[4, 6], point_color=BLUE):
def __init__(self, encoder_nodes_per_layer=[5, 3], decoder_nodes_per_layer=[3, 5], point_color=BLUE, dot_radius=0.06):
super(VGroup, self).__init__()
self.encoder_nodes_per_layer = encoder_nodes_per_layer
self.decoder_nodes_per_layer = decoder_nodes_per_layer
self.point_color = point_color
self.dot_radius = dot_radius
# Make the VMobjects
self.encoder, self.decoder = self._construct_encoder_and_decoder()
self.embedding = self._construct_embedding()
# self.input_image, self.output_image = self._construct_input_output_images()
# Setup the relative locations
self.embedding.move_to(self.encoder)
self.embedding.shift([0.9 * self.embedding.width, 0, 0])
self.embedding.shift([1.1 * self.encoder.width, 0, 0])
self.decoder.move_to(self.embedding)
self.decoder.shift([self.embedding.width * 0.9, 0, 0])
self.decoder.shift([self.decoder.width * 1.1, 0, 0])
# self.embedding.shift(self.encoder.width * 1.5)
# self.decoder.move_to(self.embedding.get_center())
# Add the objects to the VAE object
@ -50,32 +52,29 @@ class Autoencoder(VGroup):
embedding = VGroup()
# Sample points from a Gaussian
num_points = 200
standard_deviation = [1, 1]
standard_deviation = [0.7, 0.7]
mean = [0, 0]
points = np.random.normal(mean, standard_deviation, size=(num_points, 2))
# Make an axes
embedding.axes = Axes(
x_range=[-3, 3],
y_range=[-3, 3],
x_length = 3,
y_length = 3,
x_length=2.5,
y_length=2.5,
tips=False,
)
# Add each point to the axes
point_dots = VGroup()
self.point_dots = VGroup()
for point in points:
point_location = embedding.axes.coords_to_point(*point)
dot = Dot(point_location, color=self.point_color)
point_dots.add(dot)
dot = Dot(point_location, color=self.point_color, radius=self.dot_radius / 2)
self.point_dots.add(dot)
embedding.add(point_dots)
embedding.add(self.point_dots)
return embedding
def _construct_input_output_images(self):
pass
def make_embedding_generation_animation(self):
"""Animates the embedding getting created"""
def _construct_input_output_images(self, input_output_image_pairs):
"""Places the input and output images for the AE"""
pass
def make_forward_pass_animation(self, run_time=2):
@ -84,18 +83,18 @@ class Autoencoder(VGroup):
# Make encoder forward pass
encoder_forward_pass = self.encoder.make_forward_propagation_animation(run_time=per_unit_runtime)
# Make red dot in embedding
location = np.random.normal(0, 1, (2))
location = [1.0, 1.5]
location_point = self.embedding.axes.coords_to_point(*location)
dot = Dot(location_point, color=RED)
create_dot_animation = Create(dot, run_time=per_unit_runtime)
# dot = Dot(location_point, color=RED)
# create_dot_animation = Create(dot, run_time=per_unit_runtime)
# Make decoder forward pass
decoder_forward_pass = self.decoder.make_forward_propagation_animation(run_time=per_unit_runtime)
# Add the animations to the group
animation_group = AnimationGroup(
animation_group = Succession(
encoder_forward_pass,
create_dot_animation,
decoder_forward_pass,
lag_ratio=1
lag_ratio=1,
)
return animation_group
@ -108,19 +107,105 @@ class VariationalAutoencoder(Autoencoder):
"""Variational Autoencoder Manim Visualization"""
def __init__(self):
super(self, Autoencoder).__init__()
super().__init__()
def make_forward_pass_animation(self):
def make_dot_convergence_animation(self, location, run_time=1.5):
    """Makes the encoder's dots converge on a specific location, then fade out.

    Reads self.encoder.dots, so the encoder's forward propagation
    animation (which creates that group) must have been built first.

    location: coordinates in the embedding axes' coordinate system.
    run_time: duration of the convergence movement; the fade-out that
        follows always takes 0.2.
    Returns a Succession of the movement followed by the fade-out.
    """
    # Every dot travels to the same target, so convert the axes
    # coordinates to a scene point once instead of once per dot.
    target_point = self.embedding.axes.coords_to_point(*location)
    # Move to location, honouring the caller's run_time
    move_animations = AnimationGroup(
        *[dot.animate.move_to(target_point) for dot in self.encoder.dots],
        run_time=run_time,
    )
    # Follow up with remove animations
    remove_animations = AnimationGroup(
        *[FadeOut(dot) for dot in self.encoder.dots],
        run_time=0.2,
    )
    return Succession(move_animations, remove_animations, lag_ratio=1.0)
def make_dot_divergence_animation(self, location, run_time=3.0):
    """Makes dots appear at the given location and travel to the decoder.

    One red dot is created per node of the decoder's first layer and
    moved to that node's center.

    location: scene point at which every dot is created.
    run_time: NOTE(review): currently unused -- each step runs with the
        animation defaults; confirm whether it should be applied.
    Returns an AnimationGroup with one Succession per node.
    """
    def emit_dot_towards(node):
        # Create the dot at the source, then send it to the node's center.
        travelling_dot = Dot(location, radius=self.dot_radius, color=RED)
        return Succession(
            Create(travelling_dot),
            travelling_dot.animate.move_to(node.get_center()),
        )

    first_layer_nodes = self.decoder.layers[0].node_group
    return AnimationGroup(*[emit_dot_towards(node) for node in first_layer_nodes])
def make_forward_pass_animation(self, run_time=1.5):
"""Overridden forward pass animation specific to a VAE"""
return super().make_forward_pass_animation()
per_unit_runtime = run_time
# Make encoder forward pass
encoder_forward_pass = self.encoder.make_forward_propagation_animation(run_time=per_unit_runtime)
# Make red dot in embedding
mean = [1.0, 1.5]
mean_point = self.embedding.axes.coords_to_point(*mean)
std = [0.8, 1.2]
# Make the dot convergence animation
dot_convergence_animation = self.make_dot_convergence_animation(mean, run_time=per_unit_runtime)
encoding_succesion = Succession(
encoder_forward_pass,
dot_convergence_animation
)
# Make an ellipse centered at mean_point with a std outline
center_dot = Dot(mean_point, radius=self.dot_radius, color=GREEN)
ellipse = Ellipse(width=std[0], height=std[1], color=RED, fill_opacity=0.5)
ellipse.move_to(mean_point)
ellipse_animation = AnimationGroup(
GrowFromCenter(center_dot),
GrowFromCenter(ellipse),
)
# Make the dot divergence animation
dot_divergence_animation = self.make_dot_divergence_animation(mean_point, run_time=per_unit_runtime)
# Make decoder forward pass
decoder_forward_pass = self.decoder.make_forward_propagation_animation(run_time=per_unit_runtime)
# Add the animations to the group
animation_group = AnimationGroup(
encoding_succesion,
ellipse_animation,
dot_divergence_animation,
decoder_forward_pass,
lag_ratio=1,
)
return animation_group
"""
The VAE Scene for the twitter video.
"""
# Square 720x720 pixel output on a 10x10 unit frame
config.pixel_height = config.pixel_width = 720
config.frame_height = config.frame_width = 10.0
# Fixed random seed so the sampled point distribution is the same every run
np.random.seed(1)


class VAEScene(Scene):
    """Scene that plays the Variational Autoencoder forward pass animation"""

    def construct(self):
        """Adds a centered, scaled-up VAE to the scene and plays its forward pass."""
        vae = VariationalAutoencoder()
        vae.move_to(ORIGIN)
        vae.scale(1.2)
        self.add(vae)
        self.play(vae.make_forward_pass_animation())
        # Disabled alternative: the same scene with a plain Autoencoder.
        # autoencoder = Autoencoder()
        # autoencoder.move_to(ORIGIN)
        # # Make a forward pass animation
        # self.add(autoencoder)
        # forward_pass_animation = autoencoder.make_forward_pass_animation(run_time=1.5)
        # self.play(forward_pass_animation)