multiplication

This commit is contained in:
Varuna Jayasiri
2022-06-09 17:25:02 +05:30
parent 0ce0aba12a
commit 092b8ddaf4
3 changed files with 373 additions and 6 deletions

View File

@@ -96,7 +96,7 @@ class ArithmeticAdditionDataset(Dataset):
x = self.make_int(n_digits=random.randrange(1, self.max_digits + 1))
y = self.make_int(n_digits=random.randrange(1, self.max_digits + 1))
- return f'x={x}+{y};', f'{x + y}'
+ return f'?x={x}+{y};', f'{x + y}'
def get_packed_math_input(self):
"""
@@ -197,17 +197,17 @@ class ArithmeticAdditionAutoregression(NLPAutoRegressionConfigs):
# Get the model prediction (greedy)
output = output[-1].argmax(dim=-1)
+ # Override with the question
+ for j, p in enumerate(questions):
+ if len(p) > i + 1:
+ output[j] = dataset.stoi[p[i + 1]]
# Find which sequences have finished
finished = finished | (output == new_line)
# Skip if all have finished
if finished.sum() == len(finished):
continue
- # Override with the question
- for j, p in enumerate(questions):
- if len(p) > i + 1:
- output[j] = dataset.stoi[p[i + 1]]
# Add the next token to the input
data = torch.cat([data, output[None, :]], dim=0)

View File

@@ -0,0 +1,273 @@
"""
---
title: Arithmetic Multiplication Dataset
summary: >
This creates arithmetic multiplication problems.
---
*This is based on code by [Georges Harik (@gharik)](https://twitter.com/gharik).*
"""
import random
import string
from typing import List
import torch
from torch.utils.data import DataLoader, Dataset
from labml import monit, logger, tracker
from labml.configs import option
from labml.logger import Text
from labml_nn.experiments.nlp_autoregression import NLPAutoRegressionConfigs, transpose_batch
class ArithmeticMultiplicationDataset(Dataset):
"""
## Arithmetic Multiplication Dataset
This creates arithmetic multiplication problems and solutions with workings.
It's based on a character level tokenization.
"""
def __init__(self, seq_len: int, max_digits: int, base: int, n_sequences: int):
"""
:param seq_len: is the sequence length of generated math problems.
We fill as many problems as possible up to this length
:param max_digits: is the maximum number of digits in the operand integers
:param base: is the number base used for the operands, workings and answers
:param n_sequences: is the number of sequences per epoch
"""
self.base = base
self.n_sequences = n_sequences
self.max_digits = max_digits
self.seq_len = seq_len
# Token id to string
self.itos = list(string.digits + 'x =\n?*;')
# Character to token id
self.stoi = {c: i for i, c in enumerate(self.itos)}
def make_int(self, n_digits: int):
"""
Generates an integer with `n_digits` digits in base `self.base`
"""
res = 0
for i in range(n_digits):
d = random.randrange(1, self.base + 1) if i == 0 else random.randrange(0, self.base + 1)
res = res * self.base + d
return res
def get_add_explanation(self, x: int, y: int):
"""
Generates the workings for `x * y` as the partial products `y * d`
for each digit `d` of `x`, starting from the least significant digit.
For example, for `12 * 3` in base 10 it generates `6 3`.
"""
explanation = []
while x > 0:
rx = x % self.base
explanation.append(f"{self.to_string(y * rx)}")
x = x // self.base
return ' '.join(explanation)
# Make a multiplication problem with workings
def make_add_problem(self):
"""
Creates an arithmetic multiplication problem with workings and answer.
"""
x = self.make_int(n_digits=random.randrange(1, self.max_digits + 1))
y = self.make_int(n_digits=random.randrange(1, self.max_digits + 1))
explanation = self.get_add_explanation(x, y)
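# e.g. for x=12, y=3 in base 10 this returns 'x=12*3; 6 3 x==36\n'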
return f"x={self.to_string(x)}*{self.to_string(y)}; {explanation} x=={self.to_string(x * y)}\n"
def to_string(self, x: int):
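# Convert `x` to a string of digits in base `self.base`, most significant
# digit first; e.g. 9 becomes '21' when the base is 4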
if x == 0:
return '0'
a = []
while x > 0:
a += [f'{x % self.base}']
x = x // self.base
return ''.join(reversed(a))
def get_qa(self):
"""
Get arithmetic problem and answer. This is used for evaluation.
"""
x = self.make_int(n_digits=random.randrange(1, self.max_digits + 1))
y = self.make_int(n_digits=random.randrange(1, self.max_digits + 1))
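# e.g. for x=12, y=3 in base 10 this returns ('?x=12*3;', '36')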
return f'?x={self.to_string(x)}*{self.to_string(y)};', f'{self.to_string(x * y)}'
def get_packed_math_input(self):
"""
Generate multiple problems and pack them into a sequence.
"""
s_enc = []
mask = []
while len(s_enc) <= self.seq_len:
s_part = self.make_add_problem()
s_part_enc = self.encode('?' + s_part)
prob, sol = s_part.split(';')
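# The mask is `False` over the leading '?', the question and the ';' separator,
# and `True` over the workings and the answer (including the trailing newline)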
mask += [False] * (len(prob) + 2)
mask += [True] * len(sol)
s_enc = s_enc + s_part_enc
return s_enc, mask
def encode(self, s: str):
"""
Encode a given string
"""
return [self.stoi[c] for c in s]
def decode(self, arr: List[int]):
"""
Decode a list of token ids
"""
return ''.join([self.itos[c] for c in arr])
def __getitem__(self, idx: int):
"""
Get an input and target pair for auto-regressive modelling
"""
s, mask = self.get_packed_math_input()
s = torch.tensor(s)
mask = torch.tensor(mask)
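# Keep the token id as the target where the mask is `True` (the solution part)
# and set it to `-1` elsewhere, presumably so question tokens are ignored by the loss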
target = s * mask + -1 * (~mask)
return s[:self.seq_len], target[1:self.seq_len + 1]
def __len__(self):
"""
Number of sequences per epoch
"""
return self.n_sequences
class ArithmeticMultiplicationAutoregression(NLPAutoRegressionConfigs):
"""
## Arithmetic Task Experiment Configurations
"""
# Maximum number of digits per operand integer
max_digits: int = 4
# Number of training sequences per epoch
train_sequences_per_epoch: int = 2 ** 12
# Training data loader
train_loader: DataLoader = 'arithmetic_train_loader'
# Number of problems in evaluation
n_tests: int = 64
# No validation dataset is needed
validator = None
# Number of times to run evaluations per epoch
inner_iterations = 4
# Number base of the arithmetic problems
base: int = 10
# Number of tokens in the vocabulary
n_tokens = len(ArithmeticMultiplicationDataset(1, 1, 1, 1).itos)
@torch.no_grad()
def sample(self):
"""
### Evaluation
We use the sampling function to evaluate the model on a set of problems
"""
# Skip in the first epoch
if self.training_loop.idx < 1:
return
# Create a dataset to generate problems
dataset = ArithmeticMultiplicationDataset(self.seq_len, self.max_digits, self.base, 1)
# Get a set of problems and answers
qa = [dataset.get_qa() for _ in range(self.n_tests)]
# Collect the problems only
questions = [p[0] for p in qa]
# Create a tensor with only the initial token
data = torch.tensor([[dataset.stoi[p[0]] for p in questions]])
# Move to device
data = data.to(self.device)
# Number of sequences that have completed
finished = torch.zeros((len(questions),)).bool().to(self.device)
# Token id of the new line character - this marks the end of the answer
new_line = dataset.stoi['\n']
# Sampled results
results = [p[0] for p in questions]
# Sample up to the sequence length
for i in monit.iterate('Sample', self.seq_len - 1):
# If all the sequences have completed we skip this
if finished.sum() == len(finished):
continue
# Get the model output
output, *_ = self.model(data)
# Get the model prediction (greedy)
output = output[-1].argmax(dim=-1)
# Override with the question
for j, p in enumerate(questions):
if len(p) > i + 1:
output[j] = dataset.stoi[p[i + 1]]
# Find which sequences have finished
finished = finished | (output == new_line)
# Skip if all have finished
if finished.sum() == len(finished):
continue
# Add the next token to the input
data = torch.cat([data, output[None, :]], dim=0)
# Get the sampled results
for j, c in enumerate(output):
results[j] += dataset.itos[c]
# Discard everything after the answer in the results
results = [r.split('\n')[0] for r in results]
# Log a sample
res_sample = results[0].split(';')
logger.log([(res_sample[0], Text.key), (';', Text.subtle), (';'.join(res_sample[1:]), Text.none)])
# Get the answers
results = [r.split('x==')[-1] for r in results]
# Count the number of correct answers
correct = 0
for r, _qa in zip(results, qa):
if r == _qa[1]:
correct += 1
# Log the score
tracker.save('score', correct / len(results))
@option(ArithmeticMultiplicationAutoregression.train_loader)
def arithmetic_train_loader(c: ArithmeticMultiplicationAutoregression):
"""
Training data loader
"""
return DataLoader(ArithmeticMultiplicationDataset(c.seq_len, c.max_digits, c.base, c.train_sequences_per_epoch),
batch_size=c.batch_size,
collate_fn=transpose_batch,
num_workers=4)
def _test():
"""
Code to test generated problems
"""
dataset = ArithmeticMultiplicationDataset(256, 4, 4, 10)
print(dataset.decode(dataset.get_packed_math_input()[0]))
#
if __name__ == '__main__':
_test()
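Below is a rough usage sketch (not part of the committed files) showing how the dataset can be exercised directly, using the module path given in the experiment file that follows; the decoded output in the comments is only illustrative, since the problems are random.

from labml_nn.experiments.algo_tasks.arithmetic_multiplication import ArithmeticMultiplicationDataset

# Short sequences, up to 2-digit operands, base 10
dataset = ArithmeticMultiplicationDataset(seq_len=64, max_digits=2, base=10, n_sequences=1)
# Pack a few random problems (with workings) into one sequence
tokens, mask = dataset.get_packed_math_input()
# Decoded, the sequence looks something like
#   ?x=12*3; 6 3 x==36
#   ?x=7*45; 315 x==315
# and `mask` is `True` only over the workings and answers
print(dataset.decode(tokens))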

View File

@@ -0,0 +1,94 @@
"""
---
title: Rotary Positional Embeddings with Relative distance (RoPER) Experiment
summary: This experiment trains a transformer model with Rotary Positional Embeddings with
Relative Distance (RoPER) on the arithmetic multiplication task.
---
# Rotary Positional Embeddings with Relative distance ([RoPER](index.html)) Experiment
"""
from labml import experiment
from labml.configs import calculate
from labml_nn.experiments.algo_tasks.arithmetic_multiplication import ArithmeticMultiplicationAutoregression
from labml_nn.transformers import TransformerConfigs
from labml_nn.transformers.rope.experiment import Configs as RoPEConfigs
class Configs(RoPEConfigs, ArithmeticMultiplicationAutoregression):
"""
We inherit the [RoPE experiment](../experiment.html) and use it for the
[arithmetic multiplication task](../../experiments/arithmetic_dataset.html).
We add the option to change attention to use Rotary Positional Embeddings with Relative distance (RoPER)
below.
"""
pass
def _rotary_value_pe_mha(c: TransformerConfigs):
"""
Use Rotary Positional Embeddings with Relative distance ([RoPER](index.html)) in attention.
"""
from labml_nn.transformers.rope.value_pe import RotaryValuePEMultiHeadAttention
return RotaryValuePEMultiHeadAttention(c.n_heads, c.d_model, 1., 1.)
# Configuration options
calculate(TransformerConfigs.encoder_attn, 'rotary_value', _rotary_value_pe_mha)
calculate(TransformerConfigs.decoder_attn, 'rotary_value', _rotary_value_pe_mha)
calculate(TransformerConfigs.decoder_mem_attn, 'rotary_value', _rotary_value_pe_mha)
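# With these registered, the RoPER variant can be selected from `experiment.configs`
# with 'transformer.encoder_attn': 'rotary_value'; `main` below uses the plain
# RoPE option 'rotary' and keeps the RoPER option commented out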
def main():
# Create experiment
experiment.create(name="roper_mult", comment="4", writers={'screen', 'labml'})
# Create configs
conf = Configs()
# Override configurations
experiment.configs(conf, {
'max_digits': 8,
'base': 4,
# No fixed positional embeddings
'transformer.src_embed': 'no_pos',
'transformer.tgt_embed': 'no_pos',
# Encoder with RoPER attention
# 'transformer.encoder_attn': 'rotary_value',
# Encoder with RoPE attention
'transformer.encoder_attn': 'rotary',
#
'model': 'rotary_pe_transformer',
# Use a context size of $512$
'seq_len': 512,
# Train for 20 epochs
'epochs': 20,
# Batch size $16$
'batch_size': 16,
# Model size
'd_model': 128,
'transformer.ffn.d_ff': 512,
'transformer.n_heads': 4,
'transformer.dropout': 0.0,
# Use the Adam optimizer
'optimizer.optimizer': 'Adam',
'optimizer.learning_rate': 2.5e-4,
})
# Set models for saving and loading
experiment.add_pytorch_models({'model': conf.model})
# Start the experiment
with experiment.start():
# Run training
conf.run()
#
if __name__ == '__main__':
main()
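The run above uses plain RoPE attention in the encoder. A minimal sketch of enabling the RoPER variant instead, using the 'rotary_value' option registered earlier in this file (only the changed key is shown; the other options stay as in `main`):

experiment.configs(conf, {
    # ... same options as in `main` above ...
    # Encoder with RoPER attention
    'transformer.encoder_attn': 'rotary_value',
})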