import random


def generate_v1_data():
    """Generate every single-digit arithmetic problem with a single-digit answer.

    For each ordered pair of operands ``a`` and ``b`` in ``0..9`` the four
    operations ``+``, ``-``, ``*`` and ``/`` are tried. A problem is kept only
    when its result is an integer in ``0..9``; division is additionally
    skipped when ``b`` is zero or does not divide ``a`` evenly.

    Returns:
        A list of strings of the form ``"3 + 4 = 7<EOS>"``. The list is
        shuffled in place with the module-level ``random`` generator, so the
        order depends on the global random state (seed it beforehand for a
        reproducible order).
    """
    data = []

    for a in range(10):
        for b in range(10):
            # Candidates are listed in the fixed order + - * / so that the
            # pre-shuffle ordering (and hence the shuffle result for a given
            # seed) stays stable.
            candidates = [('+', a + b), ('-', a - b), ('*', a * b)]

            # Only exact divisions are valid problems; integer division keeps
            # the result an int without a float round-trip.
            if b != 0 and a % b == 0:
                candidates.append(('/', a // b))

            for op_char, result in candidates:
                # Operands are non-negative, so the lower bound only ever
                # rejects negative differences.
                if 0 <= result <= 9:
                    data.append(f"{a} {op_char} {b} = {result}")

    random.shuffle(data)

    # Every problem is terminated with the literal end-of-sequence marker.
    return [problem + "<EOS>" for problem in data]


class CharacterTokenizer:
    """A simple character-level tokenizer for the math problems.

    The vocabulary is every distinct character found in the training strings,
    in sorted order, followed by one extra ``'<PAD>'`` entry. Because the
    vocabulary is built per character, multi-character markers that appear in
    the data (for example ``"<EOS>"``) are split into their individual
    characters rather than treated as one token.

    Attributes set by ``__init__``:
        stoi: dict mapping each vocabulary entry to its integer id.
        itos: dict mapping each integer id back to its vocabulary entry.
        vocab_size: number of vocabulary entries, including ``'<PAD>'``.
        pad_token_id: id of the ``'<PAD>'`` entry (always the last id).
    """

    def __init__(self, data):
        """Build the vocabulary from an iterable of strings.

        Args:
            data: iterable of strings; all of them are concatenated and the
                set of distinct characters becomes the vocabulary.
        """
        # Sorting makes the id assignment deterministic for a given corpus.
        chars = sorted(set("".join(data)))

        # `chars` only ever holds single characters, so the multi-character
        # pad marker can never already be present; it always takes the last id.
        chars.append('<PAD>')

        self.stoi = {ch: i for i, ch in enumerate(chars)}
        self.itos = dict(enumerate(chars))
        self.vocab_size = len(chars)
        self.pad_token_id = self.stoi['<PAD>']

    def encode(self, s):
        """Encode a string into a list of integer ids, one per character.

        Raises:
            KeyError: if ``s`` contains a character that is not in the
                vocabulary.
        """
        return [self.stoi[c] for c in s]

    def decode(self, l):
        """Decode a list of integer ids back into a string.

        The pad id decodes to the literal text ``'<PAD>'``.

        Raises:
            KeyError: if ``l`` contains an id that is not in the vocabulary.
        """
        return "".join(self.itos[i] for i in l)