# Load the (human-readable, machine-readable) example pairs and the two
# vocabularies used to encode them.
# NOTE(review): assumes the dataset JSON is a list of [human, machine] pairs
# and the vocab JSON is a two-element list — confirm against the data files.
with open('data/Time Dataset.json', 'r') as f:
    dataset = json.load(f)
with open('data/Time Vocabs.json', 'r') as f:
    human_vocab, machine_vocab = json.load(f)

human_vocab_size = len(human_vocab)
machine_vocab_size = len(machine_vocab)

# Number of examples in the dataset.
m = len(dataset)
def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):
    """Tokenize and one-hot encode a dataset of (source, target) string pairs.

    Args:
        dataset: sequence of examples where example[0] is the source string
            and example[1] is the target string.
        human_vocab: vocabulary mapping for the source strings (passed to
            ``tokenize``; presumably char -> index — confirm).
        machine_vocab: vocabulary mapping for the target strings.
        Tx: fixed token length for every source sequence.
        Ty: fixed token length for every target sequence.

    Returns:
        Tuple ``(X, Y, Xoh, Yoh)`` where X is (m, Tx) int32 token ids,
        Y is (m, Ty) int32 token ids, and Xoh / Yoh are their one-hot
        encodings produced by ``oh_2d``.
    """
    m = len(dataset)
    X = np.zeros((m, Tx), dtype='int32')
    Y = np.zeros((m, Ty), dtype='int32')

    # enumerate instead of range(m)+indexing; assigning into an int32 row
    # coerces the tokenize() result, so no np.array() wrapper is needed.
    for i, example in enumerate(dataset):
        X[i] = tokenize(example[0], human_vocab, Tx)
        Y[i] = tokenize(example[1], machine_vocab, Ty)

    Xoh = oh_2d(X, len(human_vocab))
    Yoh = oh_2d(Y, len(machine_vocab))
    return (X, Y, Xoh, Yoh)