# Load the (human-readable, machine-readable) date example pairs and the
# character->id vocabularies for each side of the sequence-to-sequence task.
with open('data/Time Dataset.json', 'r') as f:
    dataset = json.load(f)  # json.load reads the file object directly
with open('data/Time Vocabs.json', 'r') as f:
    human_vocab, machine_vocab = json.load(f)

human_vocab_size = len(human_vocab)
machine_vocab_size = len(machine_vocab)
m = len(dataset)  # number of training examples


def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):
    """Tokenize and one-hot encode a dataset of (input, target) string pairs.

    Args:
        dataset: sequence of pairs; element [0] is the human-readable string,
            element [1] the machine-readable string.
        human_vocab: character -> integer id mapping for the input side.
        machine_vocab: character -> integer id mapping for the target side.
        Tx: fixed token length of each encoded input sequence.
        Ty: fixed token length of each encoded target sequence.

    Returns:
        Tuple (X, Y, Xoh, Yoh): int32 id matrices of shape (m, Tx) and
        (m, Ty), plus their one-hot encodings produced by ``oh_2d``.
    """
    m = len(dataset)
    X = np.zeros([m, Tx], dtype='int32')
    Y = np.zeros([m, Ty], dtype='int32')

    # enumerate avoids the range(len(...)) index-then-lookup pattern.
    for i, data in enumerate(dataset):
        # Row assignment converts the token list to int32 automatically,
        # so no explicit np.array(...) wrapper is needed.
        X[i] = tokenize(data[0], human_vocab, Tx)
        Y[i] = tokenize(data[1], machine_vocab, Ty)

    Xoh = oh_2d(X, len(human_vocab))
    Yoh = oh_2d(Y, len(machine_vocab))
    return (X, Y, Xoh, Yoh)