diff --git a/keras/README.md b/keras/README.md
new file mode 100644
index 00000000..4cfa3abd
--- /dev/null
+++ b/keras/README.md
@@ -0,0 +1 @@
+This directory demonstrates a character-level, decoder-only Transformer (GPT) implemented in Keras.
diff --git a/keras/data.py b/keras/data.py
new file mode 100644
index 00000000..80a1206f
--- /dev/null
+++ b/keras/data.py
@@ -0,0 +1,52 @@
+"""
+The Data class that streams data batches for the Keras model
+"""
+
+import numpy as np
+
+
+class Data:
+    TRAIN_SPLIT, VAL_SPLIT = 'train', 'val'
+
+    def __init__(self, seq_len, batch_size, random_seed=0):
+        """
+        Load ../input.txt, build the character vocabulary, and split the data into train/val
+        """
+
+        np.random.seed(random_seed)
+        self.seq_len = seq_len  # e.g. 8
+        self.batch_size = batch_size
+
+        # read it in to inspect it
+        with open('../input.txt', 'r', encoding='utf-8') as f:
+            text = f.read()
+
+        # here are all the unique characters that occur in this text
+        self.chars = sorted(list(set(text)))
+        self.vocab_size = len(self.chars)
+
+        # create a mapping from characters to integers
+        stoi = {ch: i for i, ch in enumerate(self.chars)}
+        itos = {i: ch for i, ch in enumerate(self.chars)}
+        self.encoder = lambda s: [stoi[c] for c in s]  # encoder: take a string, output a list of integers
+        self.decoder = lambda l: ''.join([itos[i] for i in l])  # decoder: take a list of integers, output a string
+
+        data = self.encoder(text)
+
+        n = int(.9 * len(data))
+        self.data = {Data.TRAIN_SPLIT: np.array(data[:n]), Data.VAL_SPLIT: np.array(data[n:])}
+        print(f'Train size: {len(self.data[Data.TRAIN_SPLIT])}; validation size: {len(self.data[Data.VAL_SPLIT])}')
+
+
+    def fetch_batch(self, split):
+        """
+        Generate one batch of data
+        """
+        assert split in {Data.TRAIN_SPLIT, Data.VAL_SPLIT}
+        d = self.data[split]
+        ix = np.random.randint(0, len(d) - self.seq_len, (self.batch_size,))
+
+        x = np.stack([d[i: i + self.seq_len] for i in ix])          # inputs: (batch_size, seq_len)
+        y = np.stack([d[i + 1: i + self.seq_len + 1] for i in ix])  # targets: inputs shifted right by one
+
+        return x, y
diff --git a/keras/gpt.py b/keras/gpt.py
new file mode 100644
index 00000000..e904d117
--- /dev/null
+++ b/keras/gpt.py
@@ -0,0 +1,221 @@
+"""
+Keras version of the Transformer
+"""
+
+import math
+import numpy as np
+import tensorflow as tf
+from scipy.special import softmax
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras import backend as K
+from typing import Callable, List
+
+from data import Data
+
+
+class Head(layers.Layer):
+    def __init__(self, head_size, dropout_rate):
+        super().__init__()
+        self.head_size = head_size
+        self.dropout_rate = dropout_rate
+
+    def build(self, input_shape):
+        self.key = layers.Dense(self.head_size, activation=None, use_bias=False)
+        self.query = layers.Dense(self.head_size, activation=None, use_bias=False)
+        self.value = layers.Dense(self.head_size, activation=None, use_bias=False)
+        self.dropout = layers.Dropout(self.dropout_rate)
+        self.mask = tf.constant(np.tril(np.ones((input_shape[1], input_shape[1]), dtype=bool)))  # lower-triangular causal mask
+
+    def call(self, x, *args, **kwargs):
+        k = self.key(x)  # (B,T,C)
+        q = self.query(x)  # (B,T,C)
+
+        # compute attention scores ("affinities")
+        k_transposed = K.permute_dimensions(k, (0, 2, 1))  # B, C, T
+        wei = tf.matmul(q, k_transposed)  # B, T, T
+        wei /= math.sqrt(self.head_size)  # scale by sqrt(d_k) so the softmax stays diffuse
+        wei = layers.Softmax(axis=-1)(wei, self.mask)  # softmax over keys, masking out future (upper-triangle) positions
+        wei = self.dropout(wei)
+
+        # perform the weighted aggregation of the values
+        v = self.value(x)  # B, T, C
+        out = tf.matmul(wei, v)  # (B, T, T) @ (B, T, C) -> (B, T, C)
+
+        assert out.shape[1] == x.shape[1] and out.shape[2] == self.head_size
+
+        return out
+
+class MultiHeadAttention(layers.Layer):
+    def __init__(self, num_heads, head_size, n_embd, dropout_rate):
+        super().__init__()
+        self.num_heads = num_heads
+        self.head_size = head_size
+        self.n_embd = n_embd
+        self.dropout_rate = dropout_rate
+
+    def build(self, input_shape):
+        self.heads = [Head(self.head_size, self.dropout_rate) for _ in range(self.num_heads)]
+        self.proj = layers.Dense(self.n_embd, activation=None)
+        self.dropout = layers.Dropout(self.dropout_rate)
+
+    def call(self, x, *args, **kwargs):
+        out = layers.Concatenate(axis=-1)([h(x) for h in self.heads])  # concatenate the per-head outputs back to n_embd channels
+        out = self.dropout(self.proj(out))
+
+        assert out.shape[1] == x.shape[1] and out.shape[2] == self.n_embd
+
+        return out
+
+
+class FeedForward(layers.Layer):
+    def __init__(self, n_embd: int, dropout_rate: float):
+        super().__init__()
+
+        self.n_embd = n_embd
+        self.dropout_rate = dropout_rate
+
+    def build(self, input_shape):
+        self.net = keras.Sequential([
+            layers.Conv1D(filters=4 * self.n_embd, kernel_size=1, activation='relu'),  # 4x expansion; kernel_size=1 acts per token
+            layers.Conv1D(filters=self.n_embd, kernel_size=1),
+            layers.Dropout(self.dropout_rate)
+        ])
+
+    def call(self, x, *args, **kwargs):
+        out = self.net(x)  # B, T, n_embd
+        assert out.shape[1] == x.shape[1] and out.shape[2] == self.n_embd
+
+        return out
+
+
+class Block(layers.Layer):
+    def __init__(self, n_embd: int, n_head: int, dropout_rate: float):
+        assert n_embd % n_head == 0
+        super().__init__()
+
+        self.n_embd = n_embd
+        self.n_head = n_head
+        self.head_size = n_embd // n_head
+        self.dropout_rate = dropout_rate
+
+    def build(self, input_shape):
+        self.sa = MultiHeadAttention(self.n_head, self.head_size, self.n_embd, self.dropout_rate)
+        self.ffwd = FeedForward(self.n_embd, self.dropout_rate)
+        self.ln1 = layers.LayerNormalization(epsilon=1e-5)
+        self.ln2 = layers.LayerNormalization(epsilon=1e-5)
+
+    def call(self, x, *args, **kwargs):
+        x = x + self.sa(self.ln1(x))    # pre-norm residual connection around self-attention
+        x = x + self.ffwd(self.ln2(x))  # pre-norm residual connection around the feed-forward net
+        return x
+
+
+class TransformerLayer(layers.Layer):
+    def __init__(self, vocab_size, n_embd, n_head, n_block, dropout_rate):
+        super().__init__()
+
+        self.vocab_size = vocab_size
+        self.n_embd = n_embd
+        self.n_head = n_head
+        self.dropout_rate = dropout_rate
+        self.n_block = n_block
+
+    def build(self, input_shape):
+        self.block_size = input_shape[1]
+
+        self.token_embedding_table = layers.Embedding(self.vocab_size, self.n_embd)
+        self.position_embedding_table = layers.Embedding(self.block_size, self.n_embd)
+        self.blocks = keras.Sequential([Block(self.n_embd, self.n_head, self.dropout_rate) for _ in range(self.n_block)])
+        self.ln_f = layers.LayerNormalization(epsilon=1e-5)
+        self.lm_head = layers.Dense(self.vocab_size, activation=None)
+
+    def call(self, idx, *args, **kwargs):
+        # idx is of shape (B, T)
+
+        tok_emb = self.token_embedding_table(idx)  # (B,T,C)
+        pos_emb = self.position_embedding_table(tf.range(0, self.block_size))  # (T,C)
+        x = tok_emb + pos_emb  # (B,T,C)
+        x = self.blocks(x)  # (B,T,C)
+        x = self.ln_f(x)  # (B,T,C)
+        logits = self.lm_head(x)  # (B,T,vocab_size)
+
+        return logits
+
+
+class TransformerModel:
+    def __init__(self, vocab_size, n_embd, n_head, n_layer, block_size, dropout_rate, learning_rate, random_seed=1116):
+        self.block_size = block_size
+        self.vocab_size = vocab_size
+
+        keras.utils.set_random_seed(random_seed)
+        inputs = keras.Input((block_size,), dtype='int32')  # a batch of token-index sequences
+        outputs = TransformerLayer(vocab_size, n_embd, n_head, n_layer, dropout_rate)(inputs)
+        self.model = keras.Model(inputs, outputs)
+
+        # keras.optimizers.experimental.AdamW behaves strangely. Using Adam instead for now.
+        self.model.compile(optimizer=keras.optimizers.Adam(learning_rate),
+                           loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True))
+        self.model.summary()
+
+    def estimate_loss(self, num_iters: int, data: Data) -> dict:
+        res = dict()
+        for split in [data.TRAIN_SPLIT, data.VAL_SPLIT]:
+            loss = np.mean([self.model.evaluate(*data.fetch_batch(split), verbose=0)
+                            for _ in range(num_iters)])
+            res[split] = loss
+            print(f'{split} loss {loss:.4f}')
+
+        return res
+
+    def generate_text(self, max_new_tokens: int, decoder: Callable) -> List[int]:
+        res = []
+        idx = [0] * self.block_size
+
+        for _ in range(max_new_tokens):
+            idx_cond = idx[-self.block_size:]  # crop idx to the last block_size tokens
+            logits = self.model.predict(np.array([idx_cond]), verbose=0)
+            logits = logits[0, -1, :]  # focus only on the last time step
+            probs = softmax(logits, axis=-1)  # apply softmax to get probabilities
+            idx_next = np.random.choice(range(self.vocab_size), 1, p=probs)[0]  # sample the next token
+            idx.append(idx_next)
+            res.append(idx_next)
+            print(decoder([idx_next]), end='')
+
+        print()
+        return res
+
+    def train_on_batch(self, x, y, *args, **kwargs):
+        return self.model.train_on_batch(x, y, *args, **kwargs)
+
+
+# ---------- hyperparameters ----------
+batch_size = 64  # how many independent sequences will we process in parallel?
+block_size = 256  # what is the maximum context length for predictions?
+max_iters = 5000
+eval_interval = 500
+eval_iters = 200
+learning_rate = 3e-4
+n_embd = 384
+n_head = 6
+n_layer = 6
+dropout_rate = 0.2
+
+# ---------- train ----------
+data = Data(block_size, batch_size)
+transformer = TransformerModel(data.vocab_size, n_embd, n_head, n_layer, block_size, dropout_rate, learning_rate)
+
+for i in range(max_iters):
+
+    # every once in a while evaluate the loss on train and val sets
+    if i % eval_interval == 0 or i == max_iters - 1:
+        print(f'Step {i}')
+        transformer.estimate_loss(eval_iters, data)
+
+        print('Text generated:')
+        transformer.generate_text(500, data.decoder)
+
+    xb, yb = data.fetch_batch(Data.TRAIN_SPLIT)
+    loss = transformer.train_on_batch(xb, yb)
+
+open('./more.txt', 'w').write(data.decoder(transformer.generate_text(10000, data.decoder)))
diff --git a/keras/more.txt b/keras/more.txt
new file mode 100644
index 00000000..05e05871
--- /dev/null
+++ b/keras/more.txt
@@ -0,0 +1,344 @@
+BOESM:
+DoORLLORIO:
+No, gold From to Badam unfram
+BUTUS:
+I tef Cit anot: whence that hast to hope in.
+
+CORIOLANUS:
+We tell he, give must me it hus viction out say.
+
+BUSHOP:
+Yam the no out of of my have our fear.
+Alas! you mother as use that do my boy?
+
+MINIUS:
+O, woot leady! you barry, you good, one mine?
+
+LUCIO:
+If you no have you have very.
+
+COMINIUS:
+Awors out hads not Parieve I thee accommand by to friends.
+
+BISHOP OF ELY:
+Shall you my day, my no taughter;
+And so I die bed a vicolant'd, at a man
+Fre I have most a from him from her your de mother.
+
+SICINIUS:
+My now have lords: them is be you you been,
+He have you have no too be: I loved not no and befother,
+to not my day, come they hie.
+
+HERMIO:
+O, and see him so? I come you.
+
+POLIXENES:
+So, vasted, good my lord: love, my say I do fawer.
+
+CATES:
+Say, but you not, what I profess me so,
+Thank you what you must beide pay me: Let in you not
+That you part it: if thou art for be a put:
+Trovant all corst boy of my swant,
+Clarence thou wert that all mistread to mine
+My but set pernis.
+
+Second Citizen:
+Second this whose and show he is the s!
+Commends Henry, unly.
+
+ESCALUS:
+You stok it, I the pellsain yor thy at the queen;
+This you sweet him in my do a contenty you so 'Trank'd contry.
+
+QUEEN MARGARET:
+No, that's thusbant right to stay; who king her nor no have
+Jutcher o' devils to unprocan hate instry, by you
+But may to my king us graw from by out enement.
+
+KING HENRY VI:
+Whither I dam must know?
+
+KING EDWARD IV:
+Now life, daughter new their accue:
+The pubt the professicienny the but themself me,
+And I am here; that my sovere's member my love:
+I well it to bick is doth never tell deserver'd to die.
+
+QUEEN ELIZABET:
+You him son that you to the endeer as the carries not;
+Forise I see out your grace it.
+KING EDWARD IV:
+What, it better you less, I hexout of you.
+
+LADY GREY:
+I prother? raye it you; and to stand thee jepatiend!
+
+KING RICHARD III:
+Away, sir; sir, uncle.
+
+GLOUCESTER:
+Why, alrise never thy love's thy greater noble upon 'me:
+Thou parts desir, his to To nake kingdom when enry,
+They thee such a name must i gratis come roroow is
+Undent thy I have speak your a unto pation myself.
+My comfort: thou
+Bratiought for you the joy, much not Friends gracious
+The joy mourt of the night for armit a darers,
+I do throat of my kinfoes. But your love inoble.
+
+LEONTES:
+Wear his ast sword for to the him and when?
+
+KING RICHARD II:
+Ay, like the comes of my from my glove:
+And thy more my him the fire not wonduct did will:
+The can I cut with be recome.
+Come seam this that I help you sabell;
+His a no medy it now make both eyes:
+To no lord, honour ply part scartain: matter
+That me confences her man to Say's paat Leoss you thee eneming
+But a me now; then both that is say
+Sto I common you times our becomes the grant to,
+I the back the twill world pass my of be good distard:
+This been I devotem I'll be to be sate,
+But some dish the post which ontage the march agail.
+
+LEONTES:
+Ay, it I im so spulpeak?
+
+LUCIO:
+Undeed, not say so may grace, bafiery,
+The horse more my her thou would by thee thus such us:
+The at do to be is bontrack in my well,
+And I thy king, Let him me.
+
+MINIUS:
+Indeed to him a my husbroward a cheek;
+But thy bittle out make worsing a come?
+
+YORK:
+Ay, no sweet you no be me.
+
+KING RICHARD II:
+I were me will do to my love,
+For God when aith the more my like my from my doth,
+In my patience, here's care with same one out:
+Marcius, to caliack their powed with face.
+
+BRUTUS:
+A was word with mank it this mights, my gracious
+With and might Told safe ell know abites,
+And I proped by it, and the graged to their compheaven;
+And let ote may the love, and down my by sincan
+The cut yet have slaye faced a swords, but a wavour
+This grance, as I by bitters humole;
+How now matter face it the senry and a not all
+Aprovotidusict the of the wake and be grae wherefore
+Thing the trather at by hearder dard for me:
+The hope is bloody, Juliet! Thus Caster kindie be a the and
+Have nows'd sill the chirmple and most along.
+
+ESCALUS:
+Strail, love the great his with he slow'd.
+
+GLOUCESTER:
+They new you rover Bilingblood.
+
+GLOUCESTER:
+That may kissuer?
+
+LADY ANNE:
+Wrat crot? he may lie?
+God, sad 'tis made me me and agreeth once,
+Noberly.
+But I do thou thou knact father no even the rife
+To and kiltrief o'er my the did
+Evice happ is show bristrong in but and of distend
+By are must out by our polains.
+Have I care purchant cousipe to the rest. For that I,
+What's not what hould to from tree him to a die.
+
+CLIFFORD:
+I do prove,
+Eame in paint my famb: it disonce a tainst many
+Second him holy Proverefords! So how him how that care a belingme:
+Haster you to much a yall speak.
+
+QUEEN MARGARET:
+I have go. Largive no been a dukew lord for cause his
+Your on him on way, your bade to vision ell
+Or go man, Elest' shall whose death made that not
+If say your do'ery and the now senion.
+
+DUKE OF YORK:
+To king: and I goddie oppare vice. With of I; it not to my
+Oncan could grame thy patient, air, and I ray?
+
+DUKE OF YORK:
+Yet know your her shall shall me been,
+So how you talks may but to Pompey?
+
+DUKE VINCENTIO:
+He some by ar-being rode. This child weeps asweep.
+
+BINGHAM:
+No, I stople me to been?
+
+GLOUCESTER:
+I lad thee to a hait been prudic.
+
+PARIS:
+We see it. The were contion thy compalent is to the ell be excome
+Take a chall thy earing unce with tee thy grace most
+They kingthee, such with great thee glory.
+
+CLARENCE:
+I no dot with that have to me lie disperant:
+Thou a conful country'st them that I say the sword;
+What thou went will I will go so brother of it;
+But out why lord lies.
+
+DUKE OF YORK:
+'O sdanger.
+
+PAULINA:
+Yet you kneed you retchion your deof?
+
+RICHARD:
+I grace my speak'd your in to never tige,
+Or but your gaint and in the dain. You saw here that for your
+What your slain you hall our given in any
+It be comment othing her of hold prome, is it
+To the complings perfore of for lance our dign.
+
+LORD WILLOUGHBY:
+The queen, this she had noble well have much
+I that sender, by you Warwick'd one womany:
+O say husbalt with mand me; one I'll accounting
+The quot bell lamont, I'll before him with with henchamon'd?
+
+LINGORD II:
+Thou have I conforet to call I timest.
+
+NORTHUMBERLAND:
+Out may just Vurnow me to executiouch.
+
+PERDITA:
+Romeo, Breath, fom the such my make thy me lady.
+
+ORD:
+Thou man duke came the but from lim their mights,
+Fortune's sword, my lay my and will then hatests,
+That aow their no comfortunaton thy why thy thou thou thdists news.
+
+ESCALUS:
+So my commicy my love fellow good what'sts made my life
+A may from the of sweet, I is lame shollow.
+
+ISABELLA:
+I do't fool-day, Verotion the reame,
+Tells I name, this noble times him cheeks
+The king exome to not name you
+Were cave you have up boice lapts rigol'd for Eme venge
+Will you the so poppint: which the shall fall spake
+May been the grrander acted great me? why grant's grop:
+The service flow'd the most heaven your cares courtural
+Than fraint out in trong of your commies,
+Not for loveck Most graret's him. And make do bay
+From be divine elook, muth help this doth the we lay?
+And there happear min-from hertue, brace.
+
+FRIAR LAURENCE:
+You, in go straight the was hastesn'd-poss
+A be my hear you, and your content like to hoting
+To death: her is be the enaturant whough thou comes beard
+The of my him, queen and say death mock my love
+To be my live the gold for be eloves
+I kissibe some a gaintime to lie.
+
+QUEEN MARGARET:
+Broke with please my friend, commond lord,
+And sept he withy sweeth infoces in and how the shows?
+Then love not for eaunter a plutter said,
+To pal name banion to to my hold;
+No I have done rain findiet, and I my days let done.
+
+KING RICHARD II:
+What that, there temper their brother to friends
+And your would were be dost dreath and them lain.
+Thou miscrown: thy such comfortune, the
+Where the will holow him to on the Aur?
+
+BRUTUS:
+And my risclife the great Not fant with him:
+No thou a sleep in the see weation how she blood,
+Oe the dain mole abain mine of to must you
+Intent swith him and so or doth when the eays did
+Finereof pack of again their beloved together
+An my I had, is the lady the discaed the chard, make
+And being a jury herhertry gate my here
+Make King Richard Richard Richard Come the frience.
+
+QUEEN ELIZABETH:
+Good her is what he did pattly for the greaten;
+I old your you cluse voice dode
+For him may crue joy: much that storderums my brother.
+
+QUEEN ELIZABETH: I it that me hunce I live good do that him:
+I am you ear body cheeknow have him unto country.
+
+DUCHESS OF YORK:
+No speak, I will thee to throke a so.
+
+KING HENRY VI:
+Not do my lord: cause for from the Bride father the from a such in
+Be my love state: my game by my lord. When subject arms?
+
+RICHARD:
+Then the such am is to generilt honourt husband again these they
+That I wail mybeling heart heaven him brother;
+And your and thou good beate powern and his on power.
+
+LORD WILLOUGHBY:
+Wilt, part Believe out it. GauMesent its?
+
+JOHN OF GAUNT:
+When my most heart I know refore ave. By the could
+My been daught say the play's days earth a und all yoldren
+These but new he is love befoldise.
+
+LADY GREY:
+More stue all present mortague the groce of voint,
+Than mayany hour I bainted to her lords.
+
+QUEEN ELIZABETH:
+Not word lands, he much spoke thy lady some soul:
+Then I rape they arm happy cloied mise part he his aftict acree,
+Din his opparess tractor: may sonoul near.
+
+DUCHESS OF YORK:
+
+GLOUCESTER:
+Come, what you marry, this be.
+
+RIVERS:
+Well him more sative to to my him have:
+Madam him commilia.
+
+GLOUCESTER:
+Get move pleasener Ance at jeal thy charcet to poor
+Our thou have our her swords flaster,
+Have bold time against of earth mar:
+The worth wood reven himself him did pace,
+Or ever one wilt that dim of I'll nur my give sae.
+
+CLIFFORD:
+Come, I constant be thy cried great thee had thou doth
+To doth madam'd me to my be carm to the do i-dotch
+Thou crown. Look intimo thy stand Canious have me
+To from lipbrothe ever thee been a go:
+The with the befold, that so aby are to so
+As I can they
+Fanciracce dust the royal
\ No newline at end of file