'''
STUDENT CODE:
'''

from random import randrange, random
from sample import *

class MarkovModel:
    def __init__(self, probs=None, emissions=None):
        # Avoid mutable default arguments: default to fresh empty lists.
        probs = [] if probs is None else probs
        emissions = [] if emissions is None else emissions
        assert type(probs) is list and type(emissions) is list
        assert len(probs) == len(emissions)
        self.p = probs
        self.e = emissions
        self.indices = {}

    '''
    Run the Markov model until endfunc returns anything other than False.
    endfunc must be a function that takes a string (the last emission)
    and an int (the number of iterations run so far).
    It must return False to continue, or anything else (e.g. True) to stop.
    '''
    def run(self, endfunc):
        c = 0
        # Start from a uniformly random state.
        i = randrange(len(self.e))
        word = self.e[i]
        while endfunc(word, c) is False:
            print(word, end=' ')
            # Sample the next state from the cumulative row for state i.
            newi = search(self.p[i], random())
            word = self.e[newi]
            i = newi
            c += 1
        print()

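# Illustrative sketch (not part of the original assignment): a hand-built
# two-word chain. Each row of probs holds the cumulative probability of all
# *earlier* columns, which is the form search() expects, and the lambda stops
# the chain after five emissions.
def _demo_run():
    mm = MarkovModel(probs=[[0.0, 0.1],   # from 'hello': 10% 'hello', 90% 'world'
                            [0.0, 0.8]],  # from 'world': 80% 'hello', 20% 'world'
                     emissions=['hello', 'world'])
    mm.run(lambda word, i: i == 5)
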
def train(mm, data):
    assert type(mm) is MarkovModel
    l = 0
    # Reset the model, just good practice.
    mm.e = []
    mm.p = []
    mm.indices = {}

    # Tokenize each sentence and mark its end with '\n'.
    data = [(d.split(' ') + ['\n']) for d in data]
    # First pass: assign an index to every distinct word.
    for sent in data:
        for word in sent:
            if word not in mm.indices:
                mm.indices[word] = l
                l += 1
                mm.e.append(word)
    # Second pass: count transitions between consecutive words.
    mm.p = [([0] * len(mm.e)) for i in range(len(mm.e))]
    prev = None
    for sent in data:
        for word in sent:
            if prev is not None:
                mm.p[mm.indices[prev]][mm.indices[word]] += 1
            prev = word
    # Divide each row by its sum to get probabilities, and accumulate it into
    # the cumulative form search() expects: entry j holds the smoothed
    # probability mass of columns 0..j-1.
    for i in range(len(mm.p)):
        denom = sum(mm.p[i])
        running = 0.0
        for j in range(len(mm.p[i])):
            # Plus-one smoothing, for variety
            prob = (mm.p[i][j] + 1) / (denom + len(mm.p[i]))
            mm.p[i][j] = running
            running += prob

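# Illustrative sketch (not part of the original assignment): training on a toy
# corpus. After train(), mm.e holds one entry per distinct word (plus '\n'),
# mm.indices maps each word to its row/column, and each row of mm.p is a
# smoothed cumulative distribution over the next word.
def _demo_train():
    mm = MarkovModel()
    train(mm, ["the cat sat", "the dog sat"])
    print(mm.e)                      # ['the', 'cat', 'sat', '\n', 'dog']
    mm.run(lambda word, i: i == 10)  # print ten generated words
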
'''
PROVIDED CODE:
'''

def endOnString(st, i, endstr="\n"):
    return endstr in st

def endAfterN(st, i, n=100):
    return i == n

# Parse Project Gutenberg csv
def dataFromNovel(filename):
    data = []
    with open(filename) as f:
        for line in f:
            # Remove extraneous lines
            if len(line) > 3:
                # Add everything but the first char (") and the last 2 ("\n).
                data.append(line[1:-2])
    return data


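# Illustrative sketch (not part of the original assignment): end-to-end usage.
# 'novel.csv' is a placeholder filename for a Project Gutenberg export.
def _demo_novel():
    mm = MarkovModel()
    train(mm, dataFromNovel('novel.csv'))
    mm.run(endAfterN)   # print 100 generated words
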
# Binary search: return the index of the last entry of l that is <= val
# (0 if val is smaller than every entry). With the cumulative rows built by
# train(), a uniform draw in [0, 1) lands on column k with probability p[k].
def search(l, val, j=0):
    assert type(l) is list
    if len(l) <= 1:
        return j
    else:
        i = len(l) // 2
        if val < l[i]:
            return search(l[:i], val, j)
        else:
            return search(l[i:], val, j+i)
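
# Illustrative sketch (not part of the original assignment): a quick check of
# search() against a hand-written cumulative row for probabilities
# [0.2, 0.5, 0.3]; a draw of 0.6 falls in the middle bucket.
if __name__ == '__main__':
    print(search([0.0, 0.2, 0.7], 0.6))   # -> 1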