Skip to content

Commit 385c42b

Browse files
committed
For great justice.
1 parent 3b332e9 commit 385c42b

File tree

4 files changed

+120
-13
lines changed

4 files changed

+120
-13
lines changed

hmm.py

+7-12
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
'''
44

55
from random import uniform, random
6-
from sample import *
76

87
class MarkovModel:
98
def __init__(self, probs=[], emissions=[]):
@@ -24,14 +23,12 @@ def run(self, endfunc):
2423
i = int(uniform(0, len(self.e)))
2524
word = self.e[i]
2625
while(endfunc(word, c) == False):
27-
print(word, end=' ')
28-
newi = random()
29-
newi = search(self.p[i], newi)
30-
word = self.e[newi]
31-
i = newi
32-
c += 1
33-
print()
26+
pass
27+
# Your code here!
3428

29+
'''
30+
PROVIDED CODE:
31+
'''
3532
def train(mm, data):
3633
assert type(mm) is MarkovModel
3734
l = 0
@@ -58,17 +55,15 @@ def train(mm, data):
5855
for i in range(len(mm.p)):
5956
denom = sum(mm.p[i])
6057
for j in range(len(mm.p[i])):
58+
# Plus-one smoothing, for variety
6159
mm.p[i][j] = (mm.p[i][j] + 1) / (denom + len(mm.p[i]))
6260
if(j > 1):
6361
mm.p[i][j] += mm.p[j][j-1]
6462

6563

66-
'''
67-
PROVIDED CODE:
68-
'''
6964

7065
def endOnString(st, i, endstr="\n"):
71-
if(st == endstr):
66+
if(st.strip() == endstr):
7267
return True
7368
else:
7469
return False

hmm_sol.py

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
'''
2+
STUDENT CODE:
3+
'''
4+
5+
from random import uniform, random
6+
from sample import *
7+
8+
class MarkovModel:
    """First-order Markov chain over a list of emissions.

    The short attribute names (``p``, ``e``, ``indices``) are kept because
    code outside the class assigns to them directly.
    """

    def __init__(self, probs=None, emissions=None):
        """Create a model from a transition table and its emissions.

        Args:
            probs: List of rows, one per emission. ``probs[i]`` is the row
                that ``run`` passes to ``search`` while in state ``i``.
                Defaults to a fresh empty list.
            emissions: List of emitted values, parallel to ``probs``.
                Defaults to a fresh empty list.

        Raises:
            AssertionError: If either argument is not a list, or the two
                lists differ in length.
        """
        # Build the defaults per call: a literal ``[]`` default is a single
        # list object shared by every instance created without arguments.
        probs = [] if probs is None else probs
        emissions = [] if emissions is None else emissions
        assert type(probs) is list and type(emissions) is list
        assert len(probs) == len(emissions)
        self.p = probs
        self.e = emissions
        # Maps an emission to its row/column index in ``p``.
        self.indices = {}

    def run(self, endfunc):
        """Print a random walk through the model until ``endfunc`` stops it.

        Run markov model until endfunc returns anything other than False.
        endfunc must be a function that takes a string (for last emission)
        and an int representing the number of iterations run.
        endfunc must return False to continue or anything else (True) to
        stop.

        Each emission is printed followed by a space; a newline is printed
        once the walk ends.

        Raises:
            IndexError: If the model has no emissions.
        """
        if not self.e:
            # Same exception type the bare index lookup would raise, but
            # with a message that names the actual problem.
            raise IndexError("cannot run a MarkovModel with no emissions")

        c = 0
        # Start from a state chosen uniformly at random.
        i = int(uniform(0, len(self.e)))
        word = self.e[i]
        # Deliberately ``== False`` rather than ``not ...``: per the
        # contract above, any other return value (even None) means stop.
        while endfunc(word, c) == False:  # noqa: E712
            print(word, end=' ')
            # Map a uniform draw in [0, 1) onto the current state's row.
            i = search(self.p[i], random())
            word = self.e[i]
            c += 1
        print()
34+
35+
def train(mm, data):
    """Fit ``mm`` to a list of space-separated sentences.

    Any previous contents of ``mm`` are discarded. Each sentence is split
    on single spaces and terminated with a ``'\\n'`` token. Transitions are
    counted between consecutive tokens; the previous token carries over
    from one sentence to the next, so a sentence's terminating ``'\\n'``
    transitions to the first token of the following sentence.

    After training:
        * ``mm.e`` lists the distinct tokens in order of first appearance.
        * ``mm.indices`` maps each token to its position in ``mm.e``.
        * ``mm.p[i]`` is a running-total table for state ``i``: entry ``j``
          is the total (plus-one smoothed) probability of all states
          before ``j``. ``search`` returns the last index whose entry is
          ``<=`` the draw, so with this layout a uniform draw in [0, 1)
          lands on state ``j`` with exactly state ``j``'s probability.

    Args:
        mm: The ``MarkovModel`` to (re)train in place.
        data: Iterable of sentence strings.

    Raises:
        AssertionError: If ``mm`` is not a ``MarkovModel``.
    """
    assert isinstance(mm, MarkovModel)
    # Reset the hmm, just good practice.
    mm.e = []
    mm.p = []
    mm.indices = {}

    sentences = [(d.split(' ') + ['\n']) for d in data]

    # Pass 1: assign every distinct token an index.
    for sent in sentences:
        for word in sent:
            if word not in mm.indices:
                mm.indices[word] = len(mm.e)
                mm.e.append(word)

    # Pass 2: count token -> next-token transitions.
    size = len(mm.e)
    counts = [[0] * size for _ in range(size)]
    prev = None
    for sent in sentences:
        for word in sent:
            if prev is not None:
                counts[mm.indices[prev]][mm.indices[word]] += 1
            prev = word

    # Pass 3: turn each row of counts into a running-total table.
    for row in counts:
        # Plus-one smoothing, for variety: every transition gets one
        # extra count, so the denominator grows by the row length.
        denom = sum(row) + size
        running = 0.0
        table = []
        for count in row:
            # Record the total *before* adding this state's share; the
            # row must only ever accumulate its own entries.
            table.append(running)
            running += (count + 1) / denom
        mm.p.append(table)
65+
66+
67+
'''
68+
PROVIDED CODE:
69+
'''
70+
71+
def endOnString(st, i, endstr="\n"):
    """End condition for ``MarkovModel.run``: stop once ``endstr`` appears.

    Args:
        st: The most recent emission.
        i: Number of iterations run so far. Unused here; accepted because
            ``run`` passes it to every end function.
        endstr: Text that marks the end. Defaults to a newline.

    Returns:
        True if ``endstr`` occurs anywhere in ``st``, otherwise False.
    """
    return endstr in st
76+
77+
def endAfterN(st, i, n=100):
    """End condition for ``MarkovModel.run``: stop after ``n`` iterations.

    Args:
        st: The most recent emission. Unused here; accepted because ``run``
            passes it to every end function.
        i: Number of iterations run so far.
        n: Iteration count at which to stop. Defaults to 100.

    Returns:
        True once ``i`` has reached ``n``, otherwise False.
    """
    # ``>=`` rather than ``==`` so the walk still terminates if the counter
    # is already past ``n`` (for example when ``n`` is negative).
    return i >= n
82+
83+
# Parse Project Gutenberg csv
84+
def dataFromNovel(filename):
    """Read the quoted rows of a Project Gutenberg CSV file.

    Lines of three characters or fewer are treated as extraneous and
    skipped. For every other line the first character and the last two
    characters are dropped, which removes the opening quote and the
    closing quote plus newline.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one string per kept line.
    """
    # The context manager closes the file even if reading fails part-way.
    with open(filename) as f:
        return [line[1:-2] for line in f if len(line) > 3]
93+
94+
95+
def search(l, val, j=0):
    """Binary-search ``l`` for the slot that ``val`` falls into.

    The list is halved repeatedly: when ``val`` is smaller than the middle
    entry the lower half is kept, otherwise the upper half (middle
    included) is kept. For a list sorted in ascending order the result is
    the index of the last entry that is ``<= val``, or the first index
    when every entry is greater than ``val``.

    Args:
        l: List of mutually comparable values.
        val: Value to locate.
        j: Offset added to the returned index. Defaults to 0.

    Returns:
        ``j`` plus the index reached; just ``j`` for a list of length 0
        or 1.

    Raises:
        AssertionError: If ``l`` is not a list.
    """
    assert isinstance(l, list)
    # Narrow an index window instead of slicing, so no half-list copies
    # are made and no recursion depth is consumed.
    lo, hi = 0, len(l)
    while hi - lo > 1:
        mid = lo + (hi - lo) // 2
        if val < l[mid]:
            hi = mid
        else:
            lo = mid
    return j + lo

samplerun.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from hmm_sol import *
2+
from twitter_api import get_tweets
3+
4+
# Train a Markov model on one account's tweets, then print generated text
# until endOnString reports that the end string has been emitted.
model = MarkovModel()
tweets = get_tweets('SarcasticRover')
train(model, tweets)
model.run(endOnString)

twitter_api.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import oauth2 as oauth
22
import json
3+
import re
34

45
#twitter_handle should NOT include the '@' symbol
56
def get_tweets(twitter_handle):
@@ -18,7 +19,7 @@ def get_tweets(twitter_handle):
1819
tweet_data = json.loads(data.decode('utf-8'))
1920
tweets = []
2021
for tweet in tweet_data['statuses']:
21-
tweets.append(tweet['text'] + ' \n')
22+
tweets.append(re.sub(r'https:\/\/t\.co\/.{10}', '', tweet['text'] + ' \n'))
2223
return tweets
2324

2425
#example

0 commit comments

Comments
 (0)