-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmap_reduce.txt
More file actions
24 lines (18 loc) · 789 Bytes
/
map_reduce.txt
File metadata and controls
24 lines (18 loc) · 789 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def count_words(doc: str) -> dict:
    """Count how often each word occurs in *doc*, ignoring case.

    Every non-alphabetic character (punctuation, digits, hyphens, ...) is
    treated as a word separator, so ``'Sam-I-Am'`` yields the three words
    ``sam``, ``i`` and ``am``.

    Args:
        doc: The text to analyse.

    Returns:
        A plain ``dict`` mapping each lower-cased word to its number of
        occurrences, keyed in order of first appearance. The dict is empty
        when *doc* contains no alphabetic characters.
    """
    # Imported locally so the function stays self-contained; the file has no
    # top-level import section to extend.
    from collections import Counter

    # Replace non-letters with spaces (rather than dropping them) so that
    # punctuation between words still separates them when splitting.
    normalised_doc = ''.join(c.lower() if c.isalpha() else ' ' for c in doc)
    # Convert back to a plain dict so callers get the same type as before.
    return dict(Counter(normalised_doc.split()))
# Sample corpus: one string per document. Every entry must be followed by a
# comma -- without it Python silently concatenates adjacent string literals,
# merging two documents into one.
# NOTE(review): 'Ceasar' is kept exactly as written in the original data;
# correcting the spelling would change the resulting word counts.
documents = [
    'It was the best of times, it was the worst of times.',
    'I went to the woods because I wished to live deliberately, to front the essential facts of life...',
    'Friends, Romans, countrymen, lend me your ears; I come to bury Ceasar, not to praise him.',
    'I do not like green eggs and ham. I do not like them, Sam-I-Am.',
]
# Map step: apply count_words to each document, giving one word-frequency
# dict per document. Under Python 3 `map` returns a lazy, single-pass
# iterator (not a list), so `counts` can only be consumed once.
counts = map(count_words, documents)
def combine_counts(d1: dict, d2: dict) -> dict:
    """Merge two word-count mappings by summing the counts per word.

    Neither argument is modified: the result starts as a shallow copy of
    *d1* and the counts from *d2* are added on top of it.

    Args:
        d1: First mapping of word -> count.
        d2: Second mapping of word -> count.

    Returns:
        A new mapping containing every word found in either input, with the
        counts of words present in both inputs added together.
    """
    # Work on a copy so the caller's d1 is left untouched.
    merged = d1.copy()
    for word, count in d2.items():
        merged[word] = merged.get(word, 0) + count
    return merged