forked from fluentpython/example-code-2e
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex0.py
28 lines (23 loc) · 904 Bytes
/
index0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# adapted from Alex Martelli's example in "Re-learning Python"
# http://www.aleax.it/Python/accu04_Relearn_Python_alex.pdf
# (slide 41) Ex: lines-by-word file index
# tag::INDEX0[]
"""Build an index mapping word -> list of occurrences"""
import re
import sys
WORD_RE = re.compile(r'\w+')
index = {}
with open(sys.argv[1], encoding='utf-8') as fp:
for line_no, line in enumerate(fp, 1):
for match in WORD_RE.finditer(line):
word = match.group()
column_no = match.start() + 1
location = (line_no, column_no)
# this is ugly; coded like this to make a point
occurrences = index.get(word, []) # <1>
occurrences.append(location) # <2>
index[word] = occurrences # <3>
# display in alphabetical order
for word in sorted(index, key=str.upper): # <4>
print(word, index[word])
# end::INDEX0[]