-
Notifications
You must be signed in to change notification settings - Fork 0
/
genSum.py
52 lines (45 loc) · 1.7 KB
/
genSum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest
import spacy.cli
import sys
def summarize(text, per):
    """Build an extractive summary of the text file at path *text*.

    Tokens that are neither stop words nor punctuation are counted
    case-insensitively, the counts are normalised by the highest count, and
    each sentence is scored as the sum of the normalised frequencies of the
    words it contains. The best-scoring sentences form the summary.

    Args:
        text: Path of the text file to summarise (a path, not the text
            itself).
        per: Fraction of the document's sentences to keep (e.g. ``0.05``).
            At least one sentence is always requested.

    Returns:
        The selected sentences, highest score first, joined by single
        spaces. An empty string when the file contains no scorable words.

    Raises:
        OSError: If the file at *text* cannot be opened.
    """
    # NOTE: the model is loaded on every call, exactly as before.
    nlp = spacy.load("en_core_web_md")

    # Context manager so the handle is closed even if reading fails.
    with open(text, "r") as source:
        doc = nlp(source.read())

    # Count under the lower-cased form: the scoring loop below looks words up
    # by their lower-cased text, so the keys must use the same normalisation.
    word_frequencies = {}
    for token in doc:
        key = token.text.lower()
        # STOP_WORDS is tested directly; no per-token list copy is needed.
        if key in STOP_WORDS or key in punctuation:
            continue
        word_frequencies[key] = word_frequencies.get(key, 0) + 1

    # Nothing to score (empty file, or only stop words / punctuation);
    # max() below would raise ValueError on an empty sequence.
    if not word_frequencies:
        return ""

    max_frequency = max(word_frequencies.values())
    for key in word_frequencies:
        word_frequencies[key] /= max_frequency

    sentence_tokens = list(doc.sents)
    sentence_scores = {}
    for sent in sentence_tokens:
        for token in sent:
            weight = word_frequencies.get(token.text.lower())
            if weight is not None:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + weight

    # Always ask for at least one sentence, however small `per` is.
    select_length = max(1, int(len(sentence_tokens) * per))
    best = nlargest(select_length, sentence_scores, key=sentence_scores.get)

    # Span.text carries no trailing whitespace, so an explicit separator is
    # required to keep adjacent sentences from running together.
    return " ".join(sent.text for sent in best)
if __name__ == '__main__':
    # Expected arguments: <input_text_file> <output_directory>.
    # Fail with a readable usage message instead of a bare IndexError when
    # either argument is missing; sys.exit(str) prints to stderr, status 1.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <input_text_file> <output_directory>")

    # Keep roughly 5% of the input's sentences.
    summary = summarize(sys.argv[1], 0.05)

    # The summary is written to summary.txt inside the given directory.
    with open(f"{sys.argv[2]}/summary.txt", "w") as f:
        f.write(summary)