Skip to content
This repository was archived by the owner on Jun 1, 2023. It is now read-only.

Commit f702554

Browse files
committed
added packages
1 parent dec8e91 commit f702554

File tree

4 files changed

+10
-38
lines changed

4 files changed

+10
-38
lines changed

.gitignore

+2-1
Original file line number | Diff line number | Diff line change
@@ -1 +1,2 @@
1-
__pycache__
1+
__pycache__
2+
pyate

basic.py

+2-5
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,5 @@ def basic(technical_corpus, *args, **kwargs):
1212
return combo_basic(technical_corpus, weights=weights, *args, **kwargs)
1313

1414
if __name__ == "__main__":
15-
import pickle
16-
pkl = pickle.load(open("../data/pmc_testing.pkl", "rb"))
17-
print(len(pkl))
18-
corpus = pkl
19-
print(TermExtraction(pkl[0]).basic().sort_values(ascending=False).head(50))
15+
pkl = "hello world"
16+
print(TermExtraction(pkl).basic().sort_values(ascending=False).head(50))

combo_basic.py

+6-28
Original file line number | Diff line number | Diff line change
@@ -5,27 +5,6 @@
55
import numpy as np
66
from term_extraction import TermExtraction, add_term_extraction_method
77

8-
start_ = 0
9-
tmp = 0
10-
# TOTAL_WORK = 27768
11-
# success = 27768
12-
# pbar = tqdm(total=27768)
13-
14-
15-
def start():
16-
global start_
17-
start_ = time.time()
18-
19-
20-
def end():
21-
global start_
22-
print(time.time() - start_)
23-
24-
25-
MAX_WORD_LENGTH = 6
26-
THRESHOLD = 0
27-
28-
29 8
def helper_get_subsequences(s):
30 9
sequence = s.split()
31 10
if len(sequence) <= 2:
@@ -49,8 +28,6 @@ def combo_basic(
4928
weights=None,
5029
):
5130

52-
# TODO
53-
5431
if technical_counts is None:
5532
technical_counts = (
5633
TermExtraction(technical_corpus)
@@ -106,9 +83,10 @@ def score_of_children(candidate):
10683

10784

10885
if __name__ == "__main__":
109-
import pickle
86+
# import pickle
11087

111-
pkl = pickle.load(open("../data/pmc_testing.pkl", "rb"))
112-
print(len(pkl))
113-
corpus = pkl
114-
print(TermExtraction(pkl[0]).combo_basic().sort_values(ascending=False).head(50))
88+
# pkl = pickle.load(open("../data/pmc_testing.pkl", "rb"))
89+
# print(len(pkl))
90+
# corpus = pkl
91+
pkl = "Hello I am a good extractor."
92+
print(TermExtraction(pkl).combo_basic().sort_values(ascending=False).head(50))

term_extraction.py

-4
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import math
77
from tqdm import tqdm
88
import pandas as pd
9-
10-
# from pathos.multiprocessing import ProcessingPool as Pool
119
from multiprocessing import Pool
1210
from spacy.matcher import Matcher
1311
from collections import defaultdict
@@ -105,7 +103,6 @@ def add_to_counter(matcher, doc, i, matches):
105103
return term_counter
106104

107105
def count_terms_from_documents(self, seperate=False, verbose=False):
108-
109106
if type(self.corpus) is str:
110107
term_counter = pd.Series(self.count_terms_from_document(self.corpus))
111108
elif type(self.corpus) is list or type(self.corpus) is pd.Series:
@@ -155,7 +152,6 @@ def error_callback(e):
155152
P.terminate()
156153
if verbose:
157154
pbar.close()
158-
# print(term_counter)
159155
else:
160156
raise TypeError()
161157

0 commit comments

Comments
 (0)