Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The first step is to examine how different answers are referred to in
the dataset.

```
python3 intermediate_results/lat_frequency.json
make intermediate_results/lat_frequency.json
```

Next, we transform questions from the QB format to look like the NQ
Expand Down
6 changes: 4 additions & 2 deletions compute_lat_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def compute_lat_frequency(self, orig_qb_path: str, limit: int=-1) -> None:
text = qb_data[i]['text']
lats = self.count_answer_types(Question(qid, page, text))
# Printing here could cause a unicode conversion error if page is not pure ASCII
if i % 10000 == 0:
if i % 100 == 0:
print("===> %i/%i: %s %s" % (i, len(qb_data), page, str(lats)))
if limit > 0 and i > limit:
break
Expand All @@ -73,6 +73,9 @@ def write_most_freq_answer_type_for_qid(self, qanta_train_with_answer_type_path:
page_to_most_freq_answer_type_dict[qb_data[i]['qanta_id']] = self.most_common(qb_data[i]['page'])

#save the most freq answer type for each qid into dictionary
if not os.path.exists(output_file): # create path if it doesn't exist
os.makedirs(output_file)
os.chmod(output_file, mode=0o777) # edit the permission
with open(output_file, 'w') as fp:
json.dump(page_to_most_freq_answer_type_dict, fp, indent=2)

Expand All @@ -84,7 +87,6 @@ def write_most_freq_answer_type_for_qid(self, qanta_train_with_answer_type_path:
default='qanta.train.2018.04.18.json',
help="path of the qb dataset")
args = parser.parse_args()

# load configuration
lat_freq_calculator = LatFrequencyComputer()
lat_freq_calculator.compute_lat_frequency(args.qb_path, limit=args.limit)
Expand Down
6 changes: 3 additions & 3 deletions quality_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ def generate_feature_weight(self, model):
feature_weight["BIAS"] = model.intercept_[0]
return feature_weight

def save_dictionary(self, questions, file_path):
def save_dictionary(self, questions, file_path, model): #add argument "model"
x = self.prepare_features(questions)
results = model.predict_proba(x)

Expand Down Expand Up @@ -394,7 +394,7 @@ def save_dictionary(self, questions, file_path):
parser.add_argument('--nq_data', type=str, default='TriviaQuestion2NQ_Transform_Dataset/NaturalQuestions_train_reformatted.json')
parser.add_argument('--nqlike_data', type=str, default='intermediate_results/nqlike_train.json')
parser.add_argument('--max_term_features', type=int, default=50)
parser.add_argument('--seq', type=str, default='')
parser.add_argument('--seq', type=str, default='nq_like')
args = parser.parse_args()
# set flag and if_qb_last_sent here
# 0 --wellformedness accuracy output
Expand Down Expand Up @@ -422,5 +422,5 @@ def save_dictionary(self, questions, file_path):
if args.predictions:
# test
print('Evaluate NQ-like')
c.save_dictionary(nq_like, prediction_path)
c.save_dictionary(nq_like, prediction_path, model)

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
spacy
spacy==2.1.0
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to specify "spacy==2.1.0" in requirements, or install spacy first before building neuralcoref from source in Makefile. This can avoid incompatibilities, as suggested in huggingface/neuralcoref#197

neuralcoref
matplotlib
sklearn