Pinafore · SoyMark · Aug 14, 2022 · Aug 17, 2022 · Aug 17, 2022 · Aug 18, 2022
diff --git a/README.md b/README.md
@@ -27,7 +27,7 @@ The first step is to examine how different answers are referred to in
 the dataset.
 
 ```
-python3 intermediate_results/lat_frequency.json
+make intermediate_results/lat_frequency.json
 ```
 
 Next, we transform question from the QB format to look like the NQ

diff --git a/compute_lat_frequency.py b/compute_lat_frequency.py
@@ -46,7 +46,7 @@ def compute_lat_frequency(self, orig_qb_path: str, limit: int=-1) -> None:
         text = qb_data[i]['text']
         lats = self.count_answer_types(Question(qid, page, text))
         # Printing here could cause unicode conversion error ifpage is not pure ASCII
-        if i % 10000 == 0:
+        if i % 100 == 0:
           print("===> %i/%i: %s %s" % (i, len(qb_data), page, str(lats)))
         if limit > 0 and i > limit:
           break
@@ -73,6 +73,9 @@ def write_most_freq_answer_type_for_qid(self, qanta_train_with_answer_type_path:
         page_to_most_freq_answer_type_dict[qb_data[i]['qanta_id']] = self.most_common(qb_data[i]['page'])
 
     #save the most freq answer type for each qid into dictionary
+    dir_name = os.path.dirname(output_file) 
+    if not os.path.exists(dir_name): # create path if it doesn't exist
+      os.makedirs(dir_name) 
     with open(output_file, 'w') as fp:
       json.dump(page_to_most_freq_answer_type_dict, fp, indent=2)
 

diff --git a/quality_classifier.py b/quality_classifier.py
@@ -350,7 +350,7 @@ def generate_feature_weight(self, model):
     feature_weight["BIAS"] = model.intercept_[0]
     return feature_weight
 
-  def save_dictionary(self, questions, file_path):   
+  def save_dictionary(self, questions, file_path, model):   # model: fitted classifier; its predict_proba() is called below
     x = self.prepare_features(questions)
     results = model.predict_proba(x)
 
@@ -394,7 +394,7 @@ def save_dictionary(self, questions, file_path):
   parser.add_argument('--nq_data', type=str, default='TriviaQuestion2NQ_Transform_Dataset/NaturalQuestions_train_reformatted.json')
   parser.add_argument('--nqlike_data', type=str, default='intermediate_results/nqlike_train.json')  
   parser.add_argument('--max_term_features', type=int, default=50)
-  parser.add_argument('--seq', type=str, default='')
+  parser.add_argument('--seq', type=str, default='nq_like')
   args = parser.parse_args()
 	# set flag and if_qb_last_sent here
 	# 0 --wellformedness accuracy output
@@ -422,5 +422,5 @@ def save_dictionary(self, questions, file_path):
   if args.predictions:
 		# test
     print('Evaluate NQ-like')
-    c.save_dictionary(nq_like, prediction_path)
+    c.save_dictionary(nq_like, prediction_path, model) 
 
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
-spacy
+spacy==2.1.0
 neuralcoref
 matplotlib
+scikit-learn