forked from booknlp/booknlp
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathrun_booknlp.py
More file actions
executable file
·41 lines (33 loc) · 1.76 KB
/
run_booknlp.py
File metadata and controls
executable file
·41 lines (33 loc) · 1.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python3
import argparse
import os
from booknlp.booknlp import BookNLP
def main():
    """Parse CLI arguments and run the BookNLP pipeline on one text file.

    Reads the input text file, runs the configured BookNLP pipeline
    components over it, and writes the result files into the output
    directory as ``<output_dir>/<book_id>.*``.
    """
    parser = argparse.ArgumentParser(description='Run BookNLP pipeline on a text file')
    parser.add_argument('input_file', help='Input text file to process')
    parser.add_argument('--output-dir', default='output', help='Output directory for results')
    parser.add_argument('--model', choices=['small', 'big'], default='big', help='Model size to use (big for higher accuracy)')
    parser.add_argument('--pipeline', default='entity,quote,supersense,event,coref',
                        help='Pipeline components to run (comma-separated)')
    args = parser.parse_args()

    # Create the output directory if it doesn't already exist
    os.makedirs(args.output_dir, exist_ok=True)

    # Derive book_id from the input filename. os.path is used instead of
    # naive '/' splitting so Windows-style separators also work, and only
    # the final extension is stripped (e.g. 'my.book.txt' -> 'my.book',
    # where the old split('.')[0] would wrongly truncate to 'my').
    book_id = os.path.splitext(os.path.basename(args.input_file))[0]

    # Model parameters with BERT enabled and optimized for accuracy
    model_params = {
        "pipeline": args.pipeline,          # Customizable pipeline components
        "model": args.model,                # 'big' model for better accuracy
        "use_bert": True,                   # Enable BERT for better accuracy
        "bert_model": "bert-base-uncased",  # Use base BERT model
        "pronominalCorefOnly": False        # Full coreference, not just pronominal
    }

    print(f"Using model: {args.model} with BERT enabled")

    # Initialize BookNLP for English with the chosen parameters
    booknlp = BookNLP("en", model_params)

    # Process the input file and save results to the output directory
    print(f"Processing {args.input_file}...")
    booknlp.process(args.input_file, args.output_dir, book_id)
    print(f"Results saved in {args.output_dir}/{book_id}.*")
# Run the CLI entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()