-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathhover_indexing.py
30 lines (21 loc) · 988 Bytes
/
hover_indexing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import os
import argparse
from colbert.infra import Run, ColBERTConfig, RunConfig
from colbert import Indexer
from colbert.utils.utils import print_message
def main(args):
    """Index the collection found under ``args.datadir`` with a ColBERT ``Indexer``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``datadir`` (directory holding the collection and checkpoint),
            ``root`` (passed to ``RunConfig``), ``nbits`` (passed to
            ``ColBERTConfig``) and ``index`` (name given to the index).
    """
    print_message("#> Starting...")

    # Both inputs live at fixed relative locations beneath the data directory.
    data_dir = args.datadir
    collection_tsv = os.path.join(data_dir, 'wiki.abstracts.2017/collection.tsv')
    model_checkpoint = os.path.join(data_dir, 'hover.checkpoints-v1.0/flipr-v1.0.dnn')

    with Run().context(RunConfig(root=args.root)):
        # Document length is fixed at 256; only nbits is caller-controlled.
        index_config = ColBERTConfig(doc_maxlen=256, nbits=args.nbits)
        Indexer(model_checkpoint, config=index_config).index(
            name=args.index,
            collection=collection_tsv,
        )
if __name__ == '__main__':
    # Every option is mandatory; argparse reports a usage error if one is missing.
    required_options = (
        ("--root", str),
        ("--datadir", str),
        ("--index", str),
        ("--nbits", int),
    )

    cli = argparse.ArgumentParser()
    for flag, value_type in required_options:
        cli.add_argument(flag, type=value_type, required=True)

    main(cli.parse_args())