-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_demo.py
80 lines (64 loc) · 2.28 KB
/
run_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import random
import torch
from efficient.model import CharCNNLSTMModel, Dataset
from efficient.utils import read_corpus, read_labels, read_raw_corpus
from efficient.vocab import Vocab
index_mapping = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}
def demo(
model_path,
vocab_path,
test_contents_path,
test_label_path,
num_examples=10,
**model_config
):
vocab = Vocab.load(vocab_path)
predictor = CharCNNLSTMModel(vocab, **model_config)
device = "cuda" if torch.cuda.is_available() else "cpu"
predictor.model.load_state_dict(
torch.load(model_path, map_location=torch.device(device))
)
test_contents = read_corpus(test_contents_path)
test_labels = read_labels(test_label_path)
test_raw_contents = read_raw_corpus(test_contents_path)
test_data = list(zip(test_contents, test_labels, test_raw_contents))
demo_data = random.choices(test_data, k=num_examples)
demo_contents = [c for c, l, r in demo_data]
demo_labels = [l for c, l, r in demo_data]
demo_raw_contents = [r for c, l, r in demo_data]
demo_dataset = Dataset(
demo_contents, demo_labels, vocab, model_config.get("max_word_length"), "cpu"
)
demo_contents, demo_labels, demo_contents_lengths = demo_dataset[len(demo_dataset)]
with torch.no_grad():
pred = predictor.model(demo_contents, demo_contents_lengths)
predicted_labels = torch.argmax(pred, dim=1)
for content, gt, pr in zip(demo_raw_contents, demo_labels, predicted_labels):
print("Content:", content)
print("Predicted category:", index_mapping[int(pr)])
print("Ground truth category:", index_mapping[int(gt)])
print("\n")
if __name__ == "__main__":
vocab_path = "data/vocab.json"
test_contents_path = "data/test_content.txt"
test_label_path = "data/test_label.txt"
model_path = "checkpoints/long/best_model.pkl"
model_config = dict(
char_embed_size=25,
embed_size=500,
hidden_size=500,
max_word_length=30,
batch_size=100,
eta=0.001,
max_grad_norm=1,
max_iter=500,
val_batch_size=500,
)
demo(
model_path,
vocab_path,
test_contents_path,
test_label_path,
num_examples=10,
**model_config
)