-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
40 lines (34 loc) · 1.12 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# coding: utf-8
import numpy as np
import os
def load_pretrained_wordvector(filename):
    """Load a text-format word-embedding file into two Python lists.

    The first non-blank line is treated as a header whose second
    whitespace-separated token is the embedding dimension. Every later
    non-blank line is read as ``<word> <value> <value> ...``. Blank lines
    are ignored.

    Args:
        filename: Path of the UTF-8 encoded embedding file.

    Returns:
        A ``(vocab, embed)`` tuple. ``vocab`` holds the words in file order.
        ``embed`` starts with one all-zero row of the header's dimension,
        followed by one row per word, so for a non-empty file
        ``len(embed) == len(vocab) + 1``. Word rows contain the raw string
        tokens from the file; they are not converted to numbers here.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If the header line has fewer than two tokens.
        ValueError: If the header's second token is not an integer.
    """
    vocab, embed = [], []
    header_seen = False
    # The context manager closes the file even if a line fails to parse.
    with open(filename, 'r', encoding='utf-8') as handle:
        for line in handle:
            tokens = line.strip().split()
            if not tokens:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            if header_seen:
                vocab.append(tokens[0])
                embed.append(tokens[1:])
            else:
                n_embed = int(tokens[1])
                # The header contributes a zero vector to embed but no
                # matching entry in vocab.
                embed.append([0] * n_embed)
                header_seen = True
    print('Great! Loaded word_embedding successfully !')
    return vocab, embed
if __name__ == '__main__':
    # Script entry point: read <cwd>/related_data/output_final.txt and append
    # selected samples to <cwd>/related_data/data_true.txt.
    # os.path.join avoids the invalid '\o' / '\d' escape sequences and the
    # hard-coded Windows separator used previously.
    fold_path = os.path.join(os.getcwd(), 'related_data')
    file_read = os.path.join(fold_path, 'output_final.txt')
    file_write = os.path.join(fold_path, 'data_true.txt')

    with open(file_read, 'r', encoding='utf-8') as src:
        data = src.readlines()

    # Add the correctly predicted samples to the new training set.
    with open(file_write, 'a', encoding='utf-8') as dst:
        for sample in data:
            fields = sample.split('\t')
            if len(fields) < 4:
                # Too few tab-separated fields (e.g. a blank line): skip it
                # rather than fail with IndexError on fields[-4].
                continue
            air_name = fields[-4]
            text = fields[-1]
            # A sample is kept when the name minus its last two characters
            # occurs in the text.
            # NOTE(review): presumably the last two characters are a suffix
            # that the text omits -- confirm against the data format.
            if air_name[:-2] in text:
                # text is the last field, so it still ends with the line's
                # newline; the full-width brackets are stripped before writing.
                dst.write(air_name + ' ' + text.replace('【', '').replace('】', ''))