-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdwl_tweet_files.py
44 lines (43 loc) · 1.11 KB
/
dwl_tweet_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Map between word(s) and emoji(s); the mapping can be many-to-many.
# We can do that, but:
# 1. it requires a good machine, which we don't have;
# 2. it requires a lot of data, which we can download from above.
# Given a word, find the related emoji.
# import gensim.downloader as api
from data import *;
import dwl_tweets;
# Module-level lists, both initially empty. No function visible in this file
# reads or mutates them -- presumably kept for importers or interactive use;
# verify before removing.
corpus = []
pref_corpus = []
def read_file(f_name):
    """Read a text file and return its whole content parsed as an integer.

    Args:
        f_name: Path of the file to read.

    Returns:
        The file content converted with ``int()`` (surrounding whitespace,
        such as a trailing newline, is accepted by ``int()``).

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the content is not a valid integer literal.
    """
    # The context manager closes the handle even when read() raises.
    with open(f_name, 'r') as f:
        content = f.read()
    return int(content)
def write_file(f_name, content):
    """Replace the content of a text file with ``content``.

    The file is created if it does not exist and truncated if it does.

    Args:
        f_name: Path of the file to write.
        content: String to write.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # Plain 'w' is enough: the file is only written, never read back here.
    # The context manager closes the handle even when write() raises.
    with open(f_name, 'w') as f:
        f.write(content)
def read_write_file(f_name):
    """Advance the integer stored in ``f_name`` by ``Q_LEN``.

    Reads the current value with ``read_file``, writes back that value plus
    ``Q_LEN`` with ``write_file``, and returns the value that was in the file
    before the update.
    """
    previous = read_file(f_name)
    write_file(f_name, str(previous + Q_LEN))
    return previous
def write_corpus(i, corpus):
    """Write ``corpus`` to the UTF-8 text file ``F_NAME_PREF + str(i) + '.txt'``.

    Each element of ``corpus`` becomes one line: its items joined by a single
    space and terminated by a newline. An empty ``corpus`` produces an empty
    file.

    Args:
        i: Value used to build the file name (converted with ``str``).
        corpus: Iterable whose elements are iterables of strings.

    Raises:
        OSError: If the file cannot be opened or written.
        TypeError: If an element contains a non-string item (from ``str.join``).
    """
    path = F_NAME_PREF + str(i) + '.txt'
    # The context manager closes the handle even if join() raises on a
    # malformed entry part-way through the corpus.
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(' '.join(arr) + '\n' for arr in corpus)
def main(n_files=1):
    """Download tweet batches and store each one in its own corpus file.

    For each batch, the current start offset is read from ``start.txt`` and
    passed to ``dwl_tweets.main``. The returned corpus is written with
    ``write_corpus`` and the offset in ``start.txt`` is advanced by ``Q_LEN``.

    Args:
        n_files: Maximum number of corpus files to create in this run.
            Defaults to 1, the value previously hard-coded in the function.

    The loop stops early when ``dwl_tweets.main`` returns ``-1``; in that case
    ``start.txt`` is left unchanged for the failed batch.
    """
    # NOTE(review): the offset update is assumed to belong inside the loop,
    # so that every successful batch advances start.txt -- confirm.
    for _ in range(n_files):
        start = read_file('start.txt')
        batch = dwl_tweets.main(start)
        if batch == -1:
            # -1 is treated as a stop signal from the downloader.
            break
        write_corpus(start, batch)
        write_file('start.txt', str(start + Q_LEN))
# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()