@@ -23,8 +23,10 @@ def word_freq(word: str, domain: str = "all") -> int:
23
23
This function queries the server of the Thai National Corpus.
24
24
Internet connection is required.
25
25
26
- **IMPORTANT:** Currently (as of 29 April 2019) always return 0,
27
- as the service URL has been changed and the code is not updated yet.
26
+ **IMPORTANT:** Currently (as of 29 April 2019) it is likely to return 0,
27
+ regardless of the word, as the service URL has been changed and the code
28
+ has not been updated yet.
29
+ The new URL is http://www.arts.chula.ac.th/~ling/tnc3/
28
30
29
31
:param string word: word
30
32
:param string domain: domain
@@ -42,8 +44,7 @@ def word_freq(word: str, domain: str = "all") -> int:
42
44
"leisure" : "9" ,
43
45
"others" : "0" ,
44
46
}
45
- url = "http://www.arts.chula.ac.th/~ling/TNCII/corp.php"
46
- # New URL is http://www.arts.chula.ac.th/~ling/tnc3/
47
+ url = "http://www.arts.chula.ac.th/~ling/tnc3/"
47
48
data = {"genre[]" : "" , "domain[]" : listdomain [domain ], "sortby" : "perc" , "p" : word }
48
49
49
50
r = requests .post (url , data = data )
@@ -63,9 +64,10 @@ def word_freqs() -> List[Tuple[str, int]]:
63
64
Get word frequencies from the Thai National Corpus (TNC)
64
65
"""
65
66
lines = list (get_corpus (_FILENAME ))
66
- listword = []
67
+ word_freqs = []
67
68
for line in lines :
68
- listindata = line .split ("\t " )
69
- listword .append ((listindata [0 ], int (listindata [1 ])))
69
+ word_freq = line .split ("\t " )
70
+ if len (word_freq ) >= 2 :
71
+ word_freqs .append ((word_freq [0 ], int (word_freq [1 ])))
70
72
71
- return listword
73
+ return word_freqs
0 commit comments