File tree Expand file tree Collapse file tree 3 files changed +20
-27
lines changed Expand file tree Collapse file tree 3 files changed +20
-27
lines changed Original file line number Diff line number Diff line change 736
736
{
737
737
"data" : {
738
738
"text/plain" : [
739
- " [('จะ ', 51681 ),\n " ,
740
- " ('เป็น ', 51273 ),\n " ,
741
- " ('ไป ', 46567 ),\n " ,
742
- " ('ก็ ', 46409 ),\n " ,
743
- " ('ไม่ ', 45895 ),\n " ,
744
- " ('มี ', 44899 ),\n " ,
745
- " ('ได้ ', 44513 ),\n " ,
746
- " ('ว่า ', 40290 ),\n " ,
747
- " ('ให้ ', 38715 )]"
739
+ " [('งวงช้าง ', 12 ),\n " ,
740
+ " ('เทิบทาบ ', 7 ),\n " ,
741
+ " ('กริน ', 3 ),\n " ,
742
+ " ('นาภี ', 2 ),\n " ,
743
+ " ('แด่วๆ ', 3 ),\n " ,
744
+ " ('คู่ใจ ', 7 ),\n " ,
745
+ " ('คุณพ่อ ', 732 ),\n " ,
746
+ " ('สิ้น ', 755 ),\n " ,
747
+ " ('เยาะ ', 150 )]"
748
748
]
749
749
},
750
750
"execution_count" : 28 ,
Original file line number Diff line number Diff line change 5
5
Credit: Korakot Chaovavanich
6
6
https://www.facebook.com/photo.php?fbid=363640477387469&set=gm.434330506948445&type=3&permPage=1
7
7
"""
8
- import os
9
8
import re
10
9
11
- from pythainlp .corpus import download as download_data
12
- from pythainlp .corpus import get_corpus
13
- from pythainlp .tools import get_full_data_path
14
10
import requests
11
+ from pythainlp .corpus import get_corpus
12
+
15
13
__all__ = ["word_freq" , "word_freqs" ]
16
14
15
+ _FILENAME = "tnc_freq.txt"
16
+
17
17
18
18
def word_freq (word , domain = "all" ):
19
19
"""
@@ -56,10 +56,10 @@ def word_freqs():
56
56
"""
57
57
Get word frequency from Thai National Corpus (TNC)
58
58
"""
59
- lines = list (get_corpus ("tnc_freq.txt" ))
59
+ lines = list (get_corpus (_FILENAME ))
60
60
listword = []
61
61
for line in lines :
62
- listindata = line .split (" " )
62
+ listindata = line .split ("\t " )
63
63
listword .append ((listindata [0 ], int (listindata [1 ])))
64
64
65
65
return listword
Original file line number Diff line number Diff line change 5
5
Credit: Korakot Chaovavanich
6
6
https://www.facebook.com/photo.php?fbid=363640477387469&set=gm.434330506948445&type=3&permPage=1
7
7
"""
8
- import os
9
8
10
- from pythainlp .corpus import download as download_data
11
- from pythainlp .tools import get_full_data_path
9
+ from pythainlp .corpus import get_corpus
12
10
13
11
__all__ = ["word_freqs" ]
14
12
13
+ _FILENAME = "ttc_freq.txt"
14
+
15
15
16
16
def word_freqs ():
17
17
"""
18
18
Get word frequency from Thai Textbook Corpus (TTC)
19
19
"""
20
- path = get_full_data_path ("ttc_freq.txt" ) # try local copy first
21
- if not os .path .exists (path ): # if fail, download from internet
22
- download_data ("ttc" )
23
-
24
- with open (path , "r" , encoding = "utf8" ) as f :
25
- lines = f .read ().splitlines ()
26
- f .close ()
27
-
20
+ lines = list (get_corpus (_FILENAME ))
28
21
listword = []
29
22
for line in lines :
30
- listindata = line .split (" " )
23
+ listindata = line .split ("\t " )
31
24
listword .append ((listindata [0 ], int (listindata [1 ])))
32
25
33
26
return listword
You can’t perform that action at this time.
0 commit comments