Skip to content

Commit af3e59f

Browse files
committed
TTC will now use local copy of ttc_freq.txt
1 parent 0eac189 commit af3e59f

File tree

3 files changed

+20
-27
lines changed

3 files changed

+20
-27
lines changed

notebooks/pythainlp-get-started.ipynb

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -736,15 +736,15 @@
736736
{
737737
"data": {
738738
"text/plain": [
739-
"[('จะ', 51681),\n",
740-
" ('เป็น', 51273),\n",
741-
" ('ไป', 46567),\n",
742-
" ('ก็', 46409),\n",
743-
" ('ไม่', 45895),\n",
744-
" ('มี', 44899),\n",
745-
" ('ได้', 44513),\n",
746-
" ('ว่า', 40290),\n",
747-
" ('ให้', 38715)]"
739+
"[('งวงช้าง', 12),\n",
740+
" ('เทิบทาบ', 7),\n",
741+
" ('กริน', 3),\n",
742+
" ('นาภี', 2),\n",
743+
" ('แด่วๆ', 3),\n",
744+
" ('คู่ใจ', 7),\n",
745+
" ('คุณพ่อ', 732),\n",
746+
" ('สิ้น', 755),\n",
747+
" ('เยาะ', 150)]"
748748
]
749749
},
750750
"execution_count": 28,

pythainlp/corpus/tnc.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,15 @@
55
Credit: Korakot Chaovavanich‎
66
https://www.facebook.com/photo.php?fbid=363640477387469&set=gm.434330506948445&type=3&permPage=1
77
"""
8-
import os
98
import re
109

11-
from pythainlp.corpus import download as download_data
12-
from pythainlp.corpus import get_corpus
13-
from pythainlp.tools import get_full_data_path
1410
import requests
11+
from pythainlp.corpus import get_corpus
12+
1513
__all__ = ["word_freq", "word_freqs"]
1614

15+
_FILENAME = "tnc_freq.txt"
16+
1717

1818
def word_freq(word, domain="all"):
1919
"""
@@ -56,10 +56,10 @@ def word_freqs():
5656
"""
5757
Get word frequency from Thai National Corpus (TNC)
5858
"""
59-
lines = list(get_corpus("tnc_freq.txt"))
59+
lines = list(get_corpus(_FILENAME))
6060
listword = []
6161
for line in lines:
62-
listindata = line.split(" ")
62+
listindata = line.split("\t")
6363
listword.append((listindata[0], int(listindata[1])))
6464

6565
return listword

pythainlp/corpus/ttc.py

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -5,29 +5,22 @@
55
Credit: Korakot Chaovavanich‎
66
https://www.facebook.com/photo.php?fbid=363640477387469&set=gm.434330506948445&type=3&permPage=1
77
"""
8-
import os
98

10-
from pythainlp.corpus import download as download_data
11-
from pythainlp.tools import get_full_data_path
9+
from pythainlp.corpus import get_corpus
1210

1311
__all__ = ["word_freqs"]
1412

13+
_FILENAME = "ttc_freq.txt"
14+
1515

1616
def word_freqs():
1717
"""
1818
Get word frequency from Thai Textbook Corpus (TTC)
1919
"""
20-
path = get_full_data_path("ttc_freq.txt") # try local copy first
21-
if not os.path.exists(path): # if fail, download from internet
22-
download_data("ttc")
23-
24-
with open(path, "r", encoding="utf8") as f:
25-
lines = f.read().splitlines()
26-
f.close()
27-
20+
lines = list(get_corpus(_FILENAME))
2821
listword = []
2922
for line in lines:
30-
listindata = line.split(" ")
23+
listindata = line.split("\t")
3124
listword.append((listindata[0], int(listindata[1])))
3225

3326
return listword

0 commit comments

Comments
 (0)