Skip to content

Commit ed46d65

Browse files
authored
Merge pull request #303 from PyThaiNLP/thai_time
Add pythainlp.util.thai_time
2 parents 0a4ee41 + c1d4750 commit ed46d65

File tree

7 files changed

+296
-55
lines changed

7 files changed

+296
-55
lines changed

appveyor.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ install:
9494
- SET PATH=%PYTHON%;%PYTHON%/Scripts;%PATH%
9595
# - ECHO %PATH%
9696
- python --version
97-
- pip install --disable-pip-version-check --user --upgrade pip setuptools
97+
# - pip install --disable-pip-version-check --user --upgrade pip setuptools
9898
- pip --version
9999
- pip install coveralls[yaml]
100100
- pip install coverage
@@ -136,7 +136,7 @@ on_success:
136136
# Note: Cygwin is not available on Visual Studio 2019, can try Msys2.
137137
- "ECHO Remove old or huge cache"
138138
- C:\cygwin\bin\find "%LOCALAPPDATA%/pip" -type f -mtime +360 -delete
139-
- C:\cygwin\bin\find "%LOCALAPPDATA%/pip" -type f -size +100M -delete
139+
- C:\cygwin\bin\find "%LOCALAPPDATA%/pip" -type f -size +50M -delete
140140
- C:\cygwin\bin\find "%LOCALAPPDATA%/pip" -empty -delete
141141
# Show size of cache
142142
- C:\cygwin\bin\du -hs "%LOCALAPPDATA%/pip/Cache"

docs/api/util.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,4 @@ Modules
2929
.. autofunction:: thai_to_eng
3030
.. autofunction:: thai_digit_to_arabic_digit
3131
.. autofunction:: thaiword_to_num
32+
.. autofunction:: thai_time

pythainlp/util/__init__.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@
77
"arabic_digit_to_thai_digit",
88
"bahttext",
99
"collate",
10+
"countthai",
1011
"delete_tone",
12+
"deletetone", # Will be deprecated after version 2.1
1113
"digit_to_text",
1214
"eng_to_thai",
1315
"find_keyword",
14-
"countthai",
1516
"is_native_thai",
1617
"isthai",
1718
"isthaichar",
@@ -22,9 +23,11 @@
2223
"reign_year_to_ad",
2324
"text_to_arabic_digit",
2425
"text_to_thai_digit",
26+
"thai_digit_to_arabic_digit",
2527
"thai_strftime",
28+
"thai_time",
2629
"thai_to_eng",
27-
"thai_digit_to_arabic_digit",
30+
"thaicheck", # Will be deprecated after version 2.1
2831
"thaiword_to_num",
2932
]
3033

@@ -40,8 +43,9 @@
4043
)
4144
from .keyboard import eng_to_thai, thai_to_eng
4245
from .keywords import find_keyword, rank
43-
from .normalize import delete_tone, normalize
46+
from .normalize import delete_tone, deletetone, normalize
4447
from .numtoword import bahttext, num_to_thaiword
4548
from .thai import countthai, isthai, isthaichar
46-
from .thaiwordcheck import is_native_thai
49+
from .thai_time import thai_time
50+
from .thaiwordcheck import is_native_thai, thaicheck
4751
from .wordtonum import thaiword_to_num

pythainlp/util/date.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -114,31 +114,38 @@ def _thai_strftime(datetime: datetime.datetime, fmt_char: str) -> str:
114114
)
115115
elif fmt_char == "D":
116116
# Equivalent to ``%m/%d/%y''
117-
str_ = "{}/{}".format(datetime.strftime("%m/%d"),
118-
str(datetime.year + _BE_AD_DIFFERENCE)[-2:])
117+
str_ = "{}/{}".format(
118+
datetime.strftime("%m/%d"),
119+
str(datetime.year + _BE_AD_DIFFERENCE)[-2:],
120+
)
119121
elif fmt_char == "F":
120122
# Equivalent to ``%Y-%m-%d''
121-
str_ = "{}-{}".format(str(datetime.year + _BE_AD_DIFFERENCE),
122-
datetime.strftime("%m-%d"))
123+
str_ = "{}-{}".format(
124+
str(datetime.year + _BE_AD_DIFFERENCE), datetime.strftime("%m-%d")
125+
)
123126
elif fmt_char == "G":
124-
# ISO 8601 year with century representing the year that contains the greater part of the ISO week (%V). Monday as the first day of the week.
127+
# ISO 8601 year with century representing the year that contains the
128+
# greater part of the ISO week (%V). Monday as the first day of the week.
125129
str_ = str(int(datetime.strftime("%G")) + _BE_AD_DIFFERENCE)
126130
elif fmt_char == "g":
127131
# Same year as in ``%G'', but as a decimal number without century (00-99).
128132
str_ = str(int(datetime.strftime("%G")) + _BE_AD_DIFFERENCE)[-2:]
129133
elif fmt_char == "v":
130134
# BSD extension, ' 6-Oct-1976'
131-
str_ = "{:>2}-{}-{}".format(datetime.day,
132-
thai_abbr_months[datetime.month - 1],
133-
datetime.year + _BE_AD_DIFFERENCE)
135+
str_ = "{:>2}-{}-{}".format(
136+
datetime.day,
137+
thai_abbr_months[datetime.month - 1],
138+
datetime.year + _BE_AD_DIFFERENCE,
139+
)
134140
elif fmt_char == "X":
135141
# Locale’s appropriate time representation.
136142
str_ = datetime.strftime("%H:%M:%S")
137143
elif fmt_char == "x":
138144
# Locale’s appropriate date representation.
139145
str_ = "{}/{}/{}".format(
140-
_padding(datetime.day), _padding(
141-
datetime.month), datetime.year + _BE_AD_DIFFERENCE
146+
_padding(datetime.day),
147+
_padding(datetime.month),
148+
datetime.year + _BE_AD_DIFFERENCE,
142149
)
143150
elif fmt_char == "Y":
144151
# Year with century
@@ -147,7 +154,8 @@ def _thai_strftime(datetime: datetime.datetime, fmt_char: str) -> str:
147154
# Year without century
148155
str_ = str(datetime.year + _BE_AD_DIFFERENCE)[2:4]
149156
elif fmt_char == "+":
150-
# National representation of the date and time (the format is similar to that produced by date(1))
157+
# National representation of the date and time
158+
# (the format is similar to that produced by date(1))
151159
# Wed 6 Oct 1976 01:40:00
152160
str_ = "{:<2} {:>2} {} {} {}".format(
153161
thai_abbr_weekdays[datetime.weekday()],
@@ -278,10 +286,13 @@ def thai_strftime(
278286
): # check if requires localization
279287
str_ = _thai_strftime(datetime, fmt_char_nopad)
280288
else:
289+
# Windows may not support this
281290
str_ = datetime.strftime(f"%-{fmt_char_nopad}")
282291
i = i + 1 # consume char after "-"
283292
else:
284-
str_ = "-" # "-" at the end of string has no meaning
293+
str_ = (
294+
"-"
295+
) # "-" at the end of string has no meaning
285296
elif fmt_char == "_":
286297
# GNU libc extension, explicitly specify space (" ") for padding
287298
# Not implemented yet

pythainlp/util/thai_time.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
thai_time() - Spell out time to Thai words
4+
"""
5+
from datetime import datetime, time
6+
from typing import Union
7+
8+
from pythainlp.util.numtoword import num_to_thaiword
9+
10+
_TIME_FORMAT_WITH_SEC = "%H:%M:%S"
11+
_TIME_FORMAT_WITHOUT_SEC = "%H:%M"
12+
13+
14+
def _format_6h(h: int) -> str:
    """
    Spell out an hour using the Thai 6-hour clock.

    :param int h: hour on a 24-hour clock (callers pass ``time.hour``;
        the value is not range-checked here)
    :return: the hour in Thai words
    :rtype: str
    """
    # Fixed names for the hours that are not counted.
    if h == 0:
        return "เที่ยงคืน"
    if h == 12:
        return "เที่ยง"
    if h == 13:
        return "บ่ายโมง"
    if h == 18:
        return "หกโมงเย็น"

    # Counted hours: each period uses its own offset and marker word.
    if h < 7:
        return "ตี" + num_to_thaiword(h)
    if h < 12:
        return num_to_thaiword(h - 6) + "โมงเช้า"
    if h < 18:
        return "บ่าย" + num_to_thaiword(h - 12) + "โมง"
    return num_to_thaiword(h - 18) + "ทุ่ม"
39+
40+
41+
def _format_m6h(h: int) -> str:
    """
    Spell out an hour using the modified Thai 6-hour clock.

    :param int h: hour on a 24-hour clock (callers pass ``time.hour``;
        the value is not range-checked here)
    :return: the hour in Thai words
    :rtype: str
    """
    if h == 0:
        return "เที่ยงคืน"
    if h < 6:
        return "ตี" + num_to_thaiword(h)
    if h < 12:
        # Morning hours keep their 24-hour number.
        return num_to_thaiword(h) + "โมง"
    if h == 12:
        return "เที่ยง"
    if h < 19:
        # Afternoon hours (13-18) are counted from noon.
        return num_to_thaiword(h - 12) + "โมง"
    # Remaining hours are counted from 18:00.
    return num_to_thaiword(h - 18) + "ทุ่ม"
61+
62+
63+
def _format_24h(h: int) -> str:
    """
    Spell out an hour using the 24-hour clock.

    :param int h: hour on a 24-hour clock
    :return: the hour number in Thai words followed by "นาฬิกา"
    :rtype: str
    """
    return num_to_thaiword(h) + "นาฬิกา"
69+
70+
71+
def _format(
    h: int,
    m: int,
    s: int,
    fmt: str = "24h",
    precision: Union[str, None] = None,
) -> str:
    """
    Spell out hour, minute and second in Thai words.

    :param int h: hour on a 24-hour clock
    :param int m: minute
    :param int s: second
    :param str fmt: clock style, one of "24h", "6h" or "m6h"
    :param str precision: "minute" or "second" to always spell out down to
        that level; any other value spells out only the non-zero parts
    :return: the time in Thai words
    :rtype: str
    :raises NotImplementedError: if ``fmt`` is not a supported clock style
    """
    if fmt == "6h":
        spoken = _format_6h(h)
    elif fmt == "m6h":
        spoken = _format_m6h(h)
    elif fmt == "24h":
        spoken = _format_24h(h)
    else:
        raise NotImplementedError(fmt)

    # "ครึ่ง" replaces "thirty minutes" only in the two 6-hour clock styles.
    half_hour = m == 30 and fmt in ("6h", "m6h")

    if precision in ("minute", "second"):
        # Forced precision: minutes are always spelled out, even when zero.
        if half_hour and (s == 0 or precision == "minute"):
            return spoken + "ครึ่ง"
        spoken += num_to_thaiword(m) + "นาที"
        if precision == "second":
            spoken += num_to_thaiword(s) + "วินาที"
        return spoken

    # Default precision: only non-zero parts are spelled out.
    if m:
        if half_hour and s == 0:
            spoken += "ครึ่ง"
        else:
            spoken += num_to_thaiword(m) + "นาที"
    if s:
        spoken += num_to_thaiword(s) + "วินาที"

    return spoken
109+
110+
111+
def thai_time(
    time_data: Union[time, datetime, str],
    fmt: str = "24h",
    precision: Union[str, None] = None,
) -> str:
    """
    Spell out time to Thai words.

    :param time_data: time input, can be a datetime.time object \
        or a datetime.datetime object \
        or a string (in H:M or H:M:S format, using 24-hour clock)
    :type time_data: datetime.time, datetime.datetime or str
    :param str fmt: time output format
        * *24h* - 24-hour clock (default)
        * *6h* - 6-hour clock
        * *m6h* - Modified 6-hour clock
    :param str precision: precision of the spell out
        * *minute* - always spell out to minute level
        * *second* - always spell out to second level
        * None - spell out only non-zero parts
    :return: Time spell out in Thai words
    :rtype: str
    :raises TypeError: if ``time_data`` is not a time, a datetime or a string
    :raises ValueError: if ``time_data`` is an empty string or does not
        match the H:M or H:M:S format
    :raises NotImplementedError: if ``fmt`` is not a supported format

    :Example:

        thai_time("8:17")
        # output:
        # แปดนาฬิกาสิบเจ็ดนาที

        thai_time("8:17", "6h")
        # output:
        # สองโมงเช้าสิบเจ็ดนาที

        thai_time("8:17", "m6h")
        # output:
        # แปดโมงสิบเจ็ดนาที

        thai_time("18:30", fmt="m6h")
        # output:
        # หกโมงครึ่ง

        thai_time(datetime.time(12, 3, 0))
        # output:
        # สิบสองนาฬิกาสามนาที

        thai_time(datetime.time(12, 3, 0), precision="second")
        # output:
        # สิบสองนาฬิกาสามนาทีศูนย์วินาที
    """
    if isinstance(time_data, (time, datetime)):
        moment = time_data
    elif not isinstance(time_data, str):
        raise TypeError(
            "Time data must be a datetime.time object, "
            "a datetime.datetime object, or a string."
        )
    elif not time_data:
        raise ValueError("Time string cannot be empty.")
    else:
        moment = None
        # Try the more specific H:M:S pattern first, then fall back to H:M.
        for pattern in (_TIME_FORMAT_WITH_SEC, _TIME_FORMAT_WITHOUT_SEC):
            try:
                moment = datetime.strptime(time_data, pattern)
            except ValueError:
                continue
            break

        if moment is None:
            raise ValueError(
                f"Time string '{time_data}' does not match H:M or H:M:S format."
            )

    return _format(moment.hour, moment.minute, moment.second, fmt, precision)

pythainlp/util/thaiwordcheck.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def is_native_thai(word: str) -> bool:
111111
chs = re.findall(_TH_CONSONANTS_PATTERN, word)
112112
if not chs:
113113
return False
114-
114+
115115
# If there's only one Thai consonant -> it can be a native Thai
116116
if len(chs) == 1:
117117
return True
@@ -131,6 +131,6 @@ def is_native_thai(word: str) -> bool:
131131
def thaicheck(word: str) -> bool:
132132
warnings.warn(
133133
"thaicheck is deprecated, use is_native_thai instead",
134-
DeprecationWarning
134+
DeprecationWarning,
135135
)
136136
return is_native_thai(word)

0 commit comments

Comments
 (0)