Skip to content

Commit 49683e4

Browse files
committed
Update docstrings and make some symbols module-private
1 parent 07fe5d0 commit 49683e4

File tree

6 files changed

+111
-59
lines changed

6 files changed

+111
-59
lines changed

bogo/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,27 @@
1+
"""\
2+
BoGo is a Python 3 Vietnamese input method conversion library. This library
3+
is intentionally functional, with no internal state or side effects.
4+
5+
Usage
6+
-----
7+
8+
>>> import bogo
9+
>>> bogo.process_sequence('meof')
10+
'mèo'
11+
>>> bogo.process_sequence('meo2', rules=bogo.get_vni_definition())
12+
'mèo'
13+
```
14+
15+
Some functions from bogo.bogo are exported to the package top level:
16+
17+
- process_key()
18+
- process_sequence()
19+
- get_telex_definition()
20+
- get_vni_definition()
21+
22+
Read help(bogo.bogo) for more help.
23+
"""
24+
125
from bogo.bogo import \
226
process_key, \
327
process_sequence, \

bogo/accent.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@
2020
# along with ibus-bogo. If not, see <http://www.gnu.org/licenses/>.
2121
#
2222

23+
"""
24+
Utility functions to deal with accents (should have been called tones),
25+
which are diacritical markings that change the pitch of a character.
26+
E.g. the acute accent in á.
27+
"""
28+
2329
# TODO: add is_valid_accent() to be on par with mark.py and use it
2430
# at the end of new_bogo_engine.transform()
2531

bogo/bogo.py

Lines changed: 64 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020
# along with ibus-bogo. If not, see <http://www.gnu.org/licenses/>.
2121
#
2222

23+
"""
24+
Read the docstring for process_sequence() and process_key() first.
25+
"""
26+
2327
from __future__ import unicode_literals
2428
from bogo.validation import is_valid_combination
2529
from bogo import utils, accent, mark
@@ -30,7 +34,7 @@
3034
Accent = accent.Accent
3135

3236

33-
class Action:
37+
class _Action:
3438
UNDO = 3
3539
ADD_MARK = 2
3640
ADD_ACCENT = 1
@@ -93,14 +97,21 @@ def get_vni_definition():
9397
}
9498

9599

96-
def is_processable(comps):
100+
def _is_processable(comps):
97101
# For now only check the last 2 components
98102
return is_valid_combination(('', comps[1], comps[2]), final_form=False)
99103

100104

101105
def process_sequence(sequence,
102106
rules=None,
103107
skip_non_vietnamese=True):
108+
"""\
109+
Convert a key sequence into a Vietnamese string with diacritical marks.
110+
111+
Args:
112+
rules (optional): see docstring for process_key().
113+
skip_non_vietnamese (optional): see docstring for process_key().
114+
"""
104115
result = ""
105116
raw = result
106117

@@ -177,37 +188,37 @@ def default_return():
177188

178189
comps = utils.separate(string)
179190

180-
# if not is_processable(comps):
191+
# if not _is_processable(comps):
181192
# return default_return()
182193

183194
# Find all possible transformations this keypress can generate
184-
trans_list = get_transformation_list(
195+
trans_list = _get_transformation_list(
185196
key, rules, fallback_sequence)
186197

187198
# Then apply them one by one
188199
new_comps = list(comps)
189200
for trans in trans_list:
190-
new_comps = transform(new_comps, trans)
201+
new_comps = _transform(new_comps, trans)
191202

192203
if new_comps == comps:
193204
tmp = list(new_comps)
194205

195206
# If none of the transformations (if any) work
196207
# then this keystroke is probably an undo key.
197-
if can_undo(new_comps, trans_list):
208+
if _can_undo(new_comps, trans_list):
198209
# The prefix "_" means undo.
199210
for trans in map(lambda x: "_" + x, trans_list):
200-
new_comps = transform(new_comps, trans)
211+
new_comps = _transform(new_comps, trans)
201212

202213
# Undoing the w key with the TELEX input method with the
203214
# w:<ư extension requires some care.
204215
#
205216
# The input (ư, w) should be undone as w
206217
# on the other hand, (ư, uw) should return uw.
207218
#
208-
# transform() is not aware of the 2 ways to generate
219+
# _transform() is not aware of the 2 ways to generate
209220
# ư in TELEX and always think ư was created by uw.
210-
# Therefore, after calling transform() to undo ư,
221+
# Therefore, after calling _transform() to undo ư,
211222
# we always get ['', 'u', ''].
212223
#
213224
# So we have to clean it up a bit.
@@ -251,14 +262,14 @@ def user_didnt_type_uww():
251262
return result
252263

253264

254-
def get_transformation_list(key, im, fallback_sequence):
265+
def _get_transformation_list(key, im, fallback_sequence):
255266
"""
256-
Return the list of transformations inferred from the entered key. The
257-
map between transform types and keys is given by module
258-
bogo_config (if exists) or by variable simple_telex_im
267+
Return the list of transformations inferred from the entered key. The
268+
map between transform types and keys is given by module
269+
bogo_config (if exists) or by variable simple_telex_im
259270
260-
if entered key is not in im, return "+key", meaning appending
261-
the entered key to current text
271+
if entered key is not in im, return "+key", meaning appending
272+
the entered key to current text
262273
"""
263274
# if key in im:
264275
# lkey = key
@@ -282,7 +293,7 @@ def get_transformation_list(key, im, fallback_sequence):
282293
# TODO Use takewhile()/dropwhile() to process the last IM keypress
283294
# instead of assuming it's the last key in fallback_sequence.
284295
t = list(map(lambda x: "_" + x,
285-
get_transformation_list(fallback_sequence[-2], im,
296+
_get_transformation_list(fallback_sequence[-2], im,
286297
fallback_sequence[:-1])))
287298
# print(t)
288299
trans_list = t
@@ -294,61 +305,61 @@ def get_transformation_list(key, im, fallback_sequence):
294305
return ['+' + key]
295306

296307

297-
def get_action(trans):
308+
def _get_action(trans):
298309
"""
299310
Return the action inferred from the transformation `trans`
300311
and the parameter going with this action
301-
An Action.ADD_MARK goes with a Mark
302-
while an Action.ADD_ACCENT goes with an Accent
312+
An _Action.ADD_MARK goes with a Mark
313+
while an _Action.ADD_ACCENT goes with an Accent
303314
"""
304315
# TODO: VIQR-like convention
305316
if trans[0] in ('<', '+'):
306-
return Action.ADD_CHAR, trans[1]
317+
return _Action.ADD_CHAR, trans[1]
307318
if trans[0] == "_":
308-
return Action.UNDO, trans[1:]
319+
return _Action.UNDO, trans[1:]
309320
if len(trans) == 2:
310321
if trans[1] == '^':
311-
return Action.ADD_MARK, Mark.HAT
322+
return _Action.ADD_MARK, Mark.HAT
312323
if trans[1] == '+':
313-
return Action.ADD_MARK, Mark.BREVE
324+
return _Action.ADD_MARK, Mark.BREVE
314325
if trans[1] == '*':
315-
return Action.ADD_MARK, Mark.HORN
326+
return _Action.ADD_MARK, Mark.HORN
316327
if trans[1] == "-":
317-
return Action.ADD_MARK, Mark.BAR
328+
return _Action.ADD_MARK, Mark.BAR
318329
# if trans[1] == "_":
319-
# return Action.ADD_MARK, Mark.NONE
330+
# return _Action.ADD_MARK, Mark.NONE
320331
else:
321332
if trans[0] == "\\":
322-
return Action.ADD_ACCENT, Accent.GRAVE
333+
return _Action.ADD_ACCENT, Accent.GRAVE
323334
if trans[0] == "/":
324-
return Action.ADD_ACCENT, Accent.ACUTE
335+
return _Action.ADD_ACCENT, Accent.ACUTE
325336
if trans[0] == "?":
326-
return Action.ADD_ACCENT, Accent.HOOK
337+
return _Action.ADD_ACCENT, Accent.HOOK
327338
if trans[0] == "~":
328-
return Action.ADD_ACCENT, Accent.TIDLE
339+
return _Action.ADD_ACCENT, Accent.TIDLE
329340
if trans[0] == ".":
330-
return Action.ADD_ACCENT, Accent.DOT
341+
return _Action.ADD_ACCENT, Accent.DOT
331342
# if trans[0] == "_":
332-
# return Action.ADD_ACCENT, Accent.NONE
343+
# return _Action.ADD_ACCENT, Accent.NONE
333344

334345

335-
def transform(comps, trans):
346+
def _transform(comps, trans):
336347
"""
337348
Transform the given string with transform type trans
338349
"""
339-
logging.debug("== In transform(%s, %s) ==", comps, trans)
350+
logging.debug("== In _transform(%s, %s) ==", comps, trans)
340351
components = list(comps)
341352

342-
action, parameter = get_action(trans)
343-
if action == Action.ADD_MARK and \
353+
action, parameter = _get_action(trans)
354+
if action == _Action.ADD_MARK and \
344355
components[2] == "" and \
345356
mark.strip(components[1]).lower() in ['oe', 'oa'] and trans == "o^":
346-
action, parameter = Action.ADD_CHAR, trans[0]
357+
action, parameter = _Action.ADD_CHAR, trans[0]
347358

348-
if action == Action.ADD_ACCENT:
359+
if action == _Action.ADD_ACCENT:
349360
logging.debug("add_accent(%s, %s)", components, parameter)
350361
components = accent.add_accent(components, parameter)
351-
elif action == Action.ADD_MARK and mark.is_valid_mark(components, trans):
362+
elif action == _Action.ADD_MARK and mark.is_valid_mark(components, trans):
352363
logging.debug("add_mark(%s, %s)", components, parameter)
353364
components = mark.add_mark(components, parameter)
354365

@@ -367,7 +378,7 @@ def transform(comps, trans):
367378
components[1] = ("u", "U")[components[1][0].isupper()] + components[1][1]
368379
components = accent.add_accent(components, ac)
369380

370-
elif action == Action.ADD_CHAR:
381+
elif action == _Action.ADD_CHAR:
371382
if trans[0] == "<":
372383
if not components[2]:
373384
# Only allow ư, ơ or ươ sitting alone in the middle part
@@ -388,10 +399,10 @@ def transform(comps, trans):
388399
components[1] = ('ư', 'Ư')[components[1][0].isupper()] + \
389400
('ơ', 'Ơ')[components[1][1].isupper()] + components[1][2:]
390401
components = accent.add_accent(components, ac)
391-
elif action == Action.UNDO:
392-
components = reverse(components, trans[1:])
402+
elif action == _Action.UNDO:
403+
components = _reverse(components, trans[1:])
393404

394-
if action == Action.ADD_MARK or (action == Action.ADD_CHAR and parameter.isalpha()):
405+
if action == _Action.ADD_MARK or (action == _Action.ADD_CHAR and parameter.isalpha()):
395406
# If there is any accent, remove and reapply it
396407
# because it is likely to be misplaced in previous transformations
397408
ac = accent.get_accent_string(components[1])
@@ -404,28 +415,28 @@ def transform(comps, trans):
404415
return components
405416

406417

407-
def reverse(components, trans):
418+
def _reverse(components, trans):
408419
"""
409420
Reverse the effect of transformation 'trans' on 'components'
410421
If the transformation does not affect the components, return the original
411422
string.
412423
"""
413424

414-
action, parameter = get_action(trans)
425+
action, parameter = _get_action(trans)
415426
comps = list(components)
416427
string = utils.join(comps)
417428

418-
if action == Action.ADD_CHAR and string[-1].lower() == parameter.lower():
429+
if action == _Action.ADD_CHAR and string[-1].lower() == parameter.lower():
419430
if comps[2]:
420431
i = 2
421432
elif comps[1]:
422433
i = 1
423434
else:
424435
i = 0
425436
comps[i] = comps[i][:-1]
426-
elif action == Action.ADD_ACCENT:
437+
elif action == _Action.ADD_ACCENT:
427438
comps = accent.add_accent(comps, Accent.NONE)
428-
elif action == Action.ADD_MARK:
439+
elif action == _Action.ADD_MARK:
429440
if parameter == Mark.BAR:
430441
comps[0] = comps[0][:-1] + \
431442
mark.add_mark_char(comps[0][-1:], Mark.NONE)
@@ -436,24 +447,24 @@ def reverse(components, trans):
436447
return comps
437448

438449

439-
def can_undo(comps, trans_list):
450+
def _can_undo(comps, trans_list):
440451
"""
441452
Return whether the components can be undone with one of the transformations in
442453
trans_list.
443454
"""
444455
comps = list(comps)
445456
accent_list = list(map(accent.get_accent_char, comps[1]))
446457
mark_list = list(map(mark.get_mark_char, utils.join(comps)))
447-
action_list = list(map(lambda x: get_action(x), trans_list))
458+
action_list = list(map(lambda x: _get_action(x), trans_list))
448459

449460
def atomic_check(action):
450461
"""
451462
Check if the `action` created one of the marks, accents, or characters
452463
in `comps`.
453464
"""
454-
return (action[0] == Action.ADD_ACCENT and action[1] in accent_list) \
455-
or (action[0] == Action.ADD_MARK and action[1] in mark_list) \
456-
or (action[0] == Action.ADD_CHAR and action[1] == \
465+
return (action[0] == _Action.ADD_ACCENT and action[1] in accent_list) \
466+
or (action[0] == _Action.ADD_MARK and action[1] in mark_list) \
467+
or (action[0] == _Action.ADD_CHAR and action[1] == \
457468
accent.remove_accent_char(comps[1][-1])) # ơ, ư
458469

459470
return any(map(atomic_check, action_list))

bogo/mark.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@
2020
# along with ibus-bogo. If not, see <http://www.gnu.org/licenses/>.
2121
#
2222

23+
"""
24+
Utility functions to deal with marks, which are diacritical markings
25+
that change the base sound of a character but not its tonal quality.
26+
E.g. the hat mark in â.
27+
"""
28+
2329
from __future__ import unicode_literals
2430

2531
from bogo import accent, utils

bogo/test/test_engine.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from functools import partial
77
import codecs
88

9-
from bogo.bogo import Action, get_action, process_sequence
9+
from bogo.bogo import _Action, _get_action, process_sequence
1010
from bogo.mark import Mark
1111
import os
1212

@@ -18,12 +18,12 @@ class TestHelpers():
1818
def test_transform(self):
1919
pass
2020

21-
def test_get_action(self):
21+
def test__get_action(self):
2222
# Add mark
23-
eq_(get_action('a^'), (Action.ADD_MARK, Mark.HAT))
24-
eq_(get_action('a+'), (Action.ADD_MARK, Mark.BREVE))
25-
eq_(get_action('o*'), (Action.ADD_MARK, Mark.HORN))
26-
eq_(get_action('d-'), (Action.ADD_MARK, Mark.BAR))
23+
eq_(_get_action('a^'), (_Action.ADD_MARK, Mark.HAT))
24+
eq_(_get_action('a+'), (_Action.ADD_MARK, Mark.BREVE))
25+
eq_(_get_action('o*'), (_Action.ADD_MARK, Mark.HORN))
26+
eq_(_get_action('d-'), (_Action.ADD_MARK, Mark.BAR))
2727

2828
def test_get_transformation_list(self):
2929
pass

bogo/validation.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@
2020
# along with ibus-bogo. If not, see <http://www.gnu.org/licenses/>.
2121
#
2222

23+
"""
24+
Utility functions to check whether a word looks like Vietnamese
25+
or not (i.e. can be pronounced by a Vietnamese speaker).
26+
"""
27+
2328
from __future__ import unicode_literals
2429
import collections
2530
from bogo import accent, mark, utils

0 commit comments

Comments
 (0)