Skip to content

Commit a28935e

Browse files
committed
Implement process_sequence() as a shortcut to process_key()
1 parent 601a963 commit a28935e

File tree

4 files changed

+114
-155
lines changed

4 files changed

+114
-155
lines changed

README.md

Lines changed: 4 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -12,63 +12,12 @@ Usage
1212

1313
```python
1414
>>> import bogo
15-
>>> bogo.process_key(string='ca', key='s', fallback_sequence='ca')
16-
('', 'cas')
17-
>>> bogo.process_key(string='', key='n', fallback_sequence='cas')
18-
('cán', 'casn')
15+
>>> bogo.process_sequence('meof')
16+
'mèo'
17+
>>> bogo.process_sequence('meo2', rules=bogo.get_vni_definition())
18+
'mèo'
1919
```
2020

21-
`process_key()` is intended to be called successively on each keystroke with
22-
the following arguments.
23-
24-
- `string`: The previously processed string or "".
25-
- `key`: The keystroke.
26-
- `fallback_sequence`: The previous keystrokes.
27-
- `input_method_definition` (optional): A dictionary listing
28-
transformation rules. Defaults to the value returned by `get_telex_definition()`.
29-
- `skip_non_vietnamese` (optional): Whether to skip results that
30-
don't seem like Vietnamese. Defaults to True.
31-
32-
It returns a tuple. The first item of which is the processed
33-
Vietnamese string, the second item is the next fallback sequence.
34-
The two items are to be fed back into the next call of process_key()
35-
as `string` and `fallback_sequence`. If `skip_non_vietnamese` is
36-
True and the resulting string doesn't look like Vietnamese,
37-
both items contain the `fallback_sequence`.
38-
39-
Note that when a key is an undo key, it won't get appended to
40-
`fallback_sequence`.
41-
42-
```python
43-
>>> process_key('â', 'a', 'aa')
44-
('aa', 'aa')
45-
```
46-
47-
`input_method_definition` is a dictionary that maps keystrokes to
48-
their effect string. The effects can be one of the following:
49-
50-
- 'a^': a with circumflex (â), only affect an existing 'a family'
51-
- 'a+': a with breve (ă), only affect an existing 'a family'
52-
- 'e^': e with circumflex (ê), only affect an existing 'e family'
53-
- 'o^': o with circumflex (ô), only affect an existing 'o family'
54-
- 'o*': o with horn (ơ), only affect an existing 'o family'
55-
- 'd-': d with bar (đ), only affect an existing 'd'
56-
- '/': acute (sắc), affect an existing vowel
57-
- '\\': grave (huyền), affect an existing vowel
58-
- '?': hook (hỏi), affect an existing vowel
59-
- '~': tilde (ngã), affect an existing vowel
60-
- '.': dot (nặng), affect an existing vowel
61-
- '<ư': append ư
62-
- '<ơ': append ơ
63-
64-
A keystroke entry can have multiple effects, in which case the
65-
dictionary entry's value should be a list of the possible
66-
effect strings. Although you should try to avoid this if
67-
you are defining a custom input method rule.
68-
69-
We have already defined input method definitions for TELEX and VNI with the
70-
`get_telex_definition()` and `get_vni_definition()` functions.
71-
7221
More help is available as docstring for each module and function.
7322

7423
BoGo is well tested with Python 3.2 and Python 3.3.

bogo/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1-
from bogo.bogo import process_key, get_telex_definition, get_vni_definition
1+
from bogo.bogo import \
2+
process_key, \
3+
process_sequence, \
4+
get_telex_definition, \
5+
get_vni_definition

bogo/bogo.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,16 +98,33 @@ def is_processable(comps):
9898
return is_valid_combination(('', comps[1], comps[2]), final_form=False)
9999

100100

101+
def process_sequence(sequence,
                     rules=None,
                     skip_non_vietnamese=True):
    """Convert a key sequence into a Vietnamese string with diacritics.

    Shortcut for calling process_key() successively on each keystroke
    of `sequence`.

    Args:
        sequence: An iterable of keystrokes (e.g. the string 'meof').
        rules (optional): A dictionary of transformation rules.
            Defaults to the value returned by get_telex_definition().
        skip_non_vietnamese (optional): Whether to skip results that
            don't seem like Vietnamese. Defaults to True.

    Returns:
        The fully processed Vietnamese string.
    """
    result = ""
    raw = result
    # Feed each keystroke through process_key(), carrying forward both
    # the processed string and the raw fallback sequence between calls.
    for key in sequence:
        result, raw = process_key(
            string=result,
            key=key,
            fallback_sequence=raw,
            rules=rules,
            skip_non_vietnamese=skip_non_vietnamese)

    return result
116+
117+
101118
def process_key(string, key,
102-
fallback_sequence="", input_method_definition=None,
119+
fallback_sequence="", rules=None,
103120
skip_non_vietnamese=True):
104121
"""Process a keystroke.
105122
106123
Args:
107124
string: The previously processed string or "".
108125
key: The keystroke.
109126
fallback_sequence: The previous keystrokes.
110-
input_method_definition (optional): A dictionary listing
127+
rules (optional): A dictionary listing
111128
transformation rules. Defaults to get_telex_definition().
112129
skip_non_vietnamese (optional): Whether to skip results that
113130
don't seem like Vietnamese. Defaults to True.
@@ -128,7 +145,7 @@ def process_key(string, key,
128145
>>> process_key('â', 'a', 'aa')
129146
('aa', 'aa')
130147
131-
`input_method_definition` is a dictionary that maps keystrokes to
148+
`rules` is a dictionary that maps keystrokes to
132149
their effect string. The effects can be one of the following:
133150
134151
'a^': a with circumflex (â), only affect an existing 'a family'
@@ -155,8 +172,8 @@ def process_key(string, key,
155172
def default_return():
156173
return string + key, fallback_sequence + key
157174

158-
if input_method_definition is None:
159-
input_method_definition = get_telex_definition()
175+
if rules is None:
176+
rules = get_telex_definition()
160177

161178
comps = utils.separate(string)
162179

@@ -165,7 +182,7 @@ def default_return():
165182

166183
# Find all possible transformations this keypress can generate
167184
trans_list = get_transformation_list(
168-
key, input_method_definition, fallback_sequence)
185+
key, rules, fallback_sequence)
169186

170187
# Then apply them one by one
171188
new_comps = list(comps)
@@ -195,7 +212,7 @@ def default_return():
195212
#
196213
# So we have to clean it up a bit.
197214
def is_telex_like():
198-
return '<ư' in input_method_definition["w"]
215+
return '<ư' in rules["w"]
199216

200217
def undone_vowel_ends_with_u():
201218
return new_comps[1] and new_comps[1][-1].lower() == "u"

bogo/test/test_engine.py

Lines changed: 81 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,12 @@
66
from functools import partial
77
import codecs
88

9-
from bogo.bogo import process_key, Action, get_action
9+
from bogo.bogo import Action, get_action, process_sequence
1010
from bogo.mark import Mark
1111
import os
1212

1313

14-
def process_seq(seq, skip_non_vietnamese=True):
15-
string = ""
16-
raw = string
17-
for i in seq:
18-
string, raw = process_key(string,
19-
i,
20-
fallback_sequence=raw,
21-
skip_non_vietnamese=skip_non_vietnamese)
22-
return string
23-
24-
25-
process_key_no_skip = partial(process_seq, skip_non_vietnamese=False)
14+
process_key_no_skip = partial(process_sequence, skip_non_vietnamese=False)
2615

2716

2817
class TestHelpers():
@@ -48,41 +37,41 @@ def test_reverse(self):
4837

4938
class TestProcessSeq():
5039
def test_normal_typing(self):
51-
eq_(process_seq('v'), 'v')
52-
eq_(process_seq('aw'), 'ă')
53-
eq_(process_seq('w'), 'ư')
54-
eq_(process_seq('ow'), 'ơ')
55-
eq_(process_seq('oo'), 'ô')
56-
eq_(process_seq('Oo'), 'Ô')
57-
eq_(process_seq('dd'), 'đ')
58-
eq_(process_seq('muaf'), 'mùa')
59-
eq_(process_seq('Doongd'), 'Đông')
60-
eq_(process_seq('gif'), 'gì')
61-
eq_(process_seq('loAnj'), 'loẠn')
62-
eq_(process_seq('muongw'), 'mương')
63-
eq_(process_seq('qur'), 'qur')
64-
eq_(process_seq('Tosan'), 'Toán')
65-
eq_(process_seq('tusnw'), 'tứn')
66-
eq_(process_seq('dee'), 'dê')
67-
eq_(process_seq('mowis'), 'mới')
68-
eq_(process_seq('uwa'), 'ưa')
69-
eq_(process_seq('uwo'), 'ưo')
70-
eq_(process_seq('ddx'), 'đx')
71-
eq_(process_seq('hoacw'), 'hoăc')
72-
eq_(process_seq('cuooi'), 'cuôi')
73-
74-
eq_(process_seq('tooi'), 'tôi')
75-
eq_(process_seq('chuyeenr'), 'chuyển')
76-
eq_(process_seq('ddoonjg'), 'động')
77-
eq_(process_seq('nheechs'), 'nhếch')
40+
eq_(process_sequence('v'), 'v')
41+
eq_(process_sequence('aw'), 'ă')
42+
eq_(process_sequence('w'), 'ư')
43+
eq_(process_sequence('ow'), 'ơ')
44+
eq_(process_sequence('oo'), 'ô')
45+
eq_(process_sequence('Oo'), 'Ô')
46+
eq_(process_sequence('dd'), 'đ')
47+
eq_(process_sequence('muaf'), 'mùa')
48+
eq_(process_sequence('Doongd'), 'Đông')
49+
eq_(process_sequence('gif'), 'gì')
50+
eq_(process_sequence('loAnj'), 'loẠn')
51+
eq_(process_sequence('muongw'), 'mương')
52+
eq_(process_sequence('qur'), 'qur')
53+
eq_(process_sequence('Tosan'), 'Toán')
54+
eq_(process_sequence('tusnw'), 'tứn')
55+
eq_(process_sequence('dee'), 'dê')
56+
eq_(process_sequence('mowis'), 'mới')
57+
eq_(process_sequence('uwa'), 'ưa')
58+
eq_(process_sequence('uwo'), 'ưo')
59+
eq_(process_sequence('ddx'), 'đx')
60+
eq_(process_sequence('hoacw'), 'hoăc')
61+
eq_(process_sequence('cuooi'), 'cuôi')
62+
63+
eq_(process_sequence('tooi'), 'tôi')
64+
eq_(process_sequence('chuyeenr'), 'chuyển')
65+
eq_(process_sequence('ddoonjg'), 'động')
66+
eq_(process_sequence('nheechs'), 'nhếch')
7867

7968
# uơ related
80-
eq_(process_seq('quowr'), 'quở')
81-
eq_(process_seq('huow'), 'huơ')
82-
eq_(process_seq('thuowr'), 'thuở')
83-
eq_(process_seq('QUOWR'), 'QUỞ')
84-
eq_(process_seq('HUOW'), 'HUƠ')
85-
eq_(process_seq('THUOWR'), 'THUỞ')
69+
eq_(process_sequence('quowr'), 'quở')
70+
eq_(process_sequence('huow'), 'huơ')
71+
eq_(process_sequence('thuowr'), 'thuở')
72+
eq_(process_sequence('QUOWR'), 'QUỞ')
73+
eq_(process_sequence('HUOW'), 'HUƠ')
74+
eq_(process_sequence('THUOWR'), 'THUỞ')
8675

8776
# English words
8877
eq_(process_key_no_skip('case'), 'cáe')
@@ -91,7 +80,7 @@ def test_normal_typing(self):
9180
@attr('slow')
9281
def test_with_dictionary(self):
9382
def atomic(word, sequence):
94-
eq_(word, process_seq(sequence))
83+
eq_(word, process_sequence(sequence))
9584

9685
path = os.path.join(os.path.dirname(__file__), 'DauCu.sequences')
9786
with codecs.open(path, "r", "utf-8") as tests:
@@ -101,68 +90,68 @@ def atomic(word, sequence):
10190

10291
def test_bugs_related(self):
10392
# naỳ.
104-
eq_(process_seq('nayf.'), 'này.')
93+
eq_(process_sequence('nayf.'), 'này.')
10594

10695
# nguời
107-
eq_(process_seq('nguowif'), 'người')
108-
eq_(process_seq('nguwowif'), 'người')
96+
eq_(process_sequence('nguowif'), 'người')
97+
eq_(process_sequence('nguwowif'), 'người')
10998

11099
# thươ.
111-
eq_(process_seq("thuowr."), "thuở.")
100+
eq_(process_sequence("thuowr."), "thuở.")
112101

113-
eq_(process_seq("[["), "[")
114-
eq_(process_seq("[["), "[")
102+
eq_(process_sequence("[["), "[")
103+
eq_(process_sequence("[["), "[")
115104

116105
# BUG #77
117-
eq_(process_seq("ddiemer"), "điểm")
106+
eq_(process_sequence("ddiemer"), "điểm")
118107

119108
# BUG #78
120-
eq_(process_seq("tuoufw"), "tườu")
109+
eq_(process_sequence("tuoufw"), "tườu")
121110

122111
# BUG #79
123-
eq_(process_seq("huoswc"), "hước")
112+
eq_(process_sequence("huoswc"), "hước")
124113

125114
# BUG #81
126-
eq_(process_seq("khoefo"), "khoèo")
115+
eq_(process_sequence("khoefo"), "khoèo")
127116

128117
# BUG #82
129-
eq_(process_seq("uorw"), "uở")
118+
eq_(process_sequence("uorw"), "uở")
130119

131120
def test_bug_93(self):
132-
eq_(process_seq("{{"), "{")
133-
eq_(process_seq("}}"), "}")
121+
eq_(process_sequence("{{"), "{")
122+
eq_(process_sequence("}}"), "}")
134123

135124
def test_free_key_position(self):
136-
eq_(process_seq('toios'), 'tối')
137-
eq_(process_seq('toois'), 'tối')
138-
eq_(process_seq('toosi'), 'tối')
125+
eq_(process_sequence('toios'), 'tối')
126+
eq_(process_sequence('toois'), 'tối')
127+
eq_(process_sequence('toosi'), 'tối')
139128

140-
eq_(process_seq('tuyenre'), 'tuyển')
141-
eq_(process_seq('tuyener'), 'tuyển')
142-
eq_(process_seq('tuyeren'), 'tuyển')
143-
eq_(process_seq('tuyerne'), 'tuyển')
144-
eq_(process_seq('tuyeern'), 'tuyển')
145-
eq_(process_seq('tuyeenr'), 'tuyển')
129+
eq_(process_sequence('tuyenre'), 'tuyển')
130+
eq_(process_sequence('tuyener'), 'tuyển')
131+
eq_(process_sequence('tuyeren'), 'tuyển')
132+
eq_(process_sequence('tuyerne'), 'tuyển')
133+
eq_(process_sequence('tuyeern'), 'tuyển')
134+
eq_(process_sequence('tuyeenr'), 'tuyển')
146135

147-
eq_(process_seq('tuwrowng'), 'tưởng')
136+
eq_(process_sequence('tuwrowng'), 'tưởng')
148137

149138
def test_undo(self):
150-
eq_(process_seq('aaa'), 'aa')
151-
eq_(process_seq('aww'), 'aw')
152-
eq_(process_seq('ass'), 'as')
153-
eq_(process_seq('aff'), 'af')
154-
eq_(process_seq('arr'), 'ar')
155-
eq_(process_seq('axx'), 'ax')
156-
eq_(process_seq('ajj'), 'aj')
157-
eq_(process_seq('uww'), 'uw')
158-
eq_(process_seq('oww'), 'ow')
159-
160-
eq_(process_seq('huww'), 'huw')
161-
eq_(process_seq('hww'), 'hw')
162-
eq_(process_seq('ww'), 'w')
163-
eq_(process_seq('uww'), 'uw')
164-
165-
eq_(process_seq('DDd'), 'Dd')
139+
eq_(process_sequence('aaa'), 'aa')
140+
eq_(process_sequence('aww'), 'aw')
141+
eq_(process_sequence('ass'), 'as')
142+
eq_(process_sequence('aff'), 'af')
143+
eq_(process_sequence('arr'), 'ar')
144+
eq_(process_sequence('axx'), 'ax')
145+
eq_(process_sequence('ajj'), 'aj')
146+
eq_(process_sequence('uww'), 'uw')
147+
eq_(process_sequence('oww'), 'ow')
148+
149+
eq_(process_sequence('huww'), 'huw')
150+
eq_(process_sequence('hww'), 'hw')
151+
eq_(process_sequence('ww'), 'w')
152+
eq_(process_sequence('uww'), 'uw')
153+
154+
eq_(process_sequence('DDd'), 'Dd')
166155

167156
eq_(process_key_no_skip('Loorngr'), 'Lôngr')
168157
eq_(process_key_no_skip('LOorngr'), 'LÔngr')
@@ -172,7 +161,7 @@ def test_undo(self):
172161

173162
def test_non_vn(self):
174163
def atomic(word):
175-
eq_(process_seq(word), word)
164+
eq_(process_sequence(word), word)
176165

177166
tests = [
178167
"system",
@@ -190,8 +179,8 @@ def atomic(word):
190179
for test in tests:
191180
yield atomic, test
192181

193-
eq_(process_seq("aans."), "ấn.")
194-
eq_(process_seq("aans]"), "ấn]")
195-
# eq_(process_seq("aans.tuongwj"), "ấn.tượng")
196-
eq_(process_seq("gi[f"), "giờ")
197-
# eq_(process_seq("taojc"), "taojc")
182+
eq_(process_sequence("aans."), "ấn.")
183+
eq_(process_sequence("aans]"), "ấn]")
184+
# eq_(process_sequence("aans.tuongwj"), "ấn.tượng")
185+
eq_(process_sequence("gi[f"), "giờ")
186+
# eq_(process_sequence("taojc"), "taojc")

0 commit comments

Comments
 (0)