1
+
2
+
3
+ <!DOCTYPE html>
4
+ <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
5
+ <!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
6
+ <head>
7
+ <meta charset="utf-8">
8
+
9
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
10
+
11
+ <title>pythainlp.soundex.udom83 — PyThaiNLP 2.0 documentation</title>
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+ <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
28
+ <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
29
+ <link rel="index" title="Index" href="../../../genindex.html" />
30
+ <link rel="search" title="Search" href="../../../search.html" />
31
+
32
+
33
+ <script src="../../../_static/js/modernizr.min.js"></script>
34
+
35
+ </head>
36
+
37
+ <body class="wy-body-for-nav">
38
+
39
+
40
+ <div class="wy-grid-for-nav">
41
+
42
+
43
+ <nav data-toggle="wy-nav-shift" class="wy-nav-side">
44
+ <div class="wy-side-scroll">
45
+ <div class="wy-side-nav-search">
46
+
47
+
48
+
49
+ <a href="../../../index.html" class="icon icon-home"> PyThaiNLP
50
+
51
+
52
+
53
+ </a>
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+ <div role="search">
62
+ <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
63
+ <input type="text" name="q" placeholder="Search docs" />
64
+ <input type="hidden" name="check_keywords" value="yes" />
65
+ <input type="hidden" name="area" value="default" />
66
+ </form>
67
+ </div>
68
+
69
+
70
+ </div>
71
+
72
+ <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
73
+
74
+
75
+
76
+
77
+
78
+
79
+ <p class="caption"><span class="caption-text">Notes</span></p>
80
+ <ul>
81
+ <li class="toctree-l1"><a class="reference internal" href="../../../notes/getting_started.html">Getting Started</a></li>
82
+ <li class="toctree-l1"><a class="reference internal" href="../../../notes/installation.html">Installation</a></li>
83
+ </ul>
84
+ <p class="caption"><span class="caption-text">Package reference:</span></p>
85
+ <ul>
86
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/corpus.html">pythainlp.corpus</a></li>
87
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/soundex.html">pythainlp.soundex</a></li>
88
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/spell.html">pythainlp.spell</a></li>
89
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/summarize.html">pythainlp.summarize</a></li>
90
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/tag.html">pythainlp.tag</a></li>
91
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/tokenize.html">pythainlp.tokenize</a></li>
92
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/tools.html">pythainlp.tools</a></li>
93
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/transliterate.html">pythainlp.transliterate</a></li>
94
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/ulmfit.html">pythainlp.ulmfit</a></li>
95
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/util.html">pythainlp.util</a></li>
96
+ <li class="toctree-l1"><a class="reference internal" href="../../../api/word_vector.html">pythainlp.word_vector</a></li>
97
+ </ul>
98
+
99
+
100
+
101
+ </div>
102
+ </div>
103
+ </nav>
104
+
105
+ <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
106
+
107
+
108
+ <nav class="wy-nav-top" aria-label="top navigation">
109
+
110
+ <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
111
+ <a href="../../../index.html">PyThaiNLP</a>
112
+
113
+ </nav>
114
+
115
+
116
+ <div class="wy-nav-content">
117
+
118
+ <div class="rst-content">
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+ <div role="navigation" aria-label="breadcrumbs navigation">
137
+
138
+ <ul class="wy-breadcrumbs">
139
+
140
+ <li><a href="../../../index.html">Docs</a> »</li>
141
+
142
+ <li><a href="../../index.html">Module code</a> »</li>
143
+
144
+ <li><a href="../soundex.html">pythainlp.soundex</a> »</li>
145
+
146
+ <li>pythainlp.soundex.udom83</li>
147
+
148
+
149
+ <li class="wy-breadcrumbs-aside">
150
+
151
+ </li>
152
+
153
+ </ul>
154
+
155
+
156
+ <hr />
157
+ </div>
158
+ <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
159
+ <div itemprop="articleBody">
160
+
161
+ <h1>Source code for pythainlp.soundex.udom83</h1><div class="highlight"><pre>
162
+ <span></span><span class="c1"># -*- coding: utf-8 -*-</span>
163
+ <span class="sd">"""</span>
164
+ <span class="sd">Thai soundex - Udom83 system</span>
165
+
166
+ <span class="sd">Python implementation: Korakot Chaovavanich</span>
167
+ <span class="sd">https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8</span>
168
+ <span class="sd">"""</span>
169
+ <span class="kn">import</span> <span class="nn">re</span>
170
+
171
+ <span class="n">_RE_1</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"รร([เ-ไ])"</span><span class="p">)</span>
172
+ <span class="n">_RE_2</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"รร([ก-ฮ][ก-ฮเ-ไ])"</span><span class="p">)</span>
173
+ <span class="n">_RE_3</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"รร([ก-ฮ][ะ-ู่-์])"</span><span class="p">)</span>
174
+ <span class="n">_RE_4</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"รร"</span><span class="p">)</span>
175
+ <span class="n">_RE_5</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"ไ([ก-ฮ]ย)"</span><span class="p">)</span>
176
+ <span class="n">_RE_6</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"[ไใ]([ก-ฮ])"</span><span class="p">)</span>
177
+ <span class="n">_RE_7</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"ำ(ม[ะ-ู])"</span><span class="p">)</span>
178
+ <span class="n">_RE_8</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"ำม"</span><span class="p">)</span>
179
+ <span class="n">_RE_9</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"ำ"</span><span class="p">)</span>
180
+ <span class="n">_RE_10</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"จน์|มณ์|ณฑ์|ทร์|ตร์|[ก-ฮ]์|[ก-ฮ][ะ-ู]์"</span><span class="p">)</span>
181
+ <span class="n">_RE_11</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"[ะ-์]"</span><span class="p">)</span>
182
+
183
+ <span class="n">_TRANS1</span> <span class="o">=</span> <span class="nb">str</span><span class="o">.</span><span class="n">maketrans</span><span class="p">(</span>
184
+     <span class="s2">"กขฃคฅฆงจฉชฌซศษสฎดฏตฐฑฒถทธณนบปผพภฝฟมญยรลฬฤฦวอหฮ"</span><span class="p">,</span>
185
+     <span class="s2">"กขขขขขงจชชชสสสสดดตตททททททนนบปพพพฟฟมยยรรรรรวอฮฮ"</span><span class="p">,</span>
186
+ <span class="p">)</span>
187
+ <span class="n">_TRANS2</span> <span class="o">=</span> <span class="nb">str</span><span class="o">.</span><span class="n">maketrans</span><span class="p">(</span>
188
+     <span class="s2">"มวำกขฃคฅฆงยญณนฎฏดตศษสบปพภผฝฟหอฮจฉชซฌฐฑฒถทธรฤลฦ"</span><span class="p">,</span>
189
+     <span class="s2">"0001111112233344444445555666666777778888889999"</span><span class="p">,</span>
190
+ <span class="p">)</span>
191
+
192
+
193
+ <div class="viewcode-block" id="udom83"><a class="viewcode-back" href="../../../api/soundex.html#pythainlp.soundex.udom83">[docs]</a><span class="k">def</span> <span class="nf">udom83</span><span class="p">(</span><span class="n">text</span><span class="p">):</span>
194
+     <span class="sd">"""</span>
195
+     <span class="sd">Udom83 - It's a Thai soundex rule.</span>
196
+
197
+     <span class="sd">:param str text: Thai word</span>
198
+     <span class="sd">:return: Udom83 soundex</span>
199
+     <span class="sd">"""</span>
200
+
201
+     <span class="k">if</span> <span class="ow">not</span> <span class="n">text</span><span class="p">:</span>
202
+         <span class="k">return</span> <span class="s2">""</span>
203
+
204
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_1</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"ัน</span><span class="se">\\</span><span class="s2">1"</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
205
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_2</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"ั</span><span class="se">\\</span><span class="s2">1"</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
206
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_3</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"ัน</span><span class="se">\\</span><span class="s2">1"</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
207
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_4</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"ัน"</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
208
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_5</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"</span><span class="se">\\</span><span class="s2">1"</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
209
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_6</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"</span><span class="se">\\</span><span class="s2">1ย"</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
210
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_7</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"ม</span><span class="se">\\</span><span class="s2">1"</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
211
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_8</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"ม"</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
212
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_9</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"ม"</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
213
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_10</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">""</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
214
+     <span class="n">text</span> <span class="o">=</span> <span class="n">_RE_11</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">""</span><span class="p">,</span> <span class="n">text</span><span class="p">)</span>
215
+
216
+     <span class="k">if</span> <span class="ow">not</span> <span class="n">text</span><span class="p">:</span>
217
+         <span class="k">return</span> <span class="s2">""</span>
218
+
219
+     <span class="n">sd</span> <span class="o">=</span> <span class="n">text</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">translate</span><span class="p">(</span><span class="n">_TRANS1</span><span class="p">)</span>
220
+     <span class="n">sd</span> <span class="o">+=</span> <span class="n">text</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">translate</span><span class="p">(</span><span class="n">_TRANS2</span><span class="p">)</span>
221
+
222
+     <span class="k">return</span> <span class="p">(</span><span class="n">sd</span> <span class="o">+</span> <span class="s2">"000000"</span><span class="p">)[:</span><span class="mi">7</span><span class="p">]</span></div>
223
+ </pre></div>
224
+
225
+ </div>
226
+
227
+ </div>
228
+ <footer>
229
+
230
+
231
+ <hr />
232
+
233
+ <div role="contentinfo">
234
+ <p>
235
+ © Copyright 2017-2018, PyThaiNLP (Apache Software License 2.0)
236
+
237
+ </p>
238
+ </div>
239
+ Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
240
+
241
+ </footer>
242
+
243
+ </div>
244
+ </div>
245
+
246
+ </section>
247
+
248
+ </div>
249
+
250
+
251
+
252
+
253
+
254
+
255
+
256
+ <script type="text/javascript" id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
257
+ <script type="text/javascript" src="../../../_static/jquery.js"></script>
258
+ <script type="text/javascript" src="../../../_static/underscore.js"></script>
259
+ <script type="text/javascript" src="../../../_static/doctools.js"></script>
260
+ <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
261
+
262
+
263
+
264
+
265
+ <script type="text/javascript" src="../../../_static/js/theme.js"></script>
266
+
267
+ <script type="text/javascript">
268
+ jQuery(function () {
269
+ SphinxRtdTheme.Navigation.enable(true);
270
+ });
271
+ </script>
272
+
273
+ </body>
274
+ </html>
0 commit comments