-
Notifications
You must be signed in to change notification settings - Fork 1
/
mc_fx.py
66 lines (63 loc) · 1.9 KB
/
mc_fx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def split_english(text):
    """Split an identifier into words at case and digit boundaries.

    A separator is inserted:
      * before an uppercase letter that follows a non-uppercase character,
        or that is followed by a lowercase letter (so "HTTPServer" becomes
        "HTTP" + "Server");
      * before the first digit of a digit run;
      * after the last digit of a digit run, unless the run ends the string.

    The marked-up string is then split on "_", so underscores already
    present in ``text`` also act as separators (and may yield empty items).

    Args:
        text: The identifier to split.

    Returns:
        A list of word strings; an empty list when ``text`` is empty.
    """
    # Guard: indexing text[0] below would raise IndexError on empty input.
    if not text:
        return []

    pieces = [text[0]]
    # Set when a digit run has just ended, so the next character (whatever
    # it is) starts a new word.
    break_pending = False
    last = len(text) - 1

    for i in range(1, len(text)):
        ch = text[i]
        prev = text[i - 1]
        if break_pending:
            pieces.append("_")
            break_pending = False
        elif ch.isupper():
            if i != last:
                # Start of a word: either the previous char is not uppercase,
                # or this is the last capital of an acronym before a
                # lowercase continuation.
                starts_word = (not prev.isupper()) or text[i + 1].islower()
            else:
                # Final character: there is no next char to look at.
                starts_word = not prev.isupper()
            if starts_word:
                pieces.append("_")
        elif ch.isdigit():
            if not prev.isdigit():
                pieces.append("_")
            if i != last and not text[i + 1].isdigit():
                break_pending = True
        pieces.append(ch)

    # Turn the marked-up characters back into a string and split on the marker.
    return "".join(pieces).split("_")
# Get the names of the tables that belong to a specific schema.
def get_schema(js, schema):
    """Collect the names of tables whose TABLE_SCHEMA contains ``schema``.

    Tables are read from ``js['dt']['db_0']`` under the consecutive keys
    ``table_0``, ``table_1``, ...; scanning stops at the first missing index,
    so entries after a gap in the numbering are not visited.

    Args:
        js: Mapping with a ``'dt'`` entry; its optional ``'db_0'`` entry maps
            ``table_<n>`` keys to table descriptions.
        schema: Schema identifier to look for; per the original note this is
            e.g. 7 (emr), 8 (ip) or 9 (op). It is converted to ``str`` so
            both ``'7'`` and ``7`` are accepted.

    Returns:
        A list with the ``"name"`` value of every matching table, in index
        order.
    """
    db = js['dt'].get('db_0', {})
    # Substring matching needs a str on the left-hand side; an int would
    # raise TypeError.
    wanted = str(schema)
    names = []
    ind = 0  # start from table_0
    while f"table_{ind}" in db:
        table = db[f"table_{ind}"]
        if wanted in str(table.get("TABLE_SCHEMA", "")):
            names.append(table["name"])
        ind += 1
    return names
def is_any_chinese(strs):
    """Return True if any character of ``strs`` lies in U+4E00..U+9FA5.

    Args:
        strs: The string (or iterable of characters) to inspect.

    Returns:
        True as soon as one character falls inside the range, False when
        none does (including for empty input).
    """
    return any('\u4e00' <= ch <= '\u9fa5' for ch in strs)
# Join the glossary meanings of the individual words into a full explanation
# of the name.
def mcfy(text, js):
    """Build an explanation of ``text`` from the glossary ``js``.

    ``text`` is split into words with ``split_english``. For each word:
      * if its lowercase form is a key of ``js``, the glossary value is used;
      * otherwise, if it is all digits, shorter than three characters, or
        contains a Chinese character, the word itself is kept;
      * otherwise a message is printed and the word is left out.

    Args:
        text: The name to explain.
        js: Dictionary mapping lowercase words to their explanations.

    Returns:
        The kept pieces concatenated, each followed by ``'__'``.
    """
    parts = []
    for word in split_english(text):
        key = word.lower()
        if word and key in js:
            parts.append(js[key])
        elif word.isdigit() or len(word) < 3 or is_any_chinese(word):
            # Numbers, very short fragments and Chinese text pass through
            # unchanged.
            parts.append(word)
        else:
            print('目前该词汇字典没有收纳:' + word)
    return ''.join(part + '__' for part in parts)