-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathtest_string.py
More file actions
106 lines (91 loc) · 3.38 KB
/
test_string.py
File metadata and controls
106 lines (91 loc) · 3.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# coding=utf8
# !/usr/bin/env python
# __author__='crisschan'
# __date__='20160908'
# __from__='EmmaTools https://github.com/crisschan/EMMATools'
# __instruction__= 测试需要处理字符串的类:
# 修改了方法添加了@classmethod装饰器
import random
import re
class TestString(object):
    '''
    Helpers for extracting the text that lies between two boundary markers.

    The public entry point is the classmethod ``GetTestString``; the
    double-underscore methods are name-mangled private helpers.
    '''

    # Matches a '<...>' tag; compiled once here rather than on every call.
    _TAG_PATTERN = re.compile(r'<[^>]+>', re.S)

    def __GetMiddleStr(self, content, startPos, endPos):
        '''
        Return the slice of ``content`` between two positions.

        :param content: original string
        :param startPos: index of the first character to keep
        :param endPos: index one past the last character to keep
        :return: the substring ``content[startPos:endPos]``
        '''
        return content[startPos:endPos]

    def __Getsubindex(self, content, subStr):
        '''
        Locate every non-overlapping occurrence of ``subStr`` in ``content``.

        :param content: original string
        :param subStr: boundary marker to search for
        :return: list of the start indexes of each occurrence, in ascending
                 order; an empty list when ``subStr`` is empty or absent
        '''
        positions = []
        if not subStr:
            # An empty marker matches everywhere; there is nothing useful
            # to report and scanning for it would never terminate.
            return positions
        marker_len = len(subStr)
        pos = content.find(subStr)
        while pos >= 0:
            positions.append(pos)
            # Resume right after the current match so that occurrences do
            # not overlap and the input string is never modified.
            pos = content.find(subStr, pos + marker_len)
        return positions

    @classmethod
    def GetTestString(cls_obj, content, startStr, endStr):
        '''
        Collect the pieces of ``content`` enclosed by ``startStr``/``endStr``.

        A boundary that is empty, ``None`` or not present in ``content`` is
        treated as "no boundary":

        * no start and no end boundary -> the whole ``content``
        * only an end boundary   -> text before its first occurrence
        * only a start boundary  -> text after its first occurrence
        * both boundaries        -> every span between a start marker and
          the next end marker located after it, with ``<...>`` tags removed
          from each span

        :param content: original string
        :param startStr: opening boundary marker
        :param endStr: closing boundary marker
        :return: list of extracted strings; empty when ``content`` is empty
                 or ``None``, or when no start marker is followed by an end
                 marker
        '''
        results = []
        if not content:
            return results

        # Normalise unusable boundaries to the empty string.
        if not startStr or content.find(startStr) < 0:
            startStr = ''
        if not endStr or content.find(endStr) < 0:
            endStr = ''

        # The "no boundaries" case has to be tested first, otherwise the
        # start-only/end-only branches shadow it.
        if startStr == '' and endStr == '':
            results.append(content)
            return results
        if startStr == '':
            results.append(content[:content.find(endStr)])
            return results
        if endStr == '':
            results.append(content[content.find(startStr) + len(startStr):])
            return results

        helper = cls_obj()
        start_len = len(startStr)
        # Index of the first character *after* each start marker.
        span_starts = [pos + start_len
                       for pos in helper.__Getsubindex(content, startStr)]
        span_ends = helper.__Getsubindex(content, endStr)
        end_count = len(span_ends)

        end_cursor = 0
        for span_start in span_starts:
            # Skip end markers located at or before this span's start.
            while end_cursor < end_count and span_ends[end_cursor] <= span_start:
                end_cursor += 1
            if end_cursor >= end_count:
                # No closing marker left: stop instead of indexing past
                # the end of the list.
                break
            piece = helper.__GetMiddleStr(content, span_start,
                                          span_ends[end_cursor])
            results.append(cls_obj._TAG_PATTERN.sub('', piece))
            # Each end marker closes at most one span.
            end_cursor += 1
        return results
# if __name__=="__main__":
# strgg = '24214jnjkanrhquihrghjw<>eufhuin/jfghs<>ajfjsanfghjkg/hjkghj<>kghjfasd/sdaf'
# print(TestString.GetTestString(strgg,'<a href="','/'))