-
Notifications
You must be signed in to change notification settings - Fork 55
/
Copy pathSensitiveFileParser.py
48 lines (41 loc) · 1.59 KB
/
SensitiveFileParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/local/bin/python
# -*- coding:utf-8 -*-
# @Time : 2019/4/20 4:52 PM
# @Author : Jerry
# @Desc : Parse files for sensitive information according to their file type
# @File : SensitiveFileParser.py
from lib.config import log
from lib.common.basic import deleteFile
from lib.parser.ExcelParser import ExcelParser
from lib.parser.WordParser import WordParser
from lib.parser.PdfParser import PdfParser
# Dispatch table: file-type name -> parser class used to read files of that type.
parser_dict = {
    'excel': ExcelParser,
    'word': WordParser,
    'pdf': PdfParser,
}
class SensitiveFileParser():
    """Run the parser registered for one file type over a set of downloaded files.

    Every file is handed to the class stored in ``parser_dict`` under
    ``file_type``. A file whose parse result has a truthy ``'phone'``,
    ``'idcard'`` or ``'email'`` entry is recorded in
    ``sensitive_result_dict`` and left on disk; every other file is removed
    with ``deleteFile``.
    """

    def __init__(self, downloaded_file_path_dict, file_type):
        """Store the inputs and start with an empty result mapping.

        :param downloaded_file_path_dict: mapping whose keys are used as
            result keys and whose values are passed to the parser and to
            ``deleteFile`` (presumably file URL -> local file path; confirm
            against the caller).
        :param file_type: key into ``parser_dict`` selecting the parser class.
        """
        self.downloaded_file_path_dict = downloaded_file_path_dict
        self.file_type = file_type
        self.sensitive_result_dict = {}

    def startParse(self):
        """Parse every downloaded file and collect the ones with findings.

        :return: ``self.sensitive_result_dict``, mapping each key of
            ``downloaded_file_path_dict`` whose file produced a finding to
            the dict returned by the parser's ``read()``.
        :raises KeyError: if there is at least one file to process and
            ``file_type`` is not a key of ``parser_dict``.
        """
        for file_url, file in self.downloaded_file_path_dict.items():
            # Looked up outside the ``try`` on purpose: an unknown file type
            # is a caller error and must not be swallowed (which would also
            # delete every download).
            parser_cls = parser_dict[self.file_type]
            keep_file = False
            try:
                # Construction is inside the ``try`` as well, so a parser
                # that fails while opening the file neither aborts the whole
                # batch nor leaves the download behind.
                sensitive_dict = parser_cls(file).read()
            except Exception:
                # The file could not be parsed. This is deliberately silent:
                # logging every failure here produced too much noise.
                pass
            else:
                # An empty/None result or a missing key counts as "no finding"
                # instead of raising and aborting the remaining files.
                if sensitive_dict and any(
                    sensitive_dict.get(key)
                    for key in ('phone', 'idcard', 'email')
                ):
                    self.sensitive_result_dict[file_url] = sensitive_dict
                    keep_file = True
            finally:
                # Only files with findings are kept on disk.
                if not keep_file:
                    deleteFile(file)
        return self.sensitive_result_dict