1
1
import datetime
2
2
import logging
3
- from pathlib import Path
4
3
from typing import List , Optional , Any , Tuple , Union
5
4
6
5
from credsweeper .common .constants import RECURSIVE_SCAN_LIMITATION
@@ -77,22 +76,26 @@ def scanner(self) -> Scanner:
77
76
return self .__scanner
78
77
79
78
@staticmethod
80
- def get_deep_scanners (data : bytes , file_type : str ) -> List [Any ]:
79
+ def get_deep_scanners (data : bytes , file_type : str , depth : int ) -> List [Any ]:
81
80
"""Returns possibly scan methods for the data depends on content"""
82
81
deep_scanners : List [Any ] = []
83
82
if Util .is_zip (data ):
84
- deep_scanners .append (ZipScanner )
83
+ if 0 < depth :
84
+ deep_scanners .append (ZipScanner )
85
85
# probably, there might be a docx, xlsx and so on.
86
86
# It might be scanned with text representation in third-party libraries.
87
87
deep_scanners .append (XlsxScanner )
88
88
deep_scanners .append (DocxScanner )
89
89
deep_scanners .append (PptxScanner )
90
90
elif Util .is_bzip2 (data ):
91
- deep_scanners .append (Bzip2Scanner )
91
+ if 0 < depth :
92
+ deep_scanners .append (Bzip2Scanner )
92
93
elif Util .is_tar (data ):
93
- deep_scanners .append (TarScanner )
94
+ if 0 < depth :
95
+ deep_scanners .append (TarScanner )
94
96
elif Util .is_gzip (data ):
95
- deep_scanners .append (GzipScanner )
97
+ if 0 < depth :
98
+ deep_scanners .append (GzipScanner )
96
99
elif Util .is_pdf (data ):
97
100
deep_scanners .append (PdfScanner )
98
101
elif Util .is_jks (data ):
@@ -113,7 +116,10 @@ def get_deep_scanners(data: bytes, file_type: str) -> List[Any]:
113
116
deep_scanners .append (MxfileScanner )
114
117
deep_scanners .append (XmlScanner )
115
118
else :
116
- deep_scanners = [EncoderScanner , LangScanner , ByteScanner ]
119
+ if 0 < depth :
120
+ deep_scanners .append (EncoderScanner )
121
+ deep_scanners .append (LangScanner )
122
+ deep_scanners .append (ByteScanner )
117
123
return deep_scanners
118
124
119
125
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@@ -136,27 +142,29 @@ def scan(self,
136
142
if isinstance (content_provider , TextContentProvider ) or isinstance (content_provider , ByteContentProvider ):
137
143
# Feature to scan files which might be containers
138
144
data = content_provider .data
145
+ info = "FILE"
139
146
elif isinstance (content_provider , DiffContentProvider ) and content_provider .diff :
140
147
candidates = self .scanner .scan (content_provider )
141
148
# Feature to scan binary diffs
142
149
diff = content_provider .diff [0 ].get ("line" )
143
150
# this isinstance check is here to fix a mypy issue
144
151
if isinstance (diff , bytes ):
145
152
data = diff
153
+ info = "DIFF"
146
154
else :
147
155
logger .warning (f"Content provider { type (content_provider )} does not support deep scan" )
156
+ info = "NA"
148
157
149
158
if data :
150
159
data_provider = DataContentProvider (data = data ,
151
160
file_path = content_provider .file_path ,
152
161
file_type = content_provider .file_type ,
153
- info = Path ( content_provider .file_path ). as_posix () )
162
+ info = content_provider .info or info )
154
163
# iterate over all possible scanner methods WITHOUT ByteContentProvider for TextContentProvider
155
- scanner_classes = self .get_deep_scanners (data , content_provider .file_type )
164
+ scanner_classes = self .get_deep_scanners (data , content_provider .file_type , depth )
156
165
fallback = True
157
166
for scan_class in scanner_classes :
158
- if new_candidates := scan_class .data_scan (self , data_provider , depth - 1 ,
159
- recursive_limit_size - len (data )):
167
+ if new_candidates := scan_class .data_scan (self , data_provider , depth , recursive_limit_size - len (data )):
160
168
augment_candidates (candidates , new_candidates )
161
169
fallback = False
162
170
if fallback and ByteScanner not in scanner_classes and not Util .is_binary (data ):
@@ -196,7 +204,7 @@ def recursive_scan(
196
204
else :
197
205
fallback = True
198
206
# iterate over all possible scanner methods
199
- scanner_classes = self .get_deep_scanners (data_provider .data , data_provider .file_type )
207
+ scanner_classes = self .get_deep_scanners (data_provider .data , data_provider .file_type , depth )
200
208
for scanner_class in scanner_classes :
201
209
if new_candidates := scanner_class .data_scan (self , data_provider , depth , recursive_limit_size ):
202
210
augment_candidates (candidates , new_candidates )
0 commit comments