Skip to content

Commit e0437e5

Browse files
committed
feat(python): Update source and sink summary queries
1 parent 7414f9b commit e0437e5

File tree

3 files changed

+124
-22
lines changed

3 files changed

+124
-22
lines changed

python/ql/lib/modeling/Util.qll

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ private import semmle.python.filters.Tests
1111
*/
1212
class TestFile extends File {
1313
TestFile() {
14-
this.getRelativePath().regexpMatch(".*(test|spec|examples).+") and
14+
this.getRelativePath().regexpMatch(".*(test|spec|examples|__main__).+") and
1515
not this.getAbsolutePath().matches("%/ql/test/%") // allows our test cases to work
1616
}
1717
}

python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll

+1-1
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ predicate sourceModel(string type, string path, string kind, string model) {
332332
}
333333

334334
/** Holds if a sink model exists for the given parameters. */
335-
private predicate sinkModel(string type, string path, string kind, string model) {
335+
predicate sinkModel(string type, string path, string kind, string model) {
336336
any(DeprecationAdapter a).sinkModel(type, path, kind) and
337337
model = "SinkModelCsv"
338338
or

python/ql/src/utils/modelgenerator/internal/CaptureModels.qll

+122-20
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,15 @@ private import semmle.python.dataflow.new.internal.TaintTrackingImplSpecific
1414
private import semmle.python.frameworks.data.internal.ApiGraphModels as ExternalFlow
1515
private import semmle.python.dataflow.new.internal.DataFlowImplCommon as DataFlowImplCommon
1616
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
17+
private import semmle.python.dataflow.new.TaintTracking
1718
private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl
1819
private import modeling.ModelEditor
20+
private import modeling.Util as ModelEditorUtil
21+
// Concepts
22+
private import semmle.python.Concepts
23+
private import semmle.python.security.dataflow.CodeInjectionCustomizations
24+
private import semmle.python.security.dataflow.ServerSideRequestForgeryCustomizations
25+
private import semmle.python.security.dataflow.UnsafeDeserializationCustomizations
1926

2027
module ModelGeneratorInput implements ModelGeneratorInputSig<P::Location, PythonDataFlow> {
2128
class Type = Unit;
@@ -36,19 +43,41 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<P::Location, Python
3643
Parameter asParameter() { result = this }
3744
}
3845

39-
private predicate relevant(Callable api) { any() }
46+
/** Holds if the scope of `api`, viewed as a `DataFlowCallable`, is a `ModelEditorUtil::RelevantScope`. */
private predicate relevant(Callable api) {
47+
api.(DataFlowCallable).getScope() instanceof ModelEditorUtil::RelevantScope
48+
}
4049

4150
predicate isUninterestingForDataFlowModels(Callable api) { none() }
4251

4352
predicate isUninterestingForHeuristicDataFlowModels(Callable api) { none() }
4453

54+
/**
 * Holds if `api` is considered to already have a source model, that is, either
 * - `ExternalFlow::sourceModel` holds for the namespace and kind of the
 *   `Endpoint` that is the scope of `api`, or
 * - the scope of `api` is also the scope of some `ActiveThreatModelSource`.
 *
 * NOTE(review): the third parameter of `sourceModel` is the model's `kind`,
 * whereas `Endpoint.getKind()` is compared elsewhere in this file against
 * values such as "Function" and "InstanceMethod". Confirm that these two
 * notions of kind are meant to coincide; if not, the first disjunct may
 * never hold.
 */
private predicate hasManualSourceModel(Callable api) {
55+
exists(Endpoint endpoint |
56+
endpoint = api.(DataFlowCallable).getScope() and
57+
ExternalFlow::sourceModel(endpoint.getNamespace(), _, endpoint.getKind(), _)
58+
)
59+
or
60+
api.(DataFlowCallable).getScope() = any(ActiveThreatModelSource ats).getScope()
61+
}
63+
private predicate hasManualSinkModel(Callable api) {
64+
exists(Endpoint endpoint |
65+
endpoint = api.(DataFlowCallable).getScope() and
66+
ExternalFlow::sinkModel(endpoint.getNamespace(), _, endpoint.getKind(), _)
67+
)
68+
}
69+
4570
class SourceOrSinkTargetApi extends Callable {
4671
SourceOrSinkTargetApi() { relevant(this) }
4772
}
4873

49-
class SinkTargetApi extends SourceOrSinkTargetApi { }
74+
/** A `SourceOrSinkTargetApi` for which `hasManualSinkModel` does not hold. */
class SinkTargetApi extends SourceOrSinkTargetApi {
75+
SinkTargetApi() { not hasManualSinkModel(this) }
76+
}
5077

51-
class SourceTargetApi extends SourceOrSinkTargetApi { }
78+
/** A `SourceOrSinkTargetApi` for which `hasManualSourceModel` does not hold. */
class SourceTargetApi extends SourceOrSinkTargetApi {
79+
SourceTargetApi() { not hasManualSourceModel(this) }
80+
}
5281

5382
class SummaryTargetApi extends Callable {
5483
private Callable lift;
@@ -67,15 +96,20 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<P::Location, Python
6796

6897
Type getUnderlyingContentType(DataFlow::ContentSet c) { result = any(Type t) and exists(c) }
6998

70-
string qualifierString() { result = "Argument[this]" }
99+
string qualifierString() { result = "Argument[self]" }
71100

72-
string parameterAccess(Parameter p) {
73-
result = "Argument[" + p.getParameter().getName() + "]"
74-
or
75-
not exists(p.getParameter().getName()) and
76-
result = "Argument[" + p.getParameter().getPosition().toString() + "]"
101+
/**
 * Gets the argument specification for `p`, of the form
 * `Argument[<position>,<name>:]`, built from the position and name of the
 * underlying `P::Parameter`.
 *
 * Has no result when the underlying parameter is `self`.
 *
 * NOTE(review): the result is built by concatenating `getName()`, so a
 * parameter without a name presumably yields no result; the
 * position-only fallback of the previous `parameterAccess` is gone.
 * Confirm this is intended.
 */
private string parameterMad(Parameter p) {
102+
exists(P::Parameter param |
103+
param = p.getParameter() and
104+
(
105+
not param.isSelf() and
106+
result = "Argument[" + param.getPosition().toString() + "," + param.getName() + ":]"
107+
)
108+
)
77109
}
78110

111+
string parameterAccess(Parameter p) { result = parameterMad(p) }
112+
79113
string parameterContentAccess(Parameter p) { result = "Argument[]" }
80114

81115
class InstanceParameterNode extends DataFlow::ParameterNode {
@@ -101,13 +135,22 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<P::Location, Python
101135

102136
predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) { none() }
103137

104-
predicate sinkModelSanitizer(DataFlow::Node node) { none() }
138+
/** Holds if `node` acts as a sanitizer when generating sink models. */
predicate sinkModelSanitizer(DataFlow::Node node) {
139+
// Every node that is an instance of `Escaping` is treated as a sanitizer.
140+
node instanceof Escaping
141+
}
105142

106-
predicate apiSource(DataFlow::Node source) { none() }
143+
/** Holds if `source` is a `DataFlow::ParameterNode`; no other kind of node is accepted here. */
predicate apiSource(DataFlow::Node source) {
144+
// TODO: Non-Function Parameter support
145+
source instanceof DataFlow::ParameterNode
146+
}
107147

108148
predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() }
109149

110-
string getInputArgument(DataFlow::Node source) { result = "getInputArgument(" + source + ")" }
150+
/**
 * Gets the argument specification computed by `parameterMad` for `source`.
 * Only holds when `source` is a `DataFlow::ParameterNode`.
 */
string getInputArgument(DataFlow::Node source) {
151+
source instanceof DataFlow::ParameterNode and
152+
result = parameterMad(source)
153+
}
111154

112155
bindingset[kind]
113156
predicate isRelevantSinkKind(string kind) {
@@ -134,21 +177,80 @@ module ModelGeneratorInput implements ModelGeneratorInputSig<P::Location, Python
134177

135178
string printContent(DataFlow::ContentSet c) { result = c.toString() }
136179

137-
string partialModelRow(Callable api, int i) {
138-
exists(Endpoint e | e = api.(DataFlowFunction).getScope() |
139-
i = 0 and result = e.getNamespace() + "." + e.getClass()
180+
private string modelEndpoint(Endpoint endpoint) {
181+
endpoint.getKind() = ["Function", "StaticMethod"] and
182+
(
183+
endpoint.getClass() != "" and
184+
result =
185+
"Member[" + endpoint.getClass().replaceAll(".", "].Member[") + "].Member[" +
186+
endpoint.getFunctionName() + "]"
140187
or
141-
i = 1 and result = "Member[" + e.getFunctionName() + "]"
188+
endpoint.getClass() = "" and
189+
result = "Member[" + endpoint.getFunctionName() + "]"
190+
)
191+
or
192+
endpoint.getKind() = ["InstanceMethod", "ClassMethod", "InitMethod"] and
193+
result =
194+
"Member[" + endpoint.getClass().replaceAll(".", "].Member[") + "].Instance.Member[" +
195+
endpoint.getFunctionName() + "]"
196+
}
197+
198+
string partialModelRow(Callable api, int i) {
199+
exists(Endpoint e |
200+
e = api.(DataFlowFunction).getScope() and
201+
(
202+
i = 0 and result = e.getNamespace()
203+
or
204+
i = 1 and
205+
result = modelEndpoint(e)
206+
)
142207
)
143208
}
144209

145210
string partialNeutralModelRow(Callable api, int i) { result = partialModelRow(api, i) }
146211

147-
// TODO: Implement this when we want to generate sources.
148-
predicate sourceNode(DataFlow::Node node, string kind) { none() }
212+
/**
213+
* Holds if `node` has the same scope as some `ThreatModelSource` whose threat model is `kind`.
*
* NOTE(review): this matches every node sharing a scope with a threat-model
* source, not only the source node itself. Confirm that is intended.
214+
*/
215+
predicate sourceNode(DataFlow::Node node, string kind) {
216+
exists(ThreatModelSource tms |
217+
node.getScope() = tms.getScope() and
218+
kind = tms.getThreatModel()
219+
)
220+
}
149221

150-
// TODO: Implement this when we want to generate sinks.
151-
predicate sinkNode(DataFlow::Node node, string kind) { none() }
222+
/**
223+
* Holds if `node` is a sink node of the given `kind`.
*
* Each disjunct below pairs one concept or query-specific sink class with a
* fixed `kind` string.
224+
*/
225+
predicate sinkNode(DataFlow::Node node, string kind) {
226+
// Command Injection: the command of a `SystemCommandExecution`
227+
node = any(SystemCommandExecution sce).getCommand() and
228+
kind = "command-injection"
229+
or
230+
// Code Injection: any `CodeInjection::Sink`
231+
node = any(CodeInjection::Sink ci) and
232+
kind = "code-injection"
233+
or
234+
// Unsafe Deserialization: any `UnsafeDeserialization::Sink`
235+
node = any(UnsafeDeserialization::Sink ud) and
236+
kind = "unsafe-deserialization"
237+
or
238+
// SQL Injection: the SQL of a `SqlExecution`
239+
node = any(SqlExecution sql).getSql() and
240+
kind = "sql-injection"
241+
or
242+
// Path Injection: a path argument of a `FileSystemAccess`
243+
node = any(FileSystemAccess fcs).getAPathArgument() and
244+
kind = "path-injection"
245+
or
246+
// Template Injection: the source argument of a `TemplateConstruction`
247+
node = any(TemplateConstruction tc).getSourceArg() and
248+
kind = "template-injection"
249+
or
250+
// Server Side Request Forgery: the request of a `ServerSideRequestForgery::Sink`
251+
node = any(ServerSideRequestForgery::Sink ssrf).getRequest() and
252+
kind = "request-forgery"
253+
}
152254
}
153255

154256
import MakeModelGenerator<P::Location, PythonDataFlow, PythonTaintTracking, ModelGeneratorInput>

0 commit comments

Comments
 (0)