Skip to content

Commit f213f22

Browse files
committed
feat(python): Add initial model generator support
1 parent 7015a0a commit f213f22

File tree

3 files changed

+219
-0
lines changed

3 files changed

+219
-0
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ newtype TParameterPosition =
8585

8686
/** A parameter position. */
8787
class ParameterPosition extends TParameterPosition {
88+
/** Gets the integer index carried by this position, if it is a positional parameter position. */
int getPosition() { this = TPositionalParameterPosition(result) }
92+
8893
/** Holds if this position represents a `self`/`cls` parameter. */
8994
predicate isSelf() { this = TSelfParameterPosition() }
9095

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/**
2+
* @name Capture summary models.
3+
* @description Finds applicable summary models to be used by other queries.
4+
* @kind diagnostic
5+
* @id python/utils/modelgenerator/summary-models
6+
* @tags modelgenerator
7+
*/
8+
9+
import internal.CaptureModels
10+
11+
// Selects every string produced by `captureFlow` for some summary target API, ordered by that string.
from DataFlowSummaryTargetApi api, string flow
12+
where flow = captureFlow(api)
13+
select flow order by flow
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
/**
2+
* Provides predicates related to capturing summary models of the standard library or a third-party library.
3+
*/
4+
5+
private import codeql.util.Unit
6+
private import python as P
7+
// DataFlow
8+
private import semmle.python.dataflow.new.DataFlow
9+
private import semmle.python.dataflow.new.internal.DataFlowImpl
10+
private import semmle.python.dataflow.new.internal.DataFlowDispatch
11+
private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
12+
private import semmle.python.dataflow.new.internal.TaintTrackingImplSpecific
13+
// ApiGraph
14+
private import semmle.python.frameworks.data.internal.ApiGraphModels as ExternalFlow
15+
private import semmle.python.dataflow.new.internal.DataFlowImplCommon as DataFlowImplCommon
16+
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
17+
private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl
18+
private import modeling.ModelEditor
19+
20+
module ModelGeneratorInput implements ModelGeneratorInputSig<P::Location, PythonDataFlow> {
21+
/** The type representation used by the model generator; aliased to `Unit` from `codeql.util.Unit`. */
class Type = Unit; // P::Type ?
22+
23+
/** A parameter, represented by a data-flow parameter node. */
class Parameter = DataFlow::ParameterNode;
24+
25+
// class Callable = Callable;
26+
/** A callable considered by the model generator; every value is also a `DataFlowCallable`. */
class Callable instanceof DataFlowCallable {
27+
/** Gets a textual representation of this callable, delegating to the `DataFlowCallable` implementation. */
string toString() { result = super.toString() }
28+
}
29+
30+
/** A data-flow node extended with the member predicates the model generator needs. */
class NodeExtended extends DataFlow::Node {
  /** Gets the callable that encloses this node, as reported by `DataFlow::Node`. */
  override Callable getEnclosingCallable() { result = super.getEnclosingCallable() }

  // NOTE: an earlier draft (left commented out by the author) resolved the enclosing
  // callable through `DataFlowFunction.getScope()` instead of delegating to `super`.
  /** Gets the callable that encloses this node; same result as `getEnclosingCallable()`. */
  Callable getAsExprEnclosingCallable() { result = this.getEnclosingCallable() }

  /** Gets the type of this node; the body is `any()`, so no particular `Type` is singled out. */
  Type getType() { any() }

  /** Gets this node as a `Parameter`, if it is one. */
  Parameter asParameter() { result = this }
}
49+
50+
/** Holds if `api` is relevant for model generation. Currently holds for every callable (`any()`). */
private predicate relevant(Callable api) { any() }
51+
52+
/** Holds if `api` is uninteresting for data-flow models. Currently never holds (`none()`). */
predicate isUninterestingForDataFlowModels(Callable api) { none() }
53+
54+
/** Holds if `api` is uninteresting for heuristic data-flow models. Currently never holds (`none()`). */
predicate isUninterestingForHeuristicDataFlowModels(Callable api) { none() }
55+
56+
/** A callable for which source or sink models may be generated: any `Callable` satisfying `relevant`. */
class SourceOrSinkTargetApi extends Callable {
57+
SourceOrSinkTargetApi() { relevant(this) }
58+
}
59+
60+
/** A callable for which sink models may be generated; adds no restriction beyond `SourceOrSinkTargetApi`. */
class SinkTargetApi extends SourceOrSinkTargetApi { }
61+
62+
/** A callable for which source models may be generated; adds no restriction beyond `SourceOrSinkTargetApi`. */
class SourceTargetApi extends SourceOrSinkTargetApi { }
63+
64+
/** A callable for which summary models may be generated: any `Callable` satisfying `relevant`. */
class SummaryTargetApi extends Callable {
  private Callable lift;

  SummaryTargetApi() {
    relevant(this) and
    lift = this
  }

  /** Gets the callable this API is lifted to; this is always the callable itself. */
  Callable lift() { result = lift }

  /** Holds if this API is relevant, as decided by `relevant`. */
  predicate isRelevant() { relevant(this) }
}
76+
77+
// /**
78+
// * `
79+
// */
80+
// private predicate qualifiedName(Callable c, string package, string type) {
81+
// result = c.
82+
// }
83+
84+
/** Holds if type `t` is relevant. Currently holds for every `Type` (`any()`). */
predicate isRelevantType(Type t) { any() }
85+
86+
/** Gets the underlying type of content `c`. Currently yields every `Type` value for any existing `c`. */
Type getUnderlyingContentType(DataFlow::ContentSet c) { result = any(Type t) and exists(c) }
87+
88+
/** Gets the access path used for the qualifier (receiver) of a call. */
// NOTE(review): Python models-as-data access paths may spell the receiver as `Argument[self]`
// rather than `Argument[this]` — confirm against the Python library-model documentation.
string qualifierString() { result = "Argument[this]" }
89+
90+
/**
 * Gets the access path for parameter `p`, of the form `Argument[<position>]`,
 * where `<position>` is the string form of `p.getPosition()`.
 *
 * TODO: Implement this to support named parameters.
 */
string parameterAccess(Parameter p) {
  exists(string position | position = p.getPosition().toString() |
    result = "Argument[" + position + "]"
  )
}
95+
96+
/** Gets the content access path for parameter `p`. Currently always the literal `Argument[]`, regardless of `p`. */
string parameterContentAccess(Parameter p) { result = "Argument[]" }
97+
98+
/** A parameter node whose underlying parameter satisfies `isSelf()`. */
class InstanceParameterNode extends DataFlow::ParameterNode {
99+
InstanceParameterNode() { this.getParameter().isSelf() }
100+
}
101+
102+
/**
 * Gets the output access path for the parameter of `c` at position `pos`.
 *
 * The `self`/`cls` position is additionally reported as the qualifier
 * (`qualifierString()`), consistent with `paramReturnNodeAsContentOutput`.
 */
bindingset[c]
string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) {
  result = parameterAccess(c.(DataFlowCallable).getParameter(pos))
  or
  // Previously missing: flow out through the receiver had no qualifier spelling here.
  pos.isSelf() and result = qualifierString()
}
106+
107+
/**
 * Gets the content output access path for the parameter of `c` at position `pos`.
 *
 * The `self`/`cls` position yields `qualifierString()`; any parameter found at `pos`
 * yields its `parameterContentAccess`.
 */
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
  pos.isSelf() and result = qualifierString()
  or
  exists(Parameter p | p = c.(DataFlowCallable).getParameter(pos) |
    result = parameterContentAccess(p)
  )
}
113+
114+
/** Gets the callable enclosing `ret`, as computed by `DataFlowImplCommon::getNodeEnclosingCallable`. */
Callable returnNodeEnclosingCallable(DataFlow::Node ret) {
115+
// TODO
116+
result = DataFlowImplCommon::getNodeEnclosingCallable(ret)
117+
}
118+
119+
/** Holds if `node` is an own-instance access. Currently never holds (`none()`). */
predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) { none() }
120+
121+
/** Holds if `node` is a sanitizer for sink models. Currently never holds (`none()`). */
predicate sinkModelSanitizer(DataFlow::Node node) { none() }
122+
123+
/** Holds if `source` is an API source. Currently never holds (`none()`). */
predicate apiSource(DataFlow::Node source) { none() }
124+
125+
/** Holds if the pair (`source`, `api`) is irrelevant for source/sink models. Currently never holds (`none()`). */
predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() }
126+
127+
/** Gets the input argument string for `source`: the text `getInputArgument(`, then `source`, then `)`. */
// NOTE(review): this looks like a debugging placeholder rather than a real access path — confirm.
string getInputArgument(DataFlow::Node source) { result = "getInputArgument(" + source + ")" }
128+
129+
/**
 * Holds if sinks of kind `kind` are relevant: every kind except `log-injection`,
 * `file-content-store`, and kinds starting with `regex-use`.
 */
bindingset[kind]
predicate isRelevantSinkKind(string kind) {
  not (
    kind = ["log-injection", "file-content-store"]
    or
    kind.matches("regex-use%")
  )
}
135+
136+
/** Holds if sources of kind `kind` are relevant. Currently holds for every kind (`any()`). */
bindingset[kind]
137+
predicate isRelevantSourceKind(string kind) { any() }
138+
139+
/**
 * Holds if `c` is treated as container content.
 *
 * TODO: not refined yet; currently holds for every content set.
 */
predicate containerContent(DataFlow::ContentSet c) { any() }
143+
144+
/** Holds if there is an additional content flow step from `node1` to `node2`. Currently never holds (`none()`). */
predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
145+
146+
/** Holds if `c` is treated as field content. Currently holds for every content set (`any()`). */
predicate isField(DataFlow::ContentSet c) { any() }
147+
148+
/** Holds if `c` is treated as callback content. Currently never holds (`none()`). */
predicate isCallback(DataFlow::ContentSet c) { none() }
149+
150+
/** Gets the synthetic name for content `c`. Currently has no result for any `c` (`none()`). */
string getSyntheticName(DataFlow::ContentSet c) { none() }
151+
152+
/**
 * Gets the access-path token used to print content `c`.
 *
 * TODO: placeholder; the result is always `Member[]` and does not yet depend on `c`.
 * Earlier drafts (removed here) tried to derive `Member[<name>]` from a parameter name.
 */
string printContent(DataFlow::ContentSet c) {
  // Fixed spelling: this was `Memeber[]`, whereas the token used elsewhere in this
  // file's example model row is `Member[...]`.
  result = "Member[]"
}
165+
166+
/**
 * Gets column `i` of the partial model row describing `api`.
 *
 * The columns are read from the `Endpoint` that is the scope of `api` (viewed as a
 * `DataFlowFunction`): namespace (0), class (1), function name (2), parameters (3).
 *
 * Example row kept from the original comment (presumably the target output format — TODO confirm):
 * - ["argparse.ArgumentParser", "Member[_parse_known_args,_read_args_from_files]", "Argument[0,arg_strings:]", "ReturnValue", "taint"]
 */
string partialModelRow(Callable api, int i) {
  exists(Endpoint endpoint | api.(DataFlowFunction).getScope() = endpoint |
    result = endpoint.getNamespace() and i = 0
    or
    result = endpoint.getClass() and i = 1
    or
    result = endpoint.getFunctionName() and i = 2
    or
    result = endpoint.getParameters() and i = 3
  )
  // TODO
}
191+
192+
/** Gets column `i` of the partial neutral model row for `api`; identical to `partialModelRow(api, i)`. */
string partialNeutralModelRow(Callable api, int i) { result = partialModelRow(api, i) }
193+
194+
// TODO: Implement this when we want to generate sources.
195+
/** Holds if `node` is a source of kind `kind`. Not implemented yet: never holds (`none()`). */
predicate sourceNode(DataFlow::Node node, string kind) { none() }
196+
197+
// TODO: Implement this when we want to generate sinks.
198+
/** Holds if `node` is a sink of kind `kind`. Not implemented yet: never holds (`none()`). */
predicate sinkNode(DataFlow::Node node, string kind) { none() }
199+
}
200+
201+
import MakeModelGenerator<P::Location, PythonDataFlow, PythonTaintTracking, ModelGeneratorInput>

0 commit comments

Comments
 (0)