Skip to content

Commit 6e16704

Browse files
authored
Merge pull request github#7307 from adityasharad/atm/perf-debugging
JS/ATM: Various compilation fixes and performance improvements
2 parents 657cd89 + 271b23b commit 6e16704

File tree

2 files changed

+103
-41
lines changed

2 files changed

+103
-41
lines changed

javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll

Lines changed: 32 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
2525
result = unique(string x | x = FunctionBodies::getBodyTokenFeatureForEntity(entity))
2626
)
2727
or
28-
exists(getACallBasedTokenFeatureComponent(endpoint, _, featureName)) and
2928
result =
30-
concat(DataFlow::CallNode call, string component |
29+
strictconcat(DataFlow::CallNode call, string component |
3130
component = getACallBasedTokenFeatureComponent(endpoint, call, featureName)
3231
|
3332
component, " "
@@ -110,12 +109,13 @@ private string getACallBasedTokenFeatureComponent(
110109

111110
/** This module provides functionality for getting the function body feature associated with a particular entity. */
112111
module FunctionBodies {
113-
/** Holds if `node` is an AST node within the entity `entity` and `token` is a node attribute associated with `node`. */
114-
private predicate bodyTokens(
115-
DatabaseFeatures::Entity entity, DatabaseFeatures::AstNode node, string token
116-
) {
117-
DatabaseFeatures::astNodes(entity, _, _, node, _) and
118-
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t))
112+
/** Holds if `___location` is the ___location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
113+
private predicate bodyTokens(DatabaseFeatures::Entity entity, Location ___location, string token) {
114+
exists(DatabaseFeatures::AstNode node |
115+
DatabaseFeatures::astNodes(entity, _, _, node, _) and
116+
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t)) and
117+
___location = node.getLocation()
118+
)
119119
}
120120

121121
/**
@@ -127,23 +127,18 @@ module FunctionBodies {
127127
// If a function has more than 256 body subtokens, then featurize it as absent. This
128128
// approximates the behavior of the classifer on non-generic body features where large body
129129
// features are replaced by the absent token.
130-
if count(DatabaseFeatures::AstNode node, string token | bodyTokens(entity, node, token)) > 256
131-
then result = ""
132-
else
133-
result =
134-
concat(int i, string rankedToken |
135-
rankedToken =
136-
rank[i](DatabaseFeatures::AstNode node, string token, Location l |
137-
bodyTokens(entity, node, token) and l = node.getLocation()
138-
|
139-
token
140-
order by
141-
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
142-
l.getEndColumn(), token
143-
)
144-
|
145-
rankedToken, " " order by i
146-
)
130+
//
131+
// We count locations instead of tokens because tokens are often not unique.
132+
strictcount(Location l | bodyTokens(entity, l, _)) <= 256 and
133+
result =
134+
strictconcat(string token, Location l |
135+
bodyTokens(entity, l, token)
136+
|
137+
token, " "
138+
order by
139+
l.getFile().getAbsolutePath(), l.getStartLine(), l.getStartColumn(), l.getEndLine(),
140+
l.getEndColumn(), token
141+
)
147142
}
148143
}
149144

@@ -247,11 +242,12 @@ private module AccessPaths {
247242
else accessPath = previousAccessPath + " " + paramName
248243
)
249244
or
250-
exists(string callbackName, string index |
245+
exists(string callbackName, int index |
251246
node =
252-
getNamedParameter(previousNode.getASuccessor("param " + index).getMember(callbackName),
253-
paramName) and
254-
index != "-1" and // ignore receiver
247+
getNamedParameter(previousNode
248+
.getASuccessor(API::Label::parameter(index))
249+
.getMember(callbackName), paramName) and
250+
index != -1 and // ignore receiver
255251
if includeStructuralInfo = true
256252
then
257253
accessPath =
@@ -280,10 +276,13 @@ private string getASupportedFeatureName() {
280276
* `featureValue` for the endpoint `endpoint`.
281277
*/
282278
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
283-
featureName = getASupportedFeatureName() and
279+
ModelScoring::endpoints(endpoint) and
284280
(
285-
featureValue = unique(string x | x = getTokenFeature(endpoint, featureName))
286-
or
287-
not exists(unique(string x | x = getTokenFeature(endpoint, featureName))) and featureValue = ""
281+
if strictcount(getTokenFeature(endpoint, featureName)) = 1
282+
then featureValue = getTokenFeature(endpoint, featureName)
283+
else (
284+
// Performance note: this is a Cartesian product between all endpoints and feature names.
285+
featureValue = "" and featureName = getASupportedFeatureName()
286+
)
288287
)
289288
}

javascript/ql/lib/semmle/javascript/dependencies/FrameworkLibraries.qll

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,31 @@ abstract class FrameworkLibraryWithMarkerComment extends FrameworkLibrary {
9292

9393
/**
9494
* Gets a regular expression that can be used to identify an instance of
95-
* this framework library.
95+
* this framework library, with `<VERSION>` as a placeholder for version
96+
* numbers.
9697
*
9798
* The first capture group of this regular expression should match
98-
* the version number. Any occurrences of the string `<VERSION>` in
99-
* the regular expression are replaced by `versionRegex()` before
100-
* matching.
99+
* the version number.
100+
*
101+
* Subclasses should implement this predicate.
102+
*
103+
* Callers should avoid using this predicate directly,
104+
* and instead use `getAMarkerCommentRegexWithoutPlaceholders()`,
105+
* which will replace any occurrences of the string `<VERSION>` in
106+
* the regular expression with `versionRegex()`.
101107
*/
102108
abstract string getAMarkerCommentRegex();
109+
110+
/**
111+
* Gets a regular expression that can be used to identify an instance of
112+
* this framework library.
113+
*
114+
* The first capture group of this regular expression is intended to match
115+
* the version number.
116+
*/
117+
final string getAMarkerCommentRegexWithoutPlaceholders() {
118+
result = this.getAMarkerCommentRegex().replaceAll("<VERSION>", versionRegex())
119+
}
103120
}
104121

105122
/**
@@ -182,18 +199,64 @@ class FrameworkLibraryInstanceWithMarkerComment extends FrameworkLibraryInstance
182199
override predicate info(FrameworkLibrary fl, string v) { matchMarkerComment(_, this, fl, v) }
183200
}
184201

202+
/** A marker comment that indicates a framework library. */
203+
private class MarkerComment extends Comment {
204+
MarkerComment() {
205+
/*
206+
* PERFORMANCE OPTIMISATION:
207+
*
208+
* Each framework library has a regular expression describing its marker comments.
209+
* We want to find the set of marker comments and the framework regexes they match.
210+
* In order to perform such regex matching, CodeQL needs to compute the
211+
* Cartesian product of possible receiver strings and regexes first,
212+
* containing `num_receivers * num_regexes` tuples.
213+
*
214+
* A straightforward attempt to match marker comments with individual
215+
* framework regexes will compute the Cartesian product between
216+
* the set of comments and the set of framework regexes.
217+
* Total: `num_comments * num_frameworks` tuples.
218+
*
219+
* Instead, create a single regex that matches *all* frameworks.
220+
* This is the regex union of the individual framework regexes
221+
* i.e. `(regex_1)|(regex_2)|...|(regex_n)`
222+
* This approach will compute the Cartesian product between
223+
* the set of comments and the singleton set of this union regex.
224+
* Total: `num_comments * 1` tuples.
225+
*
226+
* To identify the individual frameworks and extract the version number from capture groups,
227+
* use the member predicate `matchesFramework` *after* this predicate has been computed.
228+
*/
229+
230+
exists(string unionRegex |
231+
unionRegex =
232+
concat(FrameworkLibraryWithMarkerComment fl |
233+
|
234+
"(" + fl.getAMarkerCommentRegexWithoutPlaceholders() + ")", "|"
235+
)
236+
|
237+
this.getText().regexpMatch(unionRegex)
238+
)
239+
}
240+
241+
/**
242+
* Holds if this marker comment indicates an instance of the framework `fl`
243+
* with version number `version`.
244+
*/
245+
predicate matchesFramework(FrameworkLibraryWithMarkerComment fl, string version) {
246+
this.getText().regexpCapture(fl.getAMarkerCommentRegexWithoutPlaceholders(), 1) = version
247+
}
248+
}
249+
185250
/**
186251
* Holds if comment `c` in toplevel `tl` matches the marker comment of library
187252
* `fl` at `version`.
188253
*/
189254
cached
190255
private predicate matchMarkerComment(
191-
Comment c, TopLevel tl, FrameworkLibraryWithMarkerComment fl, string version
256+
MarkerComment c, TopLevel tl, FrameworkLibraryWithMarkerComment fl, string version
192257
) {
193258
c.getTopLevel() = tl and
194-
exists(string r | r = fl.getAMarkerCommentRegex().replaceAll("<VERSION>", versionRegex()) |
195-
version = c.getText().regexpCapture(r, 1)
196-
)
259+
c.matchesFramework(fl, version)
197260
}
198261

199262
/**

0 commit comments

Comments
 (0)