@@ -25,9 +25,8 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
25
25
result = unique( string x | x = FunctionBodies:: getBodyTokenFeatureForEntity ( entity ) )
26
26
)
27
27
or
28
- exists ( getACallBasedTokenFeatureComponent ( endpoint , _, featureName ) ) and
29
28
result =
30
- concat ( DataFlow:: CallNode call , string component |
29
+ strictconcat ( DataFlow:: CallNode call , string component |
31
30
component = getACallBasedTokenFeatureComponent ( endpoint , call , featureName )
32
31
|
33
32
component , " "
@@ -110,12 +109,13 @@ private string getACallBasedTokenFeatureComponent(
110
109
111
110
/** This module provides functionality for getting the function body feature associated with a particular entity. */
112
111
module FunctionBodies {
113
- /** Holds if `node` is an AST node within the entity `entity` and `token` is a node attribute associated with `node`. */
114
- private predicate bodyTokens (
115
- DatabaseFeatures:: Entity entity , DatabaseFeatures:: AstNode node , string token
116
- ) {
117
- DatabaseFeatures:: astNodes ( entity , _, _, node , _) and
118
- token = unique( string t | DatabaseFeatures:: nodeAttributes ( node , t ) )
112
+ /** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
113
+ private predicate bodyTokens ( DatabaseFeatures:: Entity entity , Location location , string token ) {
114
+ exists ( DatabaseFeatures:: AstNode node |
115
+ DatabaseFeatures:: astNodes ( entity , _, _, node , _) and
116
+ token = unique( string t | DatabaseFeatures:: nodeAttributes ( node , t ) ) and
117
+ location = node .getLocation ( )
118
+ )
119
119
}
120
120
121
121
/**
@@ -127,23 +127,18 @@ module FunctionBodies {
127
127
// If a function has more than 256 body subtokens, then featurize it as absent. This
128
128
// approximates the behavior of the classifier on non-generic body features where large body
129
129
// features are replaced by the absent token.
130
- if count ( DatabaseFeatures:: AstNode node , string token | bodyTokens ( entity , node , token ) ) > 256
131
- then result = ""
132
- else
133
- result =
134
- concat ( int i , string rankedToken |
135
- rankedToken =
136
- rank [ i ] ( DatabaseFeatures:: AstNode node , string token , Location l |
137
- bodyTokens ( entity , node , token ) and l = node .getLocation ( )
138
- |
139
- token
140
- order by
141
- l .getFile ( ) .getAbsolutePath ( ) , l .getStartLine ( ) , l .getStartColumn ( ) , l .getEndLine ( ) ,
142
- l .getEndColumn ( ) , token
143
- )
144
- |
145
- rankedToken , " " order by i
146
- )
130
+ //
131
+ // We count locations instead of tokens because tokens are often not unique.
132
+ strictcount ( Location l | bodyTokens ( entity , l , _) ) <= 256 and
133
+ result =
134
+ strictconcat ( string token , Location l |
135
+ bodyTokens ( entity , l , token )
136
+ |
137
+ token , " "
138
+ order by
139
+ l .getFile ( ) .getAbsolutePath ( ) , l .getStartLine ( ) , l .getStartColumn ( ) , l .getEndLine ( ) ,
140
+ l .getEndColumn ( ) , token
141
+ )
147
142
}
148
143
}
149
144
@@ -247,11 +242,12 @@ private module AccessPaths {
247
242
else accessPath = previousAccessPath + " " + paramName
248
243
)
249
244
or
250
- exists ( string callbackName , string index |
245
+ exists ( string callbackName , int index |
251
246
node =
252
- getNamedParameter ( previousNode .getASuccessor ( "param " + index ) .getMember ( callbackName ) ,
253
- paramName ) and
254
- index != "-1" and // ignore receiver
247
+ getNamedParameter ( previousNode
248
+ .getASuccessor ( API:: Label:: parameter ( index ) )
249
+ .getMember ( callbackName ) , paramName ) and
250
+ index != - 1 and // ignore receiver
255
251
if includeStructuralInfo = true
256
252
then
257
253
accessPath =
@@ -280,10 +276,13 @@ private string getASupportedFeatureName() {
280
276
* `featureValue` for the endpoint `endpoint`.
281
277
*/
282
278
predicate tokenFeatures ( DataFlow:: Node endpoint , string featureName , string featureValue ) {
283
- featureName = getASupportedFeatureName ( ) and
279
+ ModelScoring :: endpoints ( endpoint ) and
284
280
(
285
- featureValue = unique( string x | x = getTokenFeature ( endpoint , featureName ) )
286
- or
287
- not exists ( unique( string x | x = getTokenFeature ( endpoint , featureName ) ) ) and featureValue = ""
281
+ if strictcount ( getTokenFeature ( endpoint , featureName ) ) = 1
282
+ then featureValue = getTokenFeature ( endpoint , featureName )
283
+ else (
284
+ // Performance note: this is a Cartesian product between all endpoints and feature names.
285
+ featureValue = "" and featureName = getASupportedFeatureName ( )
286
+ )
288
287
)
289
288
}
0 commit comments