Skip to content

Commit b39a89b

Browse files
arthaudmeta-codesync[bot]
authored andcommitted
Track implicit receiver on parameterized-target bindings
Summary: # Problem Pysa does not correctly handle bound methods that are stored in a variable (or otherwise passed around) and called later. For example: ``` class Connection: def execute(self, sql, parameters): # sink is on `sql`, like sqlite3.Connection.execute _test_sink(sql) class Cache: def row_update(self, rowid) -> None: sql = return_callable_as_any(self._con.execute) sql("UPDATE Cache SET value = ? WHERE rowid = ?", (rowid,)) ``` `self._con.execute` is a bound method, so `self` is already bound to `con` and the two call arguments map to `sql` and `parameters` — the sink only ever sees the constant query string. But when Pysa inlines the captured callable (creating `return_callable_as_any[f=Connection.execute]`), it does not retain whether the method was bound or unbound. Treating it as unbound, it maps the first argument (the query string) onto `self` and the second argument (the parameter tuple holding `rowid`) onto `sql`, the sink. The result is a false positive: a safe query parameter is reported as reaching the SQL sink. This is a pattern we have seen produce false positives in real code. # Solution Store the callee's `implicit_receiver` on each parameterized-target binding. `Target.Parameterized`'s parameter-map value changes from a bare `Target.t` to a record `parameter_value = { target; implicit_receiver }`. A bound binding is rendered with a trailing ` (bound)` suffix, for example `return_callable_as_any[formal(f, position=0)=Connection.execute (bound)]`. The flag is the existing `CallTarget.implicit_receiver`, so it is correct for both bound instance methods and classmethods, and it is honored when the inlined call is rebuilt so arguments are mapped after `self`. Because the flag is part of target identity, `f[g=h]` and `f[g=h (bound)]` are distinct callables. As a side effect, this also fixes pre-existing false negatives where a bound method only reaches its sink through a higher-order wrapper or a decorator (for example `higher_order_function(c.method_to_sink, arg)` and several decorated-method `self` flows), and it annotates bound-method call targets with `implicit_receiver` across several call-graph fixtures (behavior-neutral). Reviewed By: tianhan0 Differential Revision: D108176244 fbshipit-source-id: 0277978e2b6fa6ccf30ff0e0634c912174cb20db
1 parent 0509d31 commit b39a89b

49 files changed

Lines changed: 3857 additions & 924 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

source/interprocedural/callGraphBuilder.ml

Lines changed: 80 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,7 @@ let resolve_callee_ignoring_decorators
14541454
~return_type:(Some (return_type ()))
14551455
~is_class_method
14561456
~is_static_method:static
1457+
~explicit_receiver:true
14571458
(Target.create_method class_name attribute);
14581459
]
14591460
| Some attribute ->
@@ -3063,7 +3064,7 @@ module HigherOrderCallGraph = struct
30633064

30643065
let initialize_from_roots ~pyre_api ~callables_to_definitions_map alist =
30653066
alist
3066-
|> List.filter_map ~f:(fun (root, target) ->
3067+
|> List.filter_map ~f:(fun (root, { Target.ParameterValue.target; implicit_receiver }) ->
30673068
(* ASTs use `TaintAccessPath.parameter_prefix` to distinguish local variables from
30683069
parameters, but using parameters from the define does not result in creating
30693070
parameterized targets whose parameter names contain
@@ -3101,15 +3102,7 @@ module HigherOrderCallGraph = struct
31013102
Some
31023103
( root,
31033104
target
3104-
|> CallTarget.create
3105-
~implicit_receiver:
3106-
(is_implicit_receiver
3107-
~is_static_method
3108-
~is_class_method
3109-
~explicit_receiver:false
3110-
target)
3111-
~is_class_method
3112-
~is_static_method
3105+
|> CallTarget.create ~implicit_receiver ~is_class_method ~is_static_method
31133106
|> CallTarget.Set.singleton ))
31143107
|> of_list
31153108

@@ -3186,13 +3179,16 @@ module HigherOrderCallGraph = struct
31863179
if weak then CallTarget.Set.join existing_callees callees else callees)
31873180

31883181

3189-
let returned_callables call_targets =
3182+
let returned_callables_of_target { CallTarget.target; _ } =
3183+
log "Fetching returned callables for `%a`" Target.pp_pretty target;
3184+
match Context.get_callee_model target with
3185+
| Some { returned_callables; _ } -> returned_callables
3186+
| None -> CallTarget.Set.bottom
3187+
3188+
3189+
let returned_callables_of_targets call_targets =
31903190
call_targets
3191-
|> List.map ~f:(fun { CallTarget.target; _ } ->
3192-
log "Fetching returned callables for `%a`" Target.pp_pretty target;
3193-
match Context.get_callee_model target with
3194-
| Some { returned_callables; _ } -> returned_callables
3195-
| None -> CallTarget.Set.bottom)
3191+
|> List.map ~f:returned_callables_of_target
31963192
|> Algorithms.fold_balanced ~f:CallTarget.Set.join ~init:CallTarget.Set.bottom
31973193

31983194

@@ -3293,13 +3289,38 @@ module HigherOrderCallGraph = struct
32933289
"Decorated targets: `%a`"
32943290
Target.List.pp_pretty_with_kind
32953291
(List.map ~f:CallTarget.target decorated_targets);
3292+
(* When substituting a decorated call target (e.g. `foo@decorated`) with its returned
3293+
callables, the decorated call target carries the type-aware receiver information resolved
3294+
at the call site (`implicit_receiver`). The `@decorated` model's returned callables may not
3295+
preserve it, so we OR the decorated call target's `implicit_receiver` into each returned
3296+
callable: whenever the call site has `implicit_receiver=true`, the substituted callable
3297+
gets it too. We never downgrade a returned callable that is already
3298+
`implicit_receiver=true` (this is an OR, not an overwrite).
3299+
3300+
The returned callable may be a completely different function/method than the decorated
3301+
call-site target (e.g. `returns_function_decorator.replacement`, or a `*args` wrapper
3302+
`inner`), so we keep its own `receiver_class`/`is_class_method`/`is_static_method` and only
3303+
set `implicit_receiver`. *)
3304+
let resolve_decorated_target ({ CallTarget.implicit_receiver; _ } as decorated_target) =
3305+
returned_callables_of_target decorated_target
3306+
|> CallTarget.Set.transform
3307+
CallTarget.Set.Element
3308+
Map
3309+
~f:(fun ({ CallTarget.implicit_receiver = returned_implicit_receiver; _ } as returned)
3310+
->
3311+
{
3312+
returned with
3313+
CallTarget.implicit_receiver = returned_implicit_receiver || implicit_receiver;
3314+
})
3315+
in
32963316
{
32973317
AnalyzeDecoratedTargetsResult.decorated_targets;
32983318
non_decorated_targets;
32993319
result_targets =
3300-
non_decorated_targets
3301-
|> CallTarget.Set.of_list
3302-
|> CallTarget.Set.join (returned_callables decorated_targets)
3320+
decorated_targets
3321+
|> List.map ~f:resolve_decorated_target
3322+
|> Algorithms.fold_balanced ~f:CallTarget.Set.join ~init:CallTarget.Set.bottom
3323+
|> CallTarget.Set.join (CallTarget.Set.of_list non_decorated_targets)
33033324
|> CallTarget.Set.elements;
33043325
}
33053326

@@ -3347,8 +3368,6 @@ module HigherOrderCallGraph = struct
33473368
~location
33483369
~call
33493370
~arguments
3350-
~unresolved
3351-
~override_implicit_receiver
33523371
~argument_callees
33533372
~track_apply_call_step_name
33543373
callee_targets
@@ -3397,8 +3416,8 @@ module HigherOrderCallGraph = struct
33973416
into conflicts in `of_alist_exn` below, which is avoided by excluding those cases,
33983417
such as kwargs and args. *)
33993418
None
3400-
| { TaintAccessPath.root; _ } :: _, Some parameter_target ->
3401-
Some (root, parameter_target.CallTarget.target)
3419+
| { TaintAccessPath.root; _ } :: _, Some { CallTarget.target; implicit_receiver; _ } ->
3420+
Some (root, { Target.ParameterValue.target; implicit_receiver })
34023421
| _ -> (* TODO: Consider the remaining `argument_matches`. *) None
34033422
in
34043423
let non_parameterized_targets ~parameterized_targets call_targets =
@@ -3422,23 +3441,6 @@ module HigherOrderCallGraph = struct
34223441
track_apply_call_step ComputeCalleeTargets (fun () ->
34233442
resolve_decorated_targets callee_targets)
34243443
in
3425-
let recompute_implicit_receiver ({ CallTarget.implicit_receiver; _ } as callee_target) =
3426-
if not override_implicit_receiver then
3427-
implicit_receiver
3428-
else
3429-
match unresolved with
3430-
| Unresolved.True _ ->
3431-
(* Since the original call graphs cannot find the callees, the callee must be
3432-
discovered by higher order call graph building. Since it is unclear whether the
3433-
callee is always a function or a method under any context, the arguments must be
3434-
explicit, unless the callee is a bound method, which is not yet handled. *)
3435-
log
3436-
"Setting `implicit_receiver` to false for callee target `%a`"
3437-
CallTarget.pp
3438-
callee_target;
3439-
false
3440-
| Unresolved.False -> implicit_receiver
3441-
in
34423444
let create_call_target = function
34433445
| Some callee_target :: parameter_targets ->
34443446
let callee_regular, closure =
@@ -3501,18 +3503,16 @@ module HigherOrderCallGraph = struct
35013503
Some right)
35023504
closure
35033505
in
3504-
log "Parameter targets: %a" (Target.ParameterMap.pp Target.pp_pretty) parameters;
3506+
log
3507+
"Parameter targets: %a"
3508+
(Target.ParameterMap.pp Target.ParameterValue.pp_pretty)
3509+
parameters;
35053510
if Target.ParameterMap.is_empty parameters then
35063511
None
35073512
else
35083513
Target.Parameterized { regular = callee_regular; parameters }
35093514
|> validate_target
3510-
>>| fun target ->
3511-
{
3512-
callee_target with
3513-
CallTarget.target;
3514-
implicit_receiver = recompute_implicit_receiver callee_target;
3515-
}
3515+
>>| fun target -> { callee_target with CallTarget.target }
35163516
| _ -> None
35173517
in
35183518
(* Treat an empty list as a single element list so that in each result of the cartesian
@@ -3538,13 +3538,7 @@ module HigherOrderCallGraph = struct
35383538
in
35393539
let non_parameterized_targets =
35403540
track_apply_call_step FindNonParameterizedTargets (fun () ->
3541-
call_targets_from_callee
3542-
|> non_parameterized_targets ~parameterized_targets
3543-
|> List.map ~f:(fun call_target ->
3544-
{
3545-
call_target with
3546-
CallTarget.implicit_receiver = recompute_implicit_receiver call_target;
3547-
}))
3541+
non_parameterized_targets ~parameterized_targets call_targets_from_callee)
35483542
in
35493543
List.iter parameterized_targets ~f:(fun { CallTarget.target; _ } ->
35503544
log "Created parameterized target: `%a`" Target.pp_pretty target);
@@ -3808,8 +3802,6 @@ module HigherOrderCallGraph = struct
38083802
~location
38093803
~call
38103804
~arguments
3811-
~unresolved
3812-
~override_implicit_receiver:true
38133805
~argument_callees
38143806
~track_apply_call_step_name:"callee_return_targets"
38153807
in
@@ -3824,8 +3816,6 @@ module HigherOrderCallGraph = struct
38243816
~location
38253817
~call
38263818
~arguments
3827-
~unresolved
3828-
~override_implicit_receiver:false
38293819
~argument_callees
38303820
~track_apply_call_step_name:"call_targets"
38313821
original_call_targets
@@ -3846,8 +3836,6 @@ module HigherOrderCallGraph = struct
38463836
~location
38473837
~call
38483838
~arguments
3849-
~unresolved
3850-
~override_implicit_receiver:false
38513839
~argument_callees
38523840
~track_apply_call_step_name:"init_targets"
38533841
original_init_targets
@@ -3930,7 +3918,7 @@ module HigherOrderCallGraph = struct
39303918
!Context.output_define_call_graph);
39313919
track_apply_call_step FetchReturnedCallables (fun () ->
39323920
let returned_callables_from_call =
3933-
new_call_targets |> List.rev_append new_init_targets |> returned_callables
3921+
new_call_targets |> List.rev_append new_init_targets |> returned_callables_of_targets
39343922
in
39353923
(* To avoid false negatives when analyzing targets with `kind=Decorated`, sometimes
39363924
* we allow all function-typed arguments to be passed directly to the return values.
@@ -4421,7 +4409,8 @@ module HigherOrderCallGraph = struct
44214409
state
44224410
|> State.get variable
44234411
|> CallTarget.Set.elements
4424-
|> List.map ~f:CallTarget.target
4412+
|> List.map ~f:(fun { CallTarget.target; implicit_receiver; _ } ->
4413+
{ Target.ParameterValue.target; implicit_receiver })
44254414
in
44264415
(* Sometimes a captured variable does not have a record in `state`, but
44274416
we still want to create a callee with the captured variables that have
@@ -4883,69 +4872,51 @@ let call_graph_of_decorated_callable
48834872
~callees_at_location:call_graph
48844873
expression
48854874
in
4886-
(* If Pyre cannot resolve the callable on the callable expression, we hardcode the callable. *)
4887-
let add_callees_for_original_function_name_if_unresolved
4875+
(* Add a call graph edge for the decorated function itself, in the return expression
4876+
`decorator1(decorator2(original_function))`. *)
4877+
let add_callees_for_original_function_name
48884878
~callee
48894879
~original_function_name
48904880
~original_function_name_location
48914881
~call_graph
48924882
=
48934883
let original_function_name =
48944884
match original_function_name with
4895-
| Name.Attribute attribute -> attribute
4885+
| Name.Identifier name -> name
48964886
| original_function_name ->
48974887
Format.asprintf
4898-
"Expect the decorated callable to be an attribute but got `%a`"
4888+
"Expect the decorated callable to be an identifier, but got `%a`"
48994889
Name.pp
49004890
original_function_name
49014891
|> failwith
49024892
in
4903-
let should_add_callable =
4904-
!call_graph
4905-
|> DefineCallGraph.resolve_attribute_access
4906-
~location:original_function_name_location
4907-
~attribute_access:original_function_name
4908-
>>| (fun {
4909-
AttributeAccessCallees.if_called =
4910-
{ CallCallees.call_targets = callable_targets; _ };
4911-
property_targets;
4912-
_;
4913-
} -> List.is_empty callable_targets && List.is_empty property_targets)
4914-
|> Option.value ~default:true
4893+
let is_class_method, is_static_method =
4894+
CallablesSharedMemory.ReadOnly.get_method_kind callables_to_definitions_map callee
49154895
in
4916-
if should_add_callable then
4917-
let is_class_method, is_static_method =
4918-
CallablesSharedMemory.ReadOnly.get_method_kind callables_to_definitions_map callee
4919-
in
4920-
call_graph :=
4921-
DefineCallGraph.set_attribute_access_callees
4922-
~error_if_new:false (* empty attribute accesses are stripped *)
4923-
~location:original_function_name_location
4924-
~attribute_access:original_function_name
4925-
~callees:
4926-
(AttributeAccessCallees.create
4927-
~if_called:
4928-
(CallCallees.create
4929-
~call_targets:
4930-
[
4931-
CallTarget.create
4932-
~implicit_receiver:
4933-
(is_implicit_receiver
4934-
~is_static_method
4935-
~is_class_method
4936-
~explicit_receiver:false
4937-
callee)
4938-
~is_class_method
4939-
~is_static_method
4940-
callee;
4941-
]
4942-
())
4943-
())
4944-
!call_graph
4896+
call_graph :=
4897+
DefineCallGraph.set_identifier_callees
4898+
~error_if_new:false
4899+
~location:original_function_name_location
4900+
~identifier:original_function_name
4901+
~identifier_callees:
4902+
(IdentifierCallees.create
4903+
~if_called:
4904+
(CallCallees.create
4905+
~call_targets:
4906+
[
4907+
CallTarget.create
4908+
~implicit_receiver:false
4909+
~is_class_method
4910+
~is_static_method
4911+
callee;
4912+
]
4913+
())
4914+
())
4915+
!call_graph
49454916
in
49464917
let call_graph = ref DefineCallGraph.empty in
49474918
resolve_callees ~call_graph return_expression;
4948-
add_callees_for_original_function_name_if_unresolved
4919+
add_callees_for_original_function_name
49494920
~callee:callable
49504921
~original_function_name
49514922
~original_function_name_location
@@ -5200,12 +5171,7 @@ let build_whole_program_call_graph_for_pyrefly
52005171
~call_targets:
52015172
[
52025173
CallTarget.create
5203-
~implicit_receiver:
5204-
(is_implicit_receiver
5205-
~is_static_method
5206-
~is_class_method
5207-
~explicit_receiver:false
5208-
original_callable)
5174+
~implicit_receiver:false
52095175
~is_class_method
52105176
~is_static_method
52115177
original_callable;

source/interprocedural/callGraphBuilder.mli

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ module HigherOrderCallGraph : sig
119119
val initialize_from_roots
120120
: pyre_api:PyrePysaApi.ReadOnly.t ->
121121
callables_to_definitions_map:CallablesSharedMemory.ReadOnly.t ->
122-
(Analysis.TaintAccessPath.Root.t * Target.t) list ->
122+
(Analysis.TaintAccessPath.Root.t * Target.ParameterValue.t) list ->
123123
t
124124

125125
val initialize_from_callable

0 commit comments

Comments
 (0)