Skip to content

Commit 0245b79

Browse files
arthaud authored and facebook-github-bot committed
Support sanitizing all parameters in the analysis
Summary: This implements the all-parameters sanitizer (`@Sanitize(Parameters)`) and adds documentation for it. Reviewed By: pradeep90 Differential Revision: D30948252 fbshipit-source-id: a503d8845a2a27cb046d8233298374eb931006f4
1 parent fd9c9fd commit 0245b79

File tree

8 files changed

+729
-8
lines changed

8 files changed

+729
-8
lines changed

documentation/website/docs/pysa_basics.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ def django.utils.html.escape(text: TaintInTaintOut): ...
281281
def module.sanitize_for_logging_and_sql(): ...
282282
```
283283

284-
Parameters can be marked as sanitized to remove all taint passing through them:
284+
Specific parameters can be marked as sanitized to remove all taint passing through them:
285285

286286
```python
287287
def module.safe_function(
@@ -302,6 +302,19 @@ def modules.safe_return_source() -> Sanitize[TaintSource]: ...
302302
def modules.return_not_user_controlled() -> Sanitize[TaintSource[UserControlled]]: ...
303303
```
304304

305+
All parameters can be marked as sanitized as well:
306+
307+
```python
308+
@Sanitize(Parameters)
309+
def module.sanitize_all_parameters(): ...
310+
311+
@Sanitize(Parameters[TaintSource[UserControlled]])
312+
def module.parameters_not_user_controlled(): ...
313+
314+
@Sanitize(Parameters[TaintInTaintOut[TaintSource[UserControlled], TaintSink[SQL]]])
315+
def module.parameters_not_taint_in_taint_out(): ...
316+
```
317+
305318
Attributes can also be marked as sanitizers to remove all taint passing through
306319
them:
307320

source/interprocedural_analyses/taint/backwardAnalysis.ml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,9 @@ module AnalysisInstance (FunctionContext : FUNCTION_CONTEXT) = struct
275275
List.map
276276
~f:(fun { AccessPath.root; _ } -> SanitizeRootMap.get root sanitizers.roots)
277277
sanitize_matches
278-
|> List.fold ~f:Sanitize.join ~init:sanitizers.global
278+
|> List.fold ~f:Sanitize.join ~init:Sanitize.empty
279+
|> Sanitize.join sanitizers.global
280+
|> Sanitize.join sanitizers.parameters
279281
in
280282
match sanitize.Sanitize.tito with
281283
| Some AllTito -> BackwardState.Tree.bottom

source/interprocedural_analyses/taint/forwardAnalysis.ml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,9 @@ module AnalysisInstance (FunctionContext : FUNCTION_CONTEXT) = struct
183183
List.map
184184
~f:(fun { AccessPath.root; _ } -> SanitizeRootMap.get root sanitizers.roots)
185185
sanitize_matches
186-
|> List.fold ~f:Sanitize.join ~init:sanitizers.global
186+
|> List.fold ~f:Sanitize.join ~init:Sanitize.empty
187+
|> Sanitize.join sanitizers.global
188+
|> Sanitize.join sanitizers.parameters
187189
in
188190
match sanitize.Sanitize.tito with
189191
| Some AllTito -> ForwardState.Tree.bottom

source/interprocedural_analyses/taint/model.ml

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ let apply_sanitizers
468468
{
469469
forward = { source_taint };
470470
backward = { taint_in_taint_out; sink_taint };
471-
sanitizers = { global; roots; _ } as sanitizers;
471+
sanitizers = { global; parameters; roots } as sanitizers;
472472
modes;
473473
}
474474
=
@@ -487,7 +487,19 @@ let apply_sanitizers
487487
let taint_in_taint_out =
488488
match global.tito with
489489
| Some AllTito -> BackwardState.empty
490-
| _ -> taint_in_taint_out
490+
| _ ->
491+
(* We cannot apply source or sink specific taint-in-taint-out sanitizers
492+
* here because the tito model does not know about source or sink kinds.
493+
*
494+
* For instance, in `def f(x): return x`, we infer that `f` propagates
495+
* the taint from `x` to its return value, regardless of the source or
496+
* sink kind.
497+
*
498+
* Therefore, we apply those in `apply_call_target` in the forward and
499+
* backward analysis, where we actually see source and sink kinds of the
500+
* arguments.
501+
*)
502+
taint_in_taint_out
491503
in
492504
let sink_taint =
493505
match global.sinks with
@@ -500,6 +512,23 @@ let apply_sanitizers
500512
sink_taint
501513
| None -> sink_taint
502514
in
515+
(* Apply the parameters sanitizer. *)
516+
let taint_in_taint_out =
517+
match parameters.tito with
518+
| Some AllTito -> BackwardState.empty
519+
| _ -> taint_in_taint_out
520+
in
521+
let sink_taint =
522+
match parameters.sinks with
523+
| Some Sanitize.AllSinks -> BackwardState.empty
524+
| Some (Sanitize.SpecificSinks sanitized_sinks) ->
525+
BackwardState.transform
526+
BackwardTaint.kind
527+
Filter
528+
~f:(fun sink -> not (Sinks.Set.mem sink sanitized_sinks))
529+
sink_taint
530+
| None -> sink_taint
531+
in
503532
(* Apply root specific sanitizers. *)
504533
let sanitize_root (root, sanitize) (source_taint, taint_in_taint_out, sink_taint) =
505534
let source_taint =

source/interprocedural_analyses/taint/test/integration/sanitize.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ def no_propagation_with_sanitize_parameter_a_sink_tito(x):
464464

465465

466466
def propagation_of_b_with_sanitize_parameter_a_sink_tito(x):
467-
y = sanitize_a_sink_tito(x)
467+
y = sanitize_parameter_a_sink_tito(x)
468468
b_sink(y)
469469

470470

@@ -508,3 +508,101 @@ def sanitize_return_a_and_b_source():
508508

509509
def sanitize_return_with_user_declared_source(x):
510510
return 0
511+
512+
513+
def sanitize_all_parameters(x, y):
514+
_test_sink(x)
515+
_test_sink(y)
516+
return source_with_tito(x) + source_with_tito(y)
517+
518+
519+
def sanitize_all_parameters_all_sources(x, y):
520+
_test_sink(x)
521+
_test_sink(y)
522+
return source_with_tito(x) + source_with_tito(y)
523+
524+
525+
def sanitize_all_parameters_all_sinks(x, y):
526+
_test_sink(x)
527+
_test_sink(y)
528+
return source_with_tito(x) + source_with_tito(y)
529+
530+
531+
def sanitize_all_parameters_all_tito(x, y):
532+
_test_sink(x)
533+
_test_sink(y)
534+
return source_with_tito(x) + source_with_tito(y)
535+
536+
537+
def sanitize_all_parameters_a_sink(x):
538+
if 1 > 2:
539+
a_sink(x)
540+
else:
541+
b_sink(x)
542+
543+
544+
def sanitize_all_parameters_b_sink(x):
545+
if 1 > 2:
546+
a_sink(x)
547+
else:
548+
b_sink(x)
549+
550+
551+
def sanitize_all_parameters_a_source_tito(x):
552+
return x
553+
554+
555+
def no_propagation_with_sanitize_all_parameters_a_source_tito():
556+
a = a_source()
557+
b = sanitize_all_parameters_a_source_tito(a)
558+
return b
559+
560+
561+
def propagation_of_b_with_sanitize_all_parameters_a_source_tito():
562+
b = b_source()
563+
tito = sanitize_all_parameters_a_source_tito(b)
564+
return tito
565+
566+
567+
def sanitize_all_parameters_a_sink_tito(x):
568+
return x
569+
570+
571+
def no_propagation_with_sanitize_all_parameters_a_sink_tito(x):
572+
y = sanitize_all_parameters_a_sink_tito(x)
573+
a_sink(y)
574+
575+
576+
def propagation_of_b_with_sanitize_all_parameters_a_sink_tito(x):
577+
y = sanitize_all_parameters_a_sink_tito(x)
578+
b_sink(y)
579+
580+
581+
def sanitize_all_parameters_a_source_sink_tito(x):
582+
return x
583+
584+
585+
def no_propagation_of_a_source_with_sanitize_all_parameters_a_source_sink_tito():
586+
a = a_source()
587+
b = sanitize_all_parameters_a_source_sink_tito(a)
588+
return b
589+
590+
591+
def propagation_of_b_source_with_sanitize_all_parameters_a_source_sink_tito():
592+
b = b_source()
593+
tito = sanitize_all_parameters_a_source_sink_tito(b)
594+
return tito
595+
596+
597+
def no_propagation_of_a_sink_with_sanitize_all_parameters_a_source_sink_tito(x):
598+
y = sanitize_all_parameters_a_source_sink_tito(x)
599+
a_sink(y)
600+
601+
602+
def propagation_of_b_sink_with_sanitize_all_parameters_a_source_sink_tito(x):
603+
y = sanitize_all_parameters_a_sink_tito(x)
604+
b_sink(y)
605+
606+
607+
def sanitize_all_parameters_with_user_declared_sink(x):
608+
return x

source/interprocedural_analyses/taint/test/integration/sanitize.py.cg

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,20 @@ sanitize.C_sanitized_all_sources::__init__ (method) -> []
99
sanitize.C_sanitized_b_sink::__init__ (method) -> []
1010
sanitize.C_sanitized_b_source::__init__ (method) -> []
1111
sanitize.no_propagation_of_a_sink (fun) -> [sanitize.a_sink (fun) sanitize.sanitize_a_sink_tito (fun)]
12+
sanitize.no_propagation_of_a_sink_with_sanitize_all_parameters_a_source_sink_tito (fun) -> [sanitize.a_sink (fun) sanitize.sanitize_all_parameters_a_source_sink_tito (fun)]
13+
sanitize.no_propagation_of_a_source_with_sanitize_all_parameters_a_source_sink_tito (fun) -> [sanitize.a_source (fun) sanitize.sanitize_all_parameters_a_source_sink_tito (fun)]
1214
sanitize.no_propagation_with_sanitize_a_tito (fun) -> [sanitize.a_source (fun) sanitize.sanitize_a_tito (fun)]
15+
sanitize.no_propagation_with_sanitize_all_parameters_a_sink_tito (fun) -> [sanitize.a_sink (fun) sanitize.sanitize_all_parameters_a_sink_tito (fun)]
16+
sanitize.no_propagation_with_sanitize_all_parameters_a_source_tito (fun) -> [sanitize.a_source (fun) sanitize.sanitize_all_parameters_a_source_tito (fun)]
1317
sanitize.no_propagation_with_sanitize_parameter_a_sink_tito (fun) -> [sanitize.a_sink (fun) sanitize.sanitize_parameter_a_sink_tito (fun)]
1418
sanitize.no_propagation_with_sanitize_parameter_a_source_tito (fun) -> [sanitize.a_source (fun) sanitize.sanitize_parameter_a_source_tito (fun)]
1519
sanitize.propagation_of_b_sink (fun) -> [sanitize.b_sink (fun) sanitize.sanitize_a_sink_tito (fun)]
20+
sanitize.propagation_of_b_sink_with_sanitize_all_parameters_a_source_sink_tito (fun) -> [sanitize.b_sink (fun) sanitize.sanitize_all_parameters_a_sink_tito (fun)]
21+
sanitize.propagation_of_b_source_with_sanitize_all_parameters_a_source_sink_tito (fun) -> [sanitize.b_source (fun) sanitize.sanitize_all_parameters_a_source_sink_tito (fun)]
1622
sanitize.propagation_of_b_with_sanitize_a_tito (fun) -> [sanitize.b_source (fun) sanitize.sanitize_a_tito (fun)]
17-
sanitize.propagation_of_b_with_sanitize_parameter_a_sink_tito (fun) -> [sanitize.b_sink (fun) sanitize.sanitize_a_sink_tito (fun)]
23+
sanitize.propagation_of_b_with_sanitize_all_parameters_a_sink_tito (fun) -> [sanitize.b_sink (fun) sanitize.sanitize_all_parameters_a_sink_tito (fun)]
24+
sanitize.propagation_of_b_with_sanitize_all_parameters_a_source_tito (fun) -> [sanitize.b_source (fun) sanitize.sanitize_all_parameters_a_source_tito (fun)]
25+
sanitize.propagation_of_b_with_sanitize_parameter_a_sink_tito (fun) -> [sanitize.b_sink (fun) sanitize.sanitize_parameter_a_sink_tito (fun)]
1826
sanitize.propagation_of_b_with_sanitize_parameter_a_source_tito (fun) -> [sanitize.b_source (fun) sanitize.sanitize_parameter_a_source_tito (fun)]
1927
sanitize.return_taint_sanitize (fun) -> []
2028
sanitize.sanitize_a_and_b_sinks (fun) -> [int::__gt__ (method) int::__le__ (method) sanitize.a_sink (fun) sanitize.b_sink (fun)]
@@ -27,6 +35,16 @@ sanitize.sanitize_a_tito (fun) -> []
2735
sanitize.sanitize_a_tito_with_sink (fun) -> [sanitize.a_sink (fun)]
2836
sanitize.sanitize_ab_sinks_attribute (fun) -> [int::__gt__ (method) int::__le__ (method) sanitize.a_sink (fun) sanitize.b_sink (fun)]
2937
sanitize.sanitize_ab_sinks_instance (fun) -> [int::__gt__ (method) int::__le__ (method) sanitize.a_sink (fun) sanitize.b_sink (fun)]
38+
sanitize.sanitize_all_parameters (fun) -> [_test_sink (fun) sanitize.source_with_tito (fun)]
39+
sanitize.sanitize_all_parameters_a_sink (fun) -> [int::__gt__ (method) int::__le__ (method) sanitize.a_sink (fun) sanitize.b_sink (fun)]
40+
sanitize.sanitize_all_parameters_a_sink_tito (fun) -> []
41+
sanitize.sanitize_all_parameters_a_source_sink_tito (fun) -> []
42+
sanitize.sanitize_all_parameters_a_source_tito (fun) -> []
43+
sanitize.sanitize_all_parameters_all_sinks (fun) -> [_test_sink (fun) sanitize.source_with_tito (fun)]
44+
sanitize.sanitize_all_parameters_all_sources (fun) -> [_test_sink (fun) sanitize.source_with_tito (fun)]
45+
sanitize.sanitize_all_parameters_all_tito (fun) -> [_test_sink (fun) sanitize.source_with_tito (fun)]
46+
sanitize.sanitize_all_parameters_b_sink (fun) -> [int::__gt__ (method) int::__le__ (method) sanitize.a_sink (fun) sanitize.b_sink (fun)]
47+
sanitize.sanitize_all_parameters_with_user_declared_sink (fun) -> []
3048
sanitize.sanitize_all_sinks_attribute (fun) -> [_test_sink (fun) int::__gt__ (method) int::__le__ (method) sanitize.a_sink (fun) sanitize.b_sink (fun)]
3149
sanitize.sanitize_all_sinks_instance (fun) -> [_test_sink (fun) int::__gt__ (method) int::__le__ (method) sanitize.a_sink (fun) sanitize.b_sink (fun)]
3250
sanitize.sanitize_b_sink (fun) -> [int::__gt__ (method) int::__le__ (method) sanitize.a_sink (fun) sanitize.b_sink (fun)]

0 commit comments

Comments
 (0)