Skip to content

Commit 214aa34

Browse files
fahndrichfacebook-github-bot
authored andcommitted
Stop writing strings in issue_instances and trace_frames
Differential Revision: D15016189 fbshipit-source-id: 6d558bc7ee411f989ea3a60b776ed31848e3a750
1 parent d50022c commit 214aa34

File tree

5 files changed

+55
-74
lines changed

5 files changed

+55
-74
lines changed

sapp/sapp/model_generator.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,6 @@ def _generate_issue(self, run, entry, callablesCount):
147147
id=IssueDBID(),
148148
code=entry["code"],
149149
handle=handle,
150-
callable=callable,
151-
filename=entry["filename"],
152150
status=IssueStatus.UNCATEGORIZED,
153151
first_seen=run.date,
154152
run_id=run.id,
@@ -174,7 +172,6 @@ def _generate_issue(self, run, entry, callablesCount):
174172
id=DBID(),
175173
issue_id=issue.id,
176174
location=self.get_location(entry),
177-
filename=entry["filename"],
178175
filename_id=filename_record.id,
179176
callable_id=callable_record.id,
180177
run_id=run.id,
@@ -187,7 +184,7 @@ def _generate_issue(self, run, entry, callablesCount):
187184
min_trace_length_to_sinks=self._get_minimum_trace_length(
188185
entry["preconditions"]
189186
),
190-
callable_count=callablesCount[issue.callable],
187+
callable_count=callablesCount[callable],
191188
)
192189

193190
for sink in final_sinks:
@@ -226,20 +223,21 @@ def _generate_issue_postcondition(self, run, issue, callinfo):
226223
callinfo["leaves"], # sources
227224
callinfo["type_interval"],
228225
)
229-
keys = [(callee, callee_port)]
226+
keys = [(call_tf.callee_id, callee_port)]
230227
while len(keys) > 0:
231228
key = keys.pop()
232229
if self.graph.has_postconditions_with_caller(key[0], key[1]):
233230
continue
234231

232+
key = (self.graph.get_text(key[0]), key[1])
235233
new = [
236234
self._generate_postcondition(run, e)
237235
for e in self.summary["postcondition_entries"].pop(key, [])
238236
]
239237
if len(new) == 0 and key[1] != "source":
240238
self.summary["missing_postconditions"].add(key)
241239

242-
keys.extend([(tf.callee, tf.callee_port) for tf in new])
240+
keys.extend([(tf.callee_id, tf.callee_port) for tf in new])
243241

244242
return call_tf
245243

@@ -280,16 +278,13 @@ def _generate_raw_postcondition(
280278
trace_frame = TraceFrame.Record(
281279
id=DBID(),
282280
kind=TraceKind.POSTCONDITION,
283-
caller=caller,
284281
caller_id=caller_record.id,
285-
callee=callee,
286282
callee_id=callee_record.id,
287283
callee_location=SourceLocation(
288284
callee_location["line"],
289285
callee_location["start"],
290286
callee_location["end"],
291287
),
292-
filename=filename,
293288
filename_id=filename_record.id,
294289
run_id=run.id,
295290
caller_port=caller_port,
@@ -329,20 +324,21 @@ def _generate_issue_precondition(self, run, issue, callinfo):
329324
callinfo["type_interval"],
330325
callinfo["features"],
331326
)
332-
keys = [(callee, callee_port)]
327+
keys = [(call_tf.callee_id, callee_port)]
333328
while len(keys) > 0:
334329
key = keys.pop()
335330
if self.graph.has_preconditions_with_caller(key[0], key[1]):
336331
continue
337332

333+
key = (self.graph.get_text(key[0]), key[1])
338334
new = [
339335
self._generate_precondition(run, e)
340336
for e in self.summary["precondition_entries"].pop(key, [])
341337
]
342338
if len(new) == 0 and key[1] != "sink":
343339
self.summary["missing_preconditions"].add(key)
344340

345-
keys.extend([(tf.callee, tf.callee_port) for tf in new])
341+
keys.extend([(tf.callee_id, tf.callee_port) for tf in new])
346342

347343
return call_tf
348344

@@ -399,18 +395,15 @@ def _generate_raw_precondition(
399395
trace_frame = TraceFrame.Record(
400396
id=DBID(),
401397
kind=TraceKind.PRECONDITION,
402-
caller=caller,
403398
caller_id=caller_record.id,
404399
caller_port=caller_port,
405-
callee=callee,
406400
callee_id=callee_record.id,
407401
callee_port=callee_port,
408402
callee_location=SourceLocation(
409403
callee_location["line"],
410404
callee_location["start"],
411405
callee_location["end"],
412406
),
413-
filename=filename,
414407
filename_id=filename_record.id,
415408
titos=titos,
416409
run_id=run.id,
@@ -467,11 +460,10 @@ def _get_issue_handle(self, entry):
467460
return entry["handle"]
468461

469462
def _get_shared_text(self, kind, name):
463+
name = name[:SHARED_TEXT_LENGTH]
470464
shared_text = self.graph.get_shared_text(kind, name)
471465
if shared_text is None:
472-
shared_text = SharedText.Record(
473-
id=DBID(), contents=name[:SHARED_TEXT_LENGTH], kind=kind
474-
)
466+
shared_text = SharedText.Record(id=DBID(), contents=name, kind=kind)
475467
self.graph.add_shared_text(shared_text)
476468
return shared_text
477469

sapp/sapp/models.py

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -635,17 +635,22 @@ class IssueInstance(Base, PrepareMixin, MutableRecordMixin): # noqa
635635
doc="Location (possibly a range) of the issue",
636636
)
637637

638-
filename = Column(
639-
String(length=767),
640-
doc="Filename containing the issue",
641-
nullable=True,
642-
index=True,
643-
)
644-
645638
filename_id = Column(BIGDBIDType, nullable=False, server_default="0", default=0)
646639

640+
filename = relationship(
641+
"SharedText",
642+
primaryjoin="foreign(SharedText.id) == IssueInstance.filename_id",
643+
uselist=False,
644+
)
645+
647646
callable_id = Column(BIGDBIDType, nullable=False, server_default="0", default=0)
648647

648+
callable = relationship(
649+
"SharedText",
650+
primaryjoin="foreign(SharedText.id) == IssueInstance.callable_id",
651+
uselist=False,
652+
)
653+
649654
is_new_issue = Column(
650655
Boolean,
651656
index=True,
@@ -795,13 +800,6 @@ class Issue(Base, PrepareMixin, MutableRecordMixin): # noqa
795800
Integer, doc="Code identifiying the issue type", nullable=False, index=True
796801
)
797802

798-
callable = Column(
799-
String(length=INNODB_MAX_INDEX_LENGTH),
800-
doc="Callable containing the issue",
801-
nullable=False,
802-
index=True,
803-
)
804-
805803
instances = relationship(
806804
"IssueInstance",
807805
primaryjoin="Issue.id == foreign(IssueInstance.issue_id)",
@@ -1090,12 +1088,6 @@ class TraceFrame(Base, PrepareMixin, RecordMixin): # noqa
10901088

10911089
kind = Column(Enum(TraceKind), nullable=False, index=False)
10921090

1093-
caller: str = Column(
1094-
String(length=INNODB_MAX_INDEX_LENGTH),
1095-
nullable=False,
1096-
doc="The function/method that produces the tainted trace",
1097-
)
1098-
10991091
caller_id = Column(BIGDBIDType, nullable=False, server_default="0", default=0)
11001092

11011093
caller_port: str = Column(
@@ -1105,12 +1097,6 @@ class TraceFrame(Base, PrepareMixin, RecordMixin): # noqa
11051097
doc="The caller port of this call edge",
11061098
)
11071099

1108-
callee: str = Column(
1109-
String(length=INNODB_MAX_INDEX_LENGTH),
1110-
nullable=False,
1111-
doc="The function/method within the caller that produces the tainted trace.",
1112-
)
1113-
11141100
callee_id = Column(BIGDBIDType, nullable=False, server_default="0", default=0)
11151101

11161102
callee_location = Column(
@@ -1126,10 +1112,6 @@ class TraceFrame(Base, PrepareMixin, RecordMixin): # noqa
11261112
doc="The callee port of this call edge'",
11271113
)
11281114

1129-
filename = Column(
1130-
String(length=4096), doc="Filename containing the call", nullable=False
1131-
)
1132-
11331115
filename_id = Column(BIGDBIDType, nullable=False, server_default="0", default=0)
11341116

11351117
run_id = Column("run_id", BIGDBIDType, nullable=False, index=False)

sapp/sapp/tests/fake_object_generator.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,11 @@ def precondition(
8282
trace_frame = TraceFrame.Record(
8383
id=DBID(),
8484
kind=TraceKind.PRECONDITION,
85-
caller=caller,
8685
caller_id=caller_record.id,
8786
caller_port=caller_port,
88-
callee=callee,
8987
callee_id=callee_record.id,
9088
callee_port=callee_port,
9189
callee_location=SourceLocation(location[0], location[1], location[2]),
92-
filename=filename,
9390
filename_id=filename_record.id,
9491
titos=[],
9592
run_id=self.run_id,
@@ -119,14 +116,11 @@ def postcondition(
119116
trace_frame = TraceFrame.Record(
120117
id=DBID(),
121118
kind=TraceKind.POSTCONDITION,
122-
caller=caller,
123119
caller_id=caller_record.id,
124120
caller_port=caller_port,
125-
callee=callee,
126121
callee_id=callee_record.id,
127122
callee_port=callee_port,
128123
callee_location=SourceLocation(location[0], location[1], location[2]),
129-
filename=filename,
130124
filename_id=filename_record.id,
131125
titos=[],
132126
run_id=self.run_id,
@@ -142,6 +136,11 @@ def postcondition(
142136
return trace_frame
143137

144138
def shared_text(self, contents, kind):
139+
if self.graph:
140+
shared_text = self.graph.get_shared_text(kind, contents)
141+
if shared_text is not None:
142+
return shared_text
143+
145144
result = SharedText.Record(id=DBID(), contents=contents, kind=kind)
146145
if self.graph:
147146
self.graph.add_shared_text(result)
@@ -194,7 +193,6 @@ def instance(
194193
result = IssueInstance.Record(
195194
id=DBID(),
196195
location=SourceLocation(6, 7, 8),
197-
filename=filename.contents,
198196
filename_id=filename.id,
199197
message_id=message.id,
200198
callable_id=callable.id,

sapp/sapp/trace_graph.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,17 @@ def __init__(self) -> None:
3131
self._issue_instances: Dict[int, IssueInstance] = {}
3232
self._trace_annotations: Dict[int, TraceFrameAnnotation] = {}
3333

34-
# Create a mapping of (caller, caller_port) to the corresponding
34+
# Create a mapping of (caller_id, caller_port) to the corresponding
3535
# trace frame's id.
3636
self._trace_frames_map: DefaultDict[ # pyre-ignore: T41307149
37-
Tuple[str, str], Set[int]
37+
Tuple[int, str], Set[int]
3838
] = defaultdict(set)
3939

4040
# Similar to _trace_frames_map, but maps the reverse direction
41-
# of the trace graph, i.e. (callee[, callee_port]) to the
41+
# of the trace graph, i.e. (callee_id, callee_port) to the
4242
# trace_frame_id.
4343
self._trace_frames_rev_map: DefaultDict[ # pyre-ignore: T41307149
44-
Tuple[str, str], Set[int]
44+
Tuple[int, str], Set[int]
4545
] = defaultdict(set)
4646

4747
self._trace_frames: Dict[int, TraceFrame] = {}
@@ -105,6 +105,9 @@ def add_issue_instance_fix_info(
105105
), "Instance fix info already exists"
106106
self._issue_instance_fix_info[instance.id.local_id] = fix_info
107107

108+
def get_text(self, shared_text_id: DBID) -> str:
109+
return self._shared_texts[shared_text_id.local_id].contents
110+
108111
def get_shared_text(
109112
self, kind: SharedTextKind, content: str
110113
) -> Optional[SharedText]:
@@ -114,17 +117,17 @@ def get_shared_text(
114117
return self._shared_texts[contents[content]]
115118
return None
116119

117-
def has_postconditions_with_caller(self, caller: str, caller_port: str) -> bool:
118-
key = (caller, caller_port)
120+
def has_postconditions_with_caller(self, caller_id: DBID, caller_port: str) -> bool:
121+
key = (caller_id.local_id, caller_port)
119122
post_ids = {
120123
tf_id
121124
for tf_id in self._trace_frames_map[key]
122125
if self._trace_frames[tf_id].kind == TraceKind.POSTCONDITION
123126
}
124127
return len(post_ids) != 0
125128

126-
def has_preconditions_with_caller(self, caller: str, caller_port: str) -> bool:
127-
key = (caller, caller_port)
129+
def has_preconditions_with_caller(self, caller_id: DBID, caller_port: str) -> bool:
130+
key = (caller_id.local_id, caller_port)
128131
pre_ids = {
129132
tf_id
130133
for tf_id in self._trace_frames_map[key]
@@ -143,21 +146,21 @@ def get_precondition_annotations(self, pre_id: int) -> List[TraceFrameAnnotation
143146
]
144147

145148
def add_trace_frame(self, trace_frame: TraceFrame) -> None:
146-
key = (trace_frame.caller, trace_frame.caller_port)
147-
rev_key = (trace_frame.callee, trace_frame.callee_port)
149+
key = (trace_frame.caller_id.local_id, trace_frame.caller_port)
150+
rev_key = (trace_frame.callee_id.local_id, trace_frame.callee_port)
148151
self._trace_frames_map[key].add(trace_frame.id.local_id)
149152
self._trace_frames_rev_map[rev_key].add(trace_frame.id.local_id)
150153
self._trace_frames[trace_frame.id.local_id] = trace_frame
151154

152-
def has_trace_frame_with_caller(self, caller: str, caller_port: str) -> bool:
153-
key = (caller, caller_port)
155+
def has_trace_frame_with_caller(self, caller_id: DBID, caller_port: str) -> bool:
156+
key = (caller_id.local_id, caller_port)
154157
return key in self._trace_frames_map
155158

156159
def get_trace_frames_from_caller(
157-
self, caller: str, caller_port: str
160+
self, caller_id: DBID, caller_port: str
158161
) -> List[TraceFrame]:
159-
if self.has_trace_frame_with_caller(caller, caller_port):
160-
key = (caller, caller_port)
162+
if self.has_trace_frame_with_caller(caller_id, caller_port):
163+
key = (caller_id.local_id, caller_port)
161164
return [
162165
self._trace_frames[trace_frame_id]
163166
for trace_frame_id in self._trace_frames_map[key]
@@ -172,6 +175,10 @@ def add_shared_text(self, shared_text: SharedText) -> None:
172175
assert (
173176
shared_text.id.local_id not in self._shared_texts
174177
), "Shared text already exists"
178+
assert (
179+
shared_text.kind not in self._shared_text_lookup
180+
or shared_text.contents not in self._shared_text_lookup[shared_text.kind]
181+
), "Shared text with same kind, contents exists"
175182

176183
self._shared_texts[shared_text.id.local_id] = shared_text
177184

sapp/sapp/trimmed_trace_graph.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,9 @@ def _populate_affected_issues(self, graph: TraceGraph) -> None:
8787
affected_instance_ids = [
8888
instance.id.local_id
8989
for instance in graph._issue_instances.values()
90-
if self._is_filename_prefixed_with(instance.filename, self._affected_files)
90+
if self._is_filename_prefixed_with(
91+
graph.get_text(instance.filename_id), self._affected_files
92+
)
9193
]
9294

9395
for instance_id in affected_instance_ids:
@@ -128,7 +130,7 @@ def _populate_issues_from_affected_trace_frames(self, graph: TraceGraph) -> None
128130
trace_frame
129131
for trace_frame in graph._trace_frames.values()
130132
if self._is_filename_prefixed_with(
131-
trace_frame.filename, self._affected_files
133+
graph.get_text(trace_frame.filename_id), self._affected_files
132134
)
133135
]
134136

@@ -142,7 +144,7 @@ def _populate_issues_from_affected_trace_frames(self, graph: TraceGraph) -> None
142144
[
143145
graph._trace_frames[trace_frame_id]
144146
for trace_frame_id in graph._trace_frames_rev_map[
145-
(trace_frame.caller, trace_frame.caller_port)
147+
(trace_frame.caller_id.local_id, trace_frame.caller_port)
146148
]
147149
if graph._trace_frames[trace_frame_id].kind == trace_frame.kind
148150
]
@@ -274,7 +276,7 @@ def _populate_issue_trace(
274276
filtered_ids = []
275277
for trace_frame_id in trace_frame_ids:
276278
frame = graph._trace_frames[trace_frame_id]
277-
if not kind or kind == frame.kind:
279+
if kind is None or kind == frame.kind:
278280
self.add_issue_instance_trace_frame_assoc(instance, frame)
279281
filtered_ids.append(trace_frame_id)
280282
self._populate_trace(graph, filtered_ids)
@@ -318,7 +320,7 @@ def _populate_trace(self, graph: TraceGraph, trace_frame_ids: List[int]) -> None
318320
self._add_trace_frame(graph, trace_frame)
319321
self._visited_trace_frame_ids.add(trace_frame_id)
320322

321-
key = (trace_frame.callee, trace_frame.callee_port)
323+
key = (trace_frame.callee_id.local_id, trace_frame.callee_port)
322324
trace_frame_ids.extend(
323325
[
324326
trace_frame_id

0 commit comments

Comments
 (0)