-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[KeyInstr] Merge atoms in DILocation::getMergedLocation #133480
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-debuginfo Author: Orlando Cazalet-Hyams (OCHyams) ChangesNFC for builds with LLVM_EXPERIMENTAL_KEY_INSTRUCTIONS=OFF (default). In an ideal world we would be able to track that the merged location is used in In cases where the InlinedAt field is unchanged we keep the atom with the In cases where the InlinedAt field is adjusted we generate a new atom group. Add unittest in MetadataTest.cpp. Full diff: https://github.com/llvm/llvm-project/pull/133480.diff 2 Files Affected:
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index aefda2f7be0b0..6463be8a21850 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -189,11 +189,15 @@ DILocation *DILocation::getMergedLocation(DILocation *LocA, DILocation *LocB) {
// Merge the two locations if possible, using the supplied
// inlined-at location for the created location.
- auto MergeLocPair = [&C](const DILocation *L1, const DILocation *L2,
- DILocation *InlinedAt) -> DILocation * {
+ auto *LocAIA = LocA->getInlinedAt();
+ auto *LocBIA = LocB->getInlinedAt();
+ auto MergeLocPair = [&C, LocAIA,
+ LocBIA](const DILocation *L1, const DILocation *L2,
+ DILocation *InlinedAt) -> DILocation * {
if (L1 == L2)
return DILocation::get(C, L1->getLine(), L1->getColumn(), L1->getScope(),
- InlinedAt);
+ InlinedAt, L1->isImplicitCode(),
+ L1->getAtomGroup(), L1->getAtomRank());
// If the locations originate from different subprograms we can't produce
// a common location.
@@ -226,8 +230,44 @@ DILocation *DILocation::getMergedLocation(DILocation *LocA, DILocation *LocB) {
bool SameCol = L1->getColumn() == L2->getColumn();
unsigned Line = SameLine ? L1->getLine() : 0;
unsigned Col = SameLine && SameCol ? L1->getColumn() : 0;
-
- return DILocation::get(C, Line, Col, Scope, InlinedAt);
+ bool IsImplicitCode = L1->isImplicitCode() && L2->isImplicitCode();
+ uint64_t Group = 0;
+ uint64_t Rank = 0;
+ if (SameLine) {
+ if (L1->getAtomGroup() || L2->getAtomGroup()) {
+ // If we're preserving the same matching inlined-at field we can
+ // preserve the atom.
+ if (LocBIA == LocAIA && InlinedAt == LocBIA) {
+ // Deterministically keep the lowest non-zero ranking atom group
+ // number.
+ // FIXME: It would be nice if we could track that an instruction
+ // belongs to two source atoms.
+ bool UseL1Atom = [L1, L2]() {
+ if (L1->getAtomRank() == L2->getAtomRank()) {
+ // Arbitrarily choose the lowest non-zero group number.
+ if (!L1->getAtomGroup() || !L2->getAtomGroup())
+ return !L2->getAtomGroup();
+ return L1->getAtomGroup() < L2->getAtomGroup();
+ }
+ // Choose the lowest non-zero rank.
+ if (!L1->getAtomRank() || !L2->getAtomRank())
+ return !L2->getAtomRank();
+ return L1->getAtomRank() < L2->getAtomRank();
+ }();
+ Group = UseL1Atom ? L1->getAtomGroup() : L2->getAtomGroup();
+ Rank = UseL1Atom ? L1->getAtomRank() : L2->getAtomRank();
+ } else {
+ // If either instruction is part of a source atom, reassign it a new
+ // atom group. This essentially regresses to non-key-instructions
+ // behaviour (now that it's the only instruction in its group it'll
+ // probably get is_stmt applied).
+ Group = C.incNextAtomGroup();
+ Rank = 1;
+ }
+ }
+ }
+ return DILocation::get(C, Line, Col, Scope, InlinedAt, IsImplicitCode,
+ Group, Rank);
};
DILocation *Result = ARIt != ALocs.rend() ? (*ARIt)->getInlinedAt() : nullptr;
@@ -254,7 +294,9 @@ DILocation *DILocation::getMergedLocation(DILocation *LocA, DILocation *LocB) {
// historically picked A's scope, and a nullptr inlined-at location, so that
// behavior is mimicked here but I am not sure if this is always the correct
// way to handle this.
- return DILocation::get(C, 0, 0, LocA->getScope(), nullptr);
+ // Key Instructions: it's fine to drop atom group and rank here, as line 0
+ // is a nonsensical is_stmt location.
+ return DILocation::get(C, 0, 0, LocA->getScope(), nullptr, false, 0, 0);
}
std::optional<unsigned>
diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp
index 8a1c1f9d306c6..4a45dd6009ca6 100644
--- a/llvm/unittests/IR/MetadataTest.cpp
+++ b/llvm/unittests/IR/MetadataTest.cpp
@@ -1243,6 +1243,140 @@ TEST_F(DILocationTest, Merge) {
auto *M2 = DILocation::getMergedLocation(A2, B);
EXPECT_EQ(M1, M2);
}
+
+#ifdef EXPERIMENTAL_KEY_INSTRUCTIONS
+#define EXPECT_ATOM(Loc, Group, Rank) \
+ EXPECT_EQ(Group, M->getAtomGroup()); \
+ EXPECT_EQ(Rank, M->getAtomRank());
+#else
+#define EXPECT_ATOM(Loc, Group, Rank) \
+ EXPECT_EQ(0u, M->getAtomGroup()); \
+ EXPECT_EQ(0u, M->getAtomRank()); \
+ (void)Group; \
+ (void)Rank;
+#endif
+ // Identical, including source atom numbers.
+ {
+ auto *A = DILocation::get(Context, 2, 7, N, nullptr, false, 1, 1);
+ auto *B = DILocation::get(Context, 2, 7, N, nullptr, false, 1, 1);
+ auto *M = DILocation::getMergedLocation(A, B);
+ EXPECT_ATOM(M, 1u, 1u);
+ // DILocations are uniqued, so we can check equality by ptr.
+ EXPECT_EQ(M, DILocation::getMergedLocation(A, B));
+ }
+
+ // Identical but different atom ranks (same atom) - choose the lowest nonzero
+ // rank.
+ {
+ auto *A = DILocation::get(Context, 2, 7, N, nullptr, false, 1, 1);
+ auto *B = DILocation::get(Context, 2, 7, N, nullptr, false, 1, 2);
+ auto *M = DILocation::getMergedLocation(A, B);
+ EXPECT_ATOM(M, 1u, 1u);
+ EXPECT_EQ(M, DILocation::getMergedLocation(B, A));
+
+ A = DILocation::get(Context, 2, 7, N, nullptr, false, 1, 0);
+ B = DILocation::get(Context, 2, 7, N, nullptr, false, 1, 2);
+ M = DILocation::getMergedLocation(A, B);
+ EXPECT_ATOM(M, 1u, 2u);
+ EXPECT_EQ(M, DILocation::getMergedLocation(B, A));
+ }
+
+ // Identical but different atom ranks (different atom) - choose the lowest
+ // nonzero rank.
+ {
+ auto *A = DILocation::get(Context, 2, 7, N, nullptr, false, 1, 1);
+ auto *B = DILocation::get(Context, 2, 7, N, nullptr, false, 2, 2);
+ auto *M = DILocation::getMergedLocation(A, B);
+ EXPECT_ATOM(M, 1u, 1u);
+ EXPECT_EQ(M, DILocation::getMergedLocation(B, A));
+
+ A = DILocation::get(Context, 2, 7, N, nullptr, false, 1, 0);
+ B = DILocation::get(Context, 2, 7, N, nullptr, false, 2, 2);
+ M = DILocation::getMergedLocation(A, B);
+ EXPECT_ATOM(M, 2u, 2u);
+ EXPECT_EQ(M, DILocation::getMergedLocation(B, A));
+ }
+
+ // Identical but equal atom rank (different atom) - choose the lowest non-zero
+ // group (arbitrary choice for deterministic behaviour).
+ {
+ auto *A = DILocation::get(Context, 2, 7, N, nullptr, false, 1, 1);
+ auto *B = DILocation::get(Context, 2, 7, N, nullptr, false, 2, 1);
+ auto *M = DILocation::getMergedLocation(A, B);
+ EXPECT_ATOM(M, 1u, 1u);
+ EXPECT_EQ(M, DILocation::getMergedLocation(B, A));
+
+ A = DILocation::get(Context, 2, 7, N, nullptr, false, 0, 1);
+ B = DILocation::get(Context, 2, 7, N, nullptr, false, 2, 1);
+ M = DILocation::getMergedLocation(A, B);
+ EXPECT_ATOM(M, 2u, 1u);
+ EXPECT_EQ(M, DILocation::getMergedLocation(B, A));
+ }
+
+ // Completely different except same atom numbers. Zero out the atoms.
+ {
+ auto *I = DILocation::get(Context, 2, 7, N);
+ auto *A = DILocation::get(Context, 1, 6, S, I, false, 1, 1);
+ auto *B =
+ DILocation::get(Context, 2, 7, getSubprogram(), nullptr, false, 1, 1);
+ auto *M = DILocation::getMergedLocation(A, B);
+ EXPECT_EQ(0u, M->getLine());
+ EXPECT_EQ(0u, M->getColumn());
+ EXPECT_TRUE(isa<DILocalScope>(M->getScope()));
+ EXPECT_EQ(S, M->getScope());
+ EXPECT_EQ(nullptr, M->getInlinedAt());
+ }
+
+ // Same inlined-at chain but different atoms. Choose the lowest
+ // non-zero group (arbitrary choice for deterministic behaviour).
+ {
+ auto *I = DILocation::get(Context, 1, 7, N);
+ auto *F = getSubprogram();
+ auto *A = DILocation::get(Context, 1, 1, F, I, false, 1, 2);
+ auto *B = DILocation::get(Context, 1, 1, F, I, false, 2, 1);
+ auto *M = DILocation::getMergedLocation(A, B);
+ EXPECT_ATOM(M, 2u, 1u);
+ EXPECT_EQ(M, DILocation::getMergedLocation(B, A));
+
+ A = DILocation::get(Context, 1, 1, F, I, false, 1, 2);
+ B = DILocation::get(Context, 1, 1, F, I, false, 2, 0);
+ M = DILocation::getMergedLocation(A, B);
+ EXPECT_ATOM(M, 1u, 2u);
+ EXPECT_EQ(M, DILocation::getMergedLocation(B, A));
+ }
+
+ // Partially equal inlined-at chain but different atoms. Generate a new atom
+ // group (if either have a group number). This configuration seems unlikely
+ // to occur as line numbers must match, but isn't impossible.
+ {
+ // Reset global counter to ensure EXPECT numbers line up.
+ Context.pImpl->NextAtomGroup = 1;
+ // x1 -> y2 -> z4
+ // y3 -> z4
+ auto *FX = getSubprogram();
+ auto *FY = getSubprogram();
+ auto *FZ = getSubprogram();
+ auto *Z4 = DILocation::get(Context, 1, 4, FZ);
+ auto *Y3IntoZ4 = DILocation::get(Context, 1, 3, FY, Z4, false, 1, 1);
+ auto *Y2IntoZ4 = DILocation::get(Context, 1, 2, FY, Z4);
+ auto *X1IntoY2 = DILocation::get(Context, 1, 1, FX, Y2IntoZ4);
+ auto *M = DILocation::getMergedLocation(X1IntoY2, Y3IntoZ4);
+ EXPECT_EQ(M->getScope(), FY);
+ EXPECT_EQ(M->getInlinedAt()->getScope(), FZ);
+ EXPECT_ATOM(M, 2u, 1u);
+
+ // This swapped merge will produce a new atom group too.
+ M = DILocation::getMergedLocation(Y3IntoZ4, X1IntoY2);
+
+ // Same again, even if the atom numbers match.
+ auto *X1IntoY2SameAtom =
+ DILocation::get(Context, 1, 1, FX, Y2IntoZ4, false, 1, 1);
+ M = DILocation::getMergedLocation(X1IntoY2SameAtom, Y3IntoZ4);
+ EXPECT_ATOM(M, 4u, 1u);
+ M = DILocation::getMergedLocation(Y3IntoZ4, X1IntoY2SameAtom);
+ EXPECT_ATOM(M, 5u, 1u);
+ }
+#undef EXPECT_ATOM
}
TEST_F(DILocationTest, getDistinct) {
|
if (L1 == L2) | ||
return DILocation::get(C, L1->getLine(), L1->getColumn(), L1->getScope(), | ||
InlinedAt); | ||
InlinedAt, L1->isImplicitCode(), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically copying L1->isImplicitCode()
here is a change in behaviour - normally that flag would be effectively dropped here.
llvm/lib/IR/DebugInfoMetadata.cpp
Outdated
// If either instruction is part of a source atom, reassign it a new | ||
// atom group. This essentially regresses to non-key-instructions | ||
// behaviour (now that it's the only instruction in its group it'll | ||
// probably get is_stmt applied). | ||
Group = C.incNextAtomGroup(); | ||
Rank = 1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this necessary? Since we use inlinedAt
as part of the tuple alongside atomGroup
, keeping the group the same would still result in the merged instruction becoming part of a distinct "group" (with is_stmt
likely applying). Likewise, since we're creating a new group it sounds to me like it would be unnecessary to change the rank?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new {atomGroup, inlinedAt} tuple wouldn't necessarily be distinct. (It feels unlikely, but AFAICT is possible that another merged instruction from a different source atom ends up with the same tuple).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This makes sense - although I still suspect there's some form of optimization we could do here (isolating the cases where atomGroups need to change), better to go with what definitely works here!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, although I think the atom-group selection could be refactored so that it's simpler to read. There's already multiple levels of nesting happening in that function, best to reduce that as much as possible (possibly put it in a completely different helper function to separate the concerns?)
llvm/lib/IR/DebugInfoMetadata.cpp
Outdated
if (SameLine) { | ||
if (L1->getAtomGroup() || L2->getAtomGroup()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Early-exit the lambda/delegate instead?
llvm/unittests/IR/MetadataTest.cpp
Outdated
auto *A = DILocation::get(Context, 1, 1, F, I, false, 1, 2); | ||
auto *B = DILocation::get(Context, 1, 1, F, I, false, 2, 1); | ||
auto *M = DILocation::getMergedLocation(A, B); | ||
EXPECT_ATOM(M, 2u, 1u); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is an awkwardness I forsee in the future: because these are just integers, I tend to forget which way around things are. And so in this example: isn't the first integer to EXPECT_ATOM the group, which according to the comment here should be the lowest, but you're expecting the highest (2 instead of 1)? (Looks like the rank-test is applied before the atom test?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like you're right, I think this part of the test is incorrect (should give both locations the same rank to trigger the group number comparison). Unsure how to solve the integer-order confusion other than adding arg comments, e.g.
auto *A = DILocation::get(Context, 1, 1, F, I, false, /*atomGroup*/1, /*atomRank*/2);
(clangd+vscode annotates arguments with the parameter names a bit like that, so I didn't consider it much, but agree that if we can do something about it we should - any suggestions other than the arg comments?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No real suggestions; I think the C++17 way might be to invent user-defined-literals and an integer-like object, but that seems like waaayyyy overkill here. Or maybe enum classes? The unit tests will also (IMHO YMMV?) be the only place where literals get fed into this.
An ugly alternative is to put the pattern in the name, i.e. "DILocation::getWithAtomRank", so that every time it appears you're reminded what argument order it should be.
// Partially equal inlined-at chain but different atoms. Generate a new atom | ||
// group (if either have a group number). This configuration seems unlikely | ||
// to occur as line numbers must match, but isn't impossible. | ||
{ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe it's a desired and supported configuration for this to occur when a function is inlined multiple times then the copies are merged: we keep the line number of the instruction, but put line-zero in the inlinedAt field so that the inlining-callstack refers to a nonexistant location. Thus, we're saying "It's definitely this line, but we can't tell you what inlining context it came from, there were a few".
I don't know whether it makes sense to try and support key instructions in that situation; I feel that falling back to old stepping behaviour is fine in these rare circumstances.
07f9c1f
to
71329a0
Compare
0335154
to
2c538d7
Compare
@SLTozer are you happy with my responses to your inline comments? And a few weeks ago offline we discussed that the slightly not-nfc change to preserve isImplicitCode is probably ok. Are you still happy with that? @jmorse I used some early-exits in the lambda - does that look ok? I can try to separate some bits out if you'd prefer (I'm currently erring on the side of keeping all the context in one place). I added comments to the function arguments to highlight which integer is which (I prefer this approach rather than introducing more boilerplate for code that won't often be touched, but I'm happy to alter further if needed). Slightly annoyingly this pushes the unittest calls over the column limit... I could shorten "AtomGroup" and "AtomRank" to just "Atom" and "Rank", but "Rank" feels too nebulous? |
Github has helpfully chucked away my comments; was the unit-test-error that I fixed corrected? If so, LGTM. |
Yep, fixed that |
71329a0
to
0847aa5
Compare
NFC for builds with LLVM_EXPERIMENTAL_KEY_INSTRUCTIONS=OFF (default). In an ideal world we would be able to track that the merged location is used in multiple source atoms. We can't do this though, so instead we arbitrarily but deterministically pick one. In cases where the InlinedAt field is unchanged we keep the atom with the lowest non-zero rank (highest precedence). If the ranks are equal we choose the smaller non-zero group number (arbitrary choice). In cases where the InlinedAt field is adjusted we generate a new atom group. Keeping the group wouldn't make sense (a source atom is identified by the group number and InlinedAt pair) but discarding the atom info could result in missed is_stmts. Add unittest in MetadataTest.cpp.
1353653
to
6c4caf3
Compare
NFC for builds with LLVM_EXPERIMENTAL_KEY_INSTRUCTIONS=OFF (default). In an ideal world we would be able to track that the merged location is used in multiple source atoms. We can't do this though, so instead we arbitrarily but deterministically pick one. In cases where the InlinedAt field is unchanged we keep the atom with the lowest non-zero rank (highest precedence). If the ranks are equal we choose the smaller non-zero group number (arbitrary choice). In cases where the InlinedAt field is adjusted we generate a new atom group. Keeping the group wouldn't make sense (a source atom is identified by the group number and InlinedAt pair) but discarding the atom info could result in missed is_stmts. Add unittest in MetadataTest.cpp. RFC: https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668
NFC for builds with LLVM_EXPERIMENTAL_KEY_INSTRUCTIONS=OFF (default). In an ideal world we would be able to track that the merged location is used in multiple source atoms. We can't do this though, so instead we arbitrarily but deterministically pick one. In cases where the InlinedAt field is unchanged we keep the atom with the lowest non-zero rank (highest precedence). If the ranks are equal we choose the smaller non-zero group number (arbitrary choice). In cases where the InlinedAt field is adjusted we generate a new atom group. Keeping the group wouldn't make sense (a source atom is identified by the group number and InlinedAt pair) but discarding the atom info could result in missed is_stmts. Add unittest in MetadataTest.cpp. RFC: https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668
NFC for builds with LLVM_EXPERIMENTAL_KEY_INSTRUCTIONS=OFF (default). In an ideal world we would be able to track that the merged location is used in multiple source atoms. We can't do this though, so instead we arbitrarily but deterministically pick one. In cases where the InlinedAt field is unchanged we keep the atom with the lowest non-zero rank (highest precedence). If the ranks are equal we choose the smaller non-zero group number (arbitrary choice). In cases where the InlinedAt field is adjusted we generate a new atom group. Keeping the group wouldn't make sense (a source atom is identified by the group number and InlinedAt pair) but discarding the atom info could result in missed is_stmts. Add unittest in MetadataTest.cpp. RFC: https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668
NFC for builds with LLVM_EXPERIMENTAL_KEY_INSTRUCTIONS=OFF (default).
In an ideal world we would be able to track that the merged location is used in
multiple source atoms. We can't do this though, so instead we arbitrarily but
deterministically pick one.
In cases where the InlinedAt field is unchanged we keep the atom with the
lowest non-zero rank (highest precedence). If the ranks are equal we choose
the smaller non-zero group number (arbitrary choice).
In cases where the InlinedAt field is adjusted we generate a new atom group.
Keeping the group wouldn't make sense (a source atom is identified by the
group number and InlinedAt pair) but discarding the atom info could result
in missed is_stmts.
Add unittest in MetadataTest.cpp.