-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[KeyInstr][SimplifyCFG] Remap atoms when folding br to common succ into pred #133482
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-debuginfo Author: Orlando Cazalet-Hyams (OCHyams) ChangesSimplifyCFG folds
Remap source atoms in Add multi-pred test. Full diff: https://github.com/llvm/llvm-project/pull/133482.diff 3 Files Affected:
diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h
index c22d3e9b10d27..7fdedc4acd919 100644
--- a/llvm/include/llvm/IR/DebugLoc.h
+++ b/llvm/include/llvm/IR/DebugLoc.h
@@ -76,6 +76,16 @@ namespace llvm {
LLVMContext &Ctx,
DenseMap<const MDNode *, MDNode *> &Cache);
+ /// Return true if the source locations match, ignoring isImplicitCode and
+ /// source atom info.
+ bool isSameSourceLocation(const DebugLoc &Other) const {
+ if (get() == Other.get())
+ return true;
+ return ((bool)*this == (bool)Other) && getLine() == Other.getLine() &&
+ getCol() == Other.getCol() && getScope() == Other.getScope() &&
+ getInlinedAt() == Other.getInlinedAt();
+ }
+
unsigned getLine() const;
unsigned getCol() const;
MDNode *getScope() const;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index fd83ec1a7f4fe..1ba1e4ac81000 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -73,6 +73,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LockstepReverseIterator.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -1129,13 +1130,17 @@ static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
Instruction *NewBonusInst = BonusInst.clone();
- if (!isa<DbgInfoIntrinsic>(BonusInst) &&
- PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
- // Unless the instruction has the same !dbg location as the original
- // branch, drop it. When we fold the bonus instructions we want to make
- // sure we reset their debug locations in order to avoid stepping on
- // dead code caused by folding dead branches.
- NewBonusInst->setDebugLoc(DebugLoc());
+ if (!isa<DbgInfoIntrinsic>(BonusInst)) {
+ if (!NewBonusInst->getDebugLoc().isSameSourceLocation(
+ PTI->getDebugLoc())) {
+ // Unless the instruction has the same !dbg location as the original
+ // branch, drop it. When we fold the bonus instructions we want to make
+ // sure we reset their debug locations in order to avoid stepping on
+ // dead code caused by folding dead branches.
+ NewBonusInst->setDebugLoc(DebugLoc());
+ } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
+ mapAtomInstance(DL, VMap);
+ }
}
RemapInstruction(NewBonusInst, VMap,
@@ -1182,6 +1187,19 @@ static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
U.set(NewBonusInst);
}
}
+
+ // Key Instructions: We may have propagated atom info into the pred. If the
+ // pred's terminator already has atom info do nothing as merging would drop
+ // one atom group anyway. If it doesn't, propagte the remapped atom group
+ // from BB's terminator.
+ if (auto &PredDL = PredBlock->getTerminator()->getDebugLoc()) {
+ auto &DL = BB->getTerminator()->getDebugLoc();
+ if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
+ PredDL.isSameSourceLocation(DL)) {
+ PredBlock->getTerminator()->setDebugLoc(DL);
+ RemapSourceAtom(PredBlock->getTerminator(), VMap);
+ }
+ }
}
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
diff --git a/llvm/test/DebugInfo/KeyInstructions/Generic/simplifycfg-branch-fold.ll b/llvm/test/DebugInfo/KeyInstructions/Generic/simplifycfg-branch-fold.ll
new file mode 100644
index 0000000000000..8746f242007c3
--- /dev/null
+++ b/llvm/test/DebugInfo/KeyInstructions/Generic/simplifycfg-branch-fold.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -S -passes=simplifycfg -bonus-inst-threshold=2 | FileCheck %s
+
+;; Block d gets folded into preds b and c. Check the cloned instructions get
+;; remapped DILocation atomGroup numbers in each of the preds. Additionally
+;; check that the branches each inherit the atomGroup of the folded branch.
+
+declare i32 @g(...)
+define void @f(i1 %c0, i1 %c1, i1 %c2, i32 %x, i32 %y) !dbg !17 {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) !dbg [[DBG6:![0-9]+]] {
+; CHECK-NEXT: [[A:.*:]]
+; CHECK-NEXT: br i1 [[C0]], label %[[B:.*]], label %[[C:.*]]
+; CHECK: [[B]]:
+; CHECK-NEXT: [[AND_OLD:%.*]] = and i32 [[X]], [[Y]], !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT: [[CMP_OLD:%.*]] = icmp eq i32 [[AND_OLD]], 0, !dbg [[DBG9:![0-9]+]]
+; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[C1]], i1 true, i1 [[CMP_OLD]], !dbg [[DBG10:![0-9]+]]
+; CHECK-NEXT: br i1 [[OR_COND1]], label %[[F:.*]], label %[[E:.*]], !dbg [[DBG11:![0-9]+]]
+; CHECK: [[C]]:
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]], !dbg [[DBG12:![0-9]+]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0, !dbg [[DBG13:![0-9]+]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2]], i1 true, i1 [[CMP]], !dbg [[DBG10]]
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[F]], label %[[E]], !dbg [[DBG14:![0-9]+]]
+; CHECK: [[E]]:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 (...) @g()
+; CHECK-NEXT: br label %[[F]]
+; CHECK: [[F]]:
+; CHECK-NEXT: ret void
+;
+a:
+ br i1 %c0, label %b, label %c
+
+b:
+ br i1 %c1, label %f, label %d, !dbg !18
+
+c:
+ br i1 %c2, label %f, label %d, !dbg !18
+
+d:
+ %and = and i32 %x, %y, !dbg !19
+ %cmp = icmp eq i32 %and, 0, !dbg !20
+ br i1 %cmp, label %f, label %e, !dbg !21
+
+e:
+ %7 = tail call i32 (...) @g()
+ br label %f
+
+f:
+ ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!3, !4}
+!llvm.module.flags = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "a.ll", directory: "/")
+!2 = !{}
+!3 = !{i32 9}
+!4 = !{i32 0}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = !DISubroutineType(types: !2)
+!17 = distinct !DISubprogram(name: "f", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!18 = !DILocation(line: 10, column: 10, scope: !17)
+!19 = !DILocation(line: 10, column: 10, scope: !17, atomGroup: 1, atomRank: 2)
+!20 = !DILocation(line: 10, column: 10, scope: !17, atomGroup: 2, atomRank: 2)
+!21 = !DILocation(line: 10, column: 10, scope: !17, atomGroup: 2, atomRank: 1)
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]])
+; CHECK: [[META1]] = !DIFile(filename: "a.ll", directory: {{.*}})
+; CHECK: [[META2]] = !{}
+; CHECK: [[DBG6]] = distinct !DISubprogram(name: "f", scope: null, file: [[META1]], line: 1, type: [[META7:![0-9]+]], scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK: [[META7]] = !DISubroutineType(types: [[META2]])
+; CHECK: [[DBG8]] = !DILocation(line: 10, column: 10, scope: [[DBG6]], atomGroup: 5, atomRank: 2)
+; CHECK: [[DBG9]] = !DILocation(line: 10, column: 10, scope: [[DBG6]], atomGroup: 6, atomRank: 2)
+; CHECK: [[DBG10]] = !DILocation(line: 10, column: 10, scope: [[DBG6]])
+; CHECK: [[DBG11]] = !DILocation(line: 10, column: 10, scope: [[DBG6]], atomGroup: 6, atomRank: 1)
+; CHECK: [[DBG12]] = !DILocation(line: 10, column: 10, scope: [[DBG6]], atomGroup: 3, atomRank: 2)
+; CHECK: [[DBG13]] = !DILocation(line: 10, column: 10, scope: [[DBG6]], atomGroup: 4, atomRank: 2)
+; CHECK: [[DBG14]] = !DILocation(line: 10, column: 10, scope: [[DBG6]], atomGroup: 4, atomRank: 1)
+;.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some minor nits, but this update looks correct.
// pred's terminator already has atom info do nothing as merging would drop | ||
// one atom group anyway. If it doesn't, propagte the remapped atom group | ||
// from BB's terminator. | ||
if (auto &PredDL = PredBlock->getTerminator()->getDebugLoc()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (auto &PredDL = PredBlock->getTerminator()->getDebugLoc()) { | |
if (auto &PredDL = PTI->getDebugLoc()) { |
If I understand it, PTI
is still PredBlock
's terminator?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, question inline. I worried a little about whether someone out there might inadvertantly re-use this function for some other purpose, but it looks like it's dedicated to llvm::foldBranchToCommonDest
, so pretty much a single use case.
// dead code caused by folding dead branches. | ||
NewBonusInst->setDebugLoc(DebugLoc()); | ||
} else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) { | ||
mapAtomInstance(DL, VMap); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What are the consequences for this when stepping -- this is an abnormal situation seeing how the source location is identical to where it's being hoisted to. Will developers potentially step on the same source location twice?
I understand the general situation of "code that is duplicated needs new atoms, because there are now multiple key instructions", but wouldn't this be different when the source-location at PTI potentially becomes key because we've remapped a key instruction we've hoisted to beneath it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure I fully understand the questions. Do these answers help (or help to see what I'm missing?)
Will developers potentially step on the same source location twice?
Do you mean, if we have consecutive instructions with the same source location that are all is_stmt? I think the answer is that it depends on debugger implementation. FWIW I think the SCE debugger doesn't do repeated steps in that case atm. My current view is that it's better to include those is_stmts, and let the consumer decide what to do about it.
(Maybe I'm missing your point?)
wouldn't this be different when the source-location at PTI potentially becomes key because we've remapped a key instruction we've hoisted to beneath it?
I'm not sure what you mean about PTI's location becoming key? e.g. looking at the test - the new branches in b
and c
are only "key" because they're clones of the cond br from d
(which is already "key").
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the first part I think that answers the question (multiple instructions with the same source-location getting is_stmt). Would there also be potential for an intervening non-is_stmt instruction with the same source line between the two? It feels possible from instruction scheduling if not immediately in this pass, and if so I guess it's a general consequence of the technique.
It doesn't seem unreasonable, although perhaps we need some (debuggers) test coverage of how to interpret it. After all we're not trying to program the debugger from the compiler, we're communicating that "this source location really does happen to execute twice".
I'm not sure what you mean about PTI's location becoming key? e.g. looking at the test - the new branches in b and c are only "key" because they're clones of the cond br from d (which is already "key").
I was wondering whether there can be a case where the instruction at PTI and the bonus instruction are in the same group, but the one being remapped here has the higher rank, and putting it in a different group causes the previously-lower-ranked instruction to become the highest ranked as a result. At face value this isn't a problem because the whole point of this function is we're duplicating a code path; but could there be scenarios where LLVM uses this utility to implement a move (i.e. clone-then-delete-old-path?)
Or to put it another way: I believe (but may be wrong) that cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses
currently copies/moves instructions from one place to another. But, with this change, what was a plain copy/move now comes with changes to the stepping behaviour.
I think this boils down to saying "so what?", and the answer to that is "the stepping is still superior to without key instructions". I'm just trying to think through the consequences of how these changes compose together.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the first part I think that answers the question (multiple instructions with the same source-location getting is_stmt). Would there also be potential for an intervening non-is_stmt instruction with the same source line between the two? It feels possible from instruction scheduling if not immediately in this pass, and if so I guess it's a general consequence of the technique.
Yeah I think so. And again, kind of up to the debugger how to handle that IMO. FWIW I'm pretty sure our debugger skips forward until the next is_stmt line doesn't have the same line number.
I was wondering whether there can be a case where the instruction at PTI and the bonus instruction are in the same group, but the one being remapped here has the higher rank, and putting it in a different group causes the previously-lower-ranked instruction to become the highest ranked as a result.
Ah I see what you're saying. Yep, this is possibly edge-case and a subtle limitation of remapping in general. I figured that because it's a bit of a corner case and at worst we see an extra step (not regressing from current behaviour, just an extra step than key instructions might ideally want to produce), it wasn't worth worrying about.
At face value this isn't a problem because the whole point of this function is we're duplicating a code path; but could there be scenarios where LLVM uses this utility to implement a move (i.e. clone-then-delete-old-path?)
I guess in theory... in practice it's only reached through foldBranchToCommonDest
. I'm not sure if there's any way to detect/prevent future user error here.
Or to put it another way: I believe (but may be wrong) that cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses currently copies/moves instructions from one place to another. But, with this change, what was a plain copy/move now comes with changes to the stepping behaviour.
It copies rather than moves (because there can be multiple preds that we copy into). However, if there's only ever one pred then yes, this essentially moves-by-copy, and updates atom groups (even though it doesn't really need to). We could say "if there's one pred, then don't remap the atoms". I tested that and it works. But with multiple preds this function gets called multiple times, and in the final call there's only one pred. So that final pred doesn't get remapped atoms. It's a subtle difference, but in terms of "correctness" I think that's ok (it's as-ok as the existing situation).
I think this boils down to saying "so what?", and the answer to that is "the stepping is still superior to without key instructions". I'm just trying to think through the consequences of how these changes compose together.
I'm leaning towards "so what" and leaving it as I have in this commit. But if my response above sounds better to you, I'll happily make that change.
|
||
;; Block d gets folded into preds b and c. Check the cloned instructions get |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Copy the block diagram from this review into here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure where I got the diagram in the description from. I've added one that actually describes this multi-pred test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I continued my ramblings inline; I see the potential for some slightly unexpected stepping behaviour, but still strictly better than todays "everything is a breakpoint" approach. I don't think I can put my finger on a specific bad behaviour that might come from this, so LGTM.
(Would still be nice to have the diagram in the test).
// dead code caused by folding dead branches. | ||
NewBonusInst->setDebugLoc(DebugLoc()); | ||
} else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) { | ||
mapAtomInstance(DL, VMap); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the first part I think that answers the question (multiple instructions with the same source-location getting is_stmt). Would there also be potential for an intervening non-is_stmt instruction with the same source line between the two? It feels possible from instruction scheduling if not immediately in this pass, and if so I guess it's a general consequence of the technique.
It doesn't seem unreasonable, although perhaps we need some (debuggers) test coverage of how to interpret it. After all we're not trying to program the debugger from the compiler, we're communicating that "this source location really does happen to execute twice".
I'm not sure what you mean about PTI's location becoming key? e.g. looking at the test - the new branches in b and c are only "key" because they're clones of the cond br from d (which is already "key").
I was wondering whether there can be a case where the instruction at PTI and the bonus instruction are in the same group, but the one being remapped here has the higher rank, and putting it in a different group causes the previously-lower-ranked instruction to become the highest ranked as a result. At face value this isn't a problem because the whole point of this function is we're duplicating a code path; but could there be scenarios where LLVM uses this utility to implement a move (i.e. clone-then-delete-old-path?)
Or to put it another way: I believe (but may be wrong) that cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses
currently copies/moves instructions from one place to another. But, with this change, what was a plain copy/move now comes with changes to the stepping behaviour.
I think this boils down to saying "so what?", and the answer to that is "the stepping is still superior to without key instructions". I'm just trying to think through the consequences of how these changes compose together.
a6f7a92
to
fde8c9e
Compare
eeb24e3
to
958c0d7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Github doesn't present your latest inline comments at the end of the "conversation" tab, but it does present them in the middle in a repeated comment section. Silly github.
I think we've discussed the subtleties enough, the tl;dr is that it's still an improvement and the copy-versus-move situation is only a hazard for code that doesn't exist. We can rely on future authors testing their code.
c89fa2b
to
06aae56
Compare
…to pred SimplifyCFG folds `b` into `a`. +-----------+ | v --> a --> b --> c --> d --> | ^ +-----------------+ Remap source atoms in `b` so that the duplicated instructions are analysed independently to determine is_stmt positions. This is necessary as the contents of `b` may be folded into multiple preds in this way. Add multi-pred test.
0b2929c
to
791863e
Compare
…to pred (llvm#133482) SimplifyCFG folds `d` into preds `b` and `c`. +---------------+ | | +--> b --+ | | v v --> a d --> e --> f --> | ^ ^ +--> c --+ | | | +---------------+ Remap source atoms so that the duplicated instructions are analysed independently to determine is_stmt positions. The pull request contains a discussion covering various edge cases here: https://github.com/llvm/llvm-project/pull/133482/files#r2039519348 The summary of the discussion is that we could avoid remapping when there's a single pred, but we decided that it's still a trade off, and not worth the additional complexity right now. RFC: https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668
…to pred (llvm#133482) SimplifyCFG folds `d` into preds `b` and `c`. +---------------+ | | +--> b --+ | | v v --> a d --> e --> f --> | ^ ^ +--> c --+ | | | +---------------+ Remap source atoms so that the duplicated instructions are analysed independently to determine is_stmt positions. The pull request contains a discussion covering various edge cases here: https://github.com/llvm/llvm-project/pull/133482/files#r2039519348 The summary of the discussion is that we could avoid remapping when there's a single pred, but we decided that it's still a trade off, and not worth the additional complexity right now. RFC: https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668
…to pred (llvm#133482) SimplifyCFG folds `d` into preds `b` and `c`. +---------------+ | | +--> b --+ | | v v --> a d --> e --> f --> | ^ ^ +--> c --+ | | | +---------------+ Remap source atoms so that the duplicated instructions are analysed independently to determine is_stmt positions. The pull request contains a discussion covering various edge cases here: https://github.com/llvm/llvm-project/pull/133482/files#r2039519348 The summary of the discussion is that we could avoid remapping when there's a single pred, but we decided that it's still a trade off, and not worth the additional complexity right now. RFC: https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668
SimplifyCFG folds
b
intoa
.Remap source atoms in
b
so that the duplicated instructions are analysedindependently to determine is_stmt positions. This is necessary as the
contents of
b
may be folded into multiple preds in this way.Add multi-pred test.