Skip to content

Commit 5d3412a

Browse files
committed
[InstCombine] Insert a bitcast to enable merging similar store insts
Two store instructions with the same pointer operand can be merged into their common successor basic block if the value operand of one is cast (via a bitcast or a no-op pointer cast) to match the type of the other. Differential Revision: https://reviews.llvm.org/D150900
1 parent 11926e6 commit 5d3412a

File tree

2 files changed

+253
-12
lines changed

2 files changed

+253
-12
lines changed

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1611,6 +1611,17 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
16111611
if (!OtherBr || BBI == OtherBB->begin())
16121612
return false;
16131613

1614+
auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
1615+
if (!OtherStore ||
1616+
OtherStore->getPointerOperand() != SI.getPointerOperand())
1617+
return false;
1618+
1619+
auto *SIVTy = SI.getValueOperand()->getType();
1620+
auto *OSVTy = OtherStore->getValueOperand()->getType();
1621+
return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
1622+
SI.hasSameSpecialState(OtherStore);
1623+
};
1624+
16141625
// If the other block ends in an unconditional branch, check for the 'if then
16151626
// else' case. There is an instruction before the branch.
16161627
StoreInst *OtherStore = nullptr;
@@ -1626,8 +1637,7 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
16261637
// If this isn't a store, isn't a store to the same location, or is not the
16271638
// right kind of store, bail out.
16281639
OtherStore = dyn_cast<StoreInst>(BBI);
1629-
if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1630-
!SI.isSameOperationAs(OtherStore))
1640+
if (!OtherStoreIsMergeable(OtherStore))
16311641
return false;
16321642
} else {
16331643
// Otherwise, the other block ended with a conditional branch. If one of the
@@ -1641,12 +1651,10 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
16411651
// lives in OtherBB.
16421652
for (;; --BBI) {
16431653
// Check to see if we find the matching store.
1644-
if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1645-
if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1646-
!SI.isSameOperationAs(OtherStore))
1647-
return false;
1654+
OtherStore = dyn_cast<StoreInst>(BBI);
1655+
if (OtherStoreIsMergeable(OtherStore))
16481656
break;
1649-
}
1657+
16501658
// If we find something that may be using or overwriting the stored
16511659
// value, or if we run out of instructions, we can't do the transform.
16521660
if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
@@ -1664,14 +1672,17 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
16641672
}
16651673

16661674
// Insert a PHI node now if we need it.
1667-
Value *MergedVal = OtherStore->getOperand(0);
1675+
Value *MergedVal = OtherStore->getValueOperand();
16681676
// The debug locations of the original instructions might differ. Merge them.
16691677
DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
16701678
OtherStore->getDebugLoc());
1671-
if (MergedVal != SI.getOperand(0)) {
1672-
PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1673-
PN->addIncoming(SI.getOperand(0), SI.getParent());
1674-
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1679+
if (MergedVal != SI.getValueOperand()) {
1680+
PHINode *PN =
1681+
PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
1682+
PN->addIncoming(SI.getValueOperand(), SI.getParent());
1683+
Builder.SetInsertPoint(OtherStore);
1684+
PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
1685+
OtherBB);
16751686
MergedVal = InsertNewInstBefore(PN, DestBB->front());
16761687
PN->setDebugLoc(MergedLoc);
16771688
}

llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,233 @@ bb10: ; preds = %bb
7171
bb12: ; preds = %bb10, %bb9
7272
ret void
7373
}
74+
75+
define half @diff_types_same_width_merge(i1 %cond, half %a, i16 %b) {
76+
; CHECK-LABEL: @diff_types_same_width_merge(
77+
; CHECK-NEXT: entry:
78+
; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
79+
; CHECK: BB0:
80+
; CHECK-NEXT: br label [[SINK:%.*]]
81+
; CHECK: BB1:
82+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16 [[B:%.*]] to half
83+
; CHECK-NEXT: br label [[SINK]]
84+
; CHECK: sink:
85+
; CHECK-NEXT: [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB1]] ], [ [[A:%.*]], [[BB0]] ]
86+
; CHECK-NEXT: ret half [[STOREMERGE]]
87+
;
88+
entry:
89+
%alloca = alloca half
90+
br i1 %cond, label %BB0, label %BB1
91+
BB0:
92+
store half %a, ptr %alloca
93+
br label %sink
94+
BB1:
95+
store i16 %b, ptr %alloca
96+
br label %sink
97+
sink:
98+
%val = load half, ptr %alloca
99+
ret half %val
100+
}
101+
102+
define i32 @diff_types_diff_width_no_merge(i1 %cond, i32 %a, i64 %b) {
103+
; CHECK-LABEL: @diff_types_diff_width_no_merge(
104+
; CHECK-NEXT: entry:
105+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8
106+
; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
107+
; CHECK: A:
108+
; CHECK-NEXT: store i32 [[A:%.*]], ptr [[ALLOCA]], align 8
109+
; CHECK-NEXT: br label [[SINK:%.*]]
110+
; CHECK: B:
111+
; CHECK-NEXT: store i64 [[B:%.*]], ptr [[ALLOCA]], align 8
112+
; CHECK-NEXT: br label [[SINK]]
113+
; CHECK: sink:
114+
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ALLOCA]], align 8
115+
; CHECK-NEXT: ret i32 [[VAL]]
116+
;
117+
entry:
118+
%alloca = alloca i64
119+
br i1 %cond, label %A, label %B
120+
A:
121+
store i32 %a, ptr %alloca
122+
br label %sink
123+
B:
124+
store i64 %b, ptr %alloca
125+
br label %sink
126+
sink:
127+
%val = load i32, ptr %alloca
128+
ret i32 %val
129+
}
130+
131+
define <4 x i32> @vec_no_merge(i1 %cond, <2 x i32> %a, <4 x i32> %b) {
132+
; CHECK-LABEL: @vec_no_merge(
133+
; CHECK-NEXT: entry:
134+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 16
135+
; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
136+
; CHECK: A:
137+
; CHECK-NEXT: store <2 x i32> [[A:%.*]], ptr [[ALLOCA]], align 16
138+
; CHECK-NEXT: br label [[SINK:%.*]]
139+
; CHECK: B:
140+
; CHECK-NEXT: store <4 x i32> [[B:%.*]], ptr [[ALLOCA]], align 16
141+
; CHECK-NEXT: br label [[SINK]]
142+
; CHECK: sink:
143+
; CHECK-NEXT: [[VAL:%.*]] = load <4 x i32>, ptr [[ALLOCA]], align 16
144+
; CHECK-NEXT: ret <4 x i32> [[VAL]]
145+
;
146+
entry:
147+
%alloca = alloca i64
148+
br i1 %cond, label %A, label %B
149+
A:
150+
store <2 x i32> %a, ptr %alloca
151+
br label %sink
152+
B:
153+
store <4 x i32> %b, ptr %alloca
154+
br label %sink
155+
sink:
156+
%val = load <4 x i32>, ptr %alloca
157+
ret <4 x i32> %val
158+
}
159+
160+
%struct.half = type { half };
161+
162+
define %struct.half @one_elem_struct_merge(i1 %cond, %struct.half %a, half %b) {
163+
; CHECK-LABEL: @one_elem_struct_merge(
164+
; CHECK-NEXT: entry:
165+
; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
166+
; CHECK: BB0:
167+
; CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_HALF:%.*]] [[A:%.*]], 0
168+
; CHECK-NEXT: br label [[SINK:%.*]]
169+
; CHECK: BB1:
170+
; CHECK-NEXT: br label [[SINK]]
171+
; CHECK: sink:
172+
; CHECK-NEXT: [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB0]] ], [ [[B:%.*]], [[BB1]] ]
173+
; CHECK-NEXT: [[VAL1:%.*]] = insertvalue [[STRUCT_HALF]] poison, half [[STOREMERGE]], 0
174+
; CHECK-NEXT: ret [[STRUCT_HALF]] [[VAL1]]
175+
;
176+
entry:
177+
%alloca = alloca i64
178+
br i1 %cond, label %BB0, label %BB1
179+
BB0:
180+
store %struct.half %a, ptr %alloca
181+
br label %sink
182+
BB1:
183+
store half %b, ptr %alloca
184+
br label %sink
185+
sink:
186+
%val = load %struct.half, ptr %alloca
187+
ret %struct.half %val
188+
}
189+
190+
%struct.tup = type { half, i32 };
191+
192+
define %struct.tup @multi_elem_struct_no_merge(i1 %cond, %struct.tup %a, half %b) {
193+
; CHECK-LABEL: @multi_elem_struct_no_merge(
194+
; CHECK-NEXT: entry:
195+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8
196+
; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
197+
; CHECK: A:
198+
; CHECK-NEXT: store [[STRUCT_TUP:%.*]] [[A:%.*]], ptr [[ALLOCA]], align 8
199+
; CHECK-NEXT: br label [[SINK:%.*]]
200+
; CHECK: B:
201+
; CHECK-NEXT: store half [[B:%.*]], ptr [[ALLOCA]], align 8
202+
; CHECK-NEXT: br label [[SINK]]
203+
; CHECK: sink:
204+
; CHECK-NEXT: [[VAL:%.*]] = load [[STRUCT_TUP]], ptr [[ALLOCA]], align 8
205+
; CHECK-NEXT: ret [[STRUCT_TUP]] [[VAL]]
206+
;
207+
entry:
208+
%alloca = alloca i64
209+
br i1 %cond, label %A, label %B
210+
A:
211+
store %struct.tup %a, ptr %alloca
212+
br label %sink
213+
B:
214+
store half %b, ptr %alloca
215+
br label %sink
216+
sink:
217+
%val = load %struct.tup, ptr %alloca
218+
ret %struct.tup %val
219+
}
220+
221+
define i16 @same_types_diff_align_no_merge(i1 %cond, i16 %a, i16 %b) {
222+
; CHECK-LABEL: @same_types_diff_align_no_merge(
223+
; CHECK-NEXT: entry:
224+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i16, align 4
225+
; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
226+
; CHECK: BB0:
227+
; CHECK-NEXT: store i16 [[A:%.*]], ptr [[ALLOCA]], align 8
228+
; CHECK-NEXT: br label [[SINK:%.*]]
229+
; CHECK: BB1:
230+
; CHECK-NEXT: store i16 [[B:%.*]], ptr [[ALLOCA]], align 4
231+
; CHECK-NEXT: br label [[SINK]]
232+
; CHECK: sink:
233+
; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[ALLOCA]], align 4
234+
; CHECK-NEXT: ret i16 [[VAL]]
235+
;
236+
entry:
237+
%alloca = alloca i16, align 4
238+
br i1 %cond, label %BB0, label %BB1
239+
BB0:
240+
store i16 %a, ptr %alloca, align 8
241+
br label %sink
242+
BB1:
243+
store i16 %b, ptr %alloca, align 4
244+
br label %sink
245+
sink:
246+
%val = load i16, ptr %alloca
247+
ret i16 %val
248+
}
249+
250+
define i64 @ptrtoint_merge(i1 %cond, i64 %a, ptr %b) {
251+
; CHECK-LABEL: @ptrtoint_merge(
252+
; CHECK-NEXT: entry:
253+
; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
254+
; CHECK: BB0:
255+
; CHECK-NEXT: br label [[SINK:%.*]]
256+
; CHECK: BB1:
257+
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[B:%.*]] to i64
258+
; CHECK-NEXT: br label [[SINK]]
259+
; CHECK: sink:
260+
; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i64 [ [[A:%.*]], [[BB0]] ], [ [[TMP0]], [[BB1]] ]
261+
; CHECK-NEXT: ret i64 [[STOREMERGE]]
262+
;
263+
entry:
264+
%alloca = alloca ptr
265+
br i1 %cond, label %BB0, label %BB1
266+
BB0:
267+
store i64 %a, ptr %alloca
268+
br label %sink
269+
BB1:
270+
store ptr %b, ptr %alloca
271+
br label %sink
272+
sink:
273+
%val = load i64, ptr %alloca
274+
ret i64 %val
275+
}
276+
277+
define ptr @inttoptr_merge(i1 %cond, i64 %a, ptr %b) {
278+
; CHECK-LABEL: define ptr @inttoptr_merge
279+
; CHECK-SAME: (i1 [[COND:%.*]], i64 [[A:%.*]], ptr [[B:%.*]]) {
280+
; CHECK-NEXT: entry:
281+
; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]]
282+
; CHECK: BB0:
283+
; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[A]] to ptr
284+
; CHECK-NEXT: br label [[SINK:%.*]]
285+
; CHECK: BB1:
286+
; CHECK-NEXT: br label [[SINK]]
287+
; CHECK: sink:
288+
; CHECK-NEXT: [[STOREMERGE:%.*]] = phi ptr [ [[B]], [[BB1]] ], [ [[TMP0]], [[BB0]] ]
289+
; CHECK-NEXT: ret ptr [[STOREMERGE]]
290+
;
291+
entry:
292+
%alloca = alloca ptr
293+
br i1 %cond, label %BB0, label %BB1
294+
BB0:
295+
store i64 %a, ptr %alloca, align 8
296+
br label %sink
297+
BB1:
298+
store ptr %b, ptr %alloca, align 8
299+
br label %sink
300+
sink:
301+
%val = load ptr, ptr %alloca
302+
ret ptr %val
303+
}

0 commit comments

Comments
 (0)