
Commit 0b5daeb

[GlobalISel] Fix miscompile when narrowing vector loads/stores to non-byte-sized types (llvm#136739)
LegalizerHelper::reduceLoadStoreWidth does not work for non-byte-sized types, because narrowing to such a type would require (un)packing bits across byte boundaries. Precommit tests: llvm#134904
1 parent 81870cb commit 0b5daeb
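
For context on the commit message: if the elements of a vector such as <8 x s9> (from the tests below) are assumed to be tightly bit-packed in memory, element boundaries generally fall in the middle of a byte, so splitting the access into per-element loads/stores at byte offsets cannot be correct without extra bit (un)packing. A minimal standalone sketch (plain C++, not part of the commit; the bit-packed layout here is an assumption) that prints where each s9 element would sit:

#include <cstdio>

int main() {
  // Assumed layout: element i of <8 x s9> occupies bits [9*i, 9*i + 9).
  const unsigned EltBits = 9, NumElts = 8;
  for (unsigned I = 0; I < NumElts; ++I) {
    unsigned FirstBit = EltBits * I, LastBit = FirstBit + EltBits - 1;
    // Every element except the first starts mid-byte, and every element
    // spans two bytes, so a byte-addressed per-element split cannot
    // reproduce this layout without shifting/masking across bytes.
    std::printf("elt %u: bits [%u,%u] -> bytes [%u,%u]%s\n", I, FirstBit,
                LastBit, FirstBit / 8, LastBit / 8,
                FirstBit % 8 ? " (starts mid-byte)" : "");
  }
  return 0;
}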

4 files changed, +370 -655 lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

+5
@@ -5210,6 +5210,11 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
   if (TypeIdx != 0)
     return UnableToLegalize;

+  if (!NarrowTy.isByteSized()) {
+    LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
+    return UnableToLegalize;
+  }
+
   // This implementation doesn't work for atomics. Give up instead of doing
   // something invalid.
   if (LdStMI.isAtomic())
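
For reference, the new guard keys off isByteSized() on the requested narrow type, which (as I read it) holds exactly when the type's total bit width is a multiple of 8. A small illustration of which narrow types would now hit the bailout; the helper function and the include path are mine, not from the commit, and the header location has moved between LLVM releases:

#include "llvm/CodeGenTypes/LowLevelType.h"

// Hypothetical helper, for illustration only.
static void narrowTypeByteSizedExamples() {
  using namespace llvm;
  LLT S9 = LLT::scalar(9);            // e.g. scalarizing an <8 x s9> access
  LLT S16 = LLT::scalar(16);
  LLT V2S9 = LLT::fixed_vector(2, 9); // 18 bits total
  // S9.isByteSized()   -> false: reduceLoadStoreWidth now returns
  //                       UnableToLegalize instead of miscompiling.
  // S16.isByteSized()  -> true:  a byte-wise split is still performed.
  // V2S9.isByteSized() -> false: 18 bits is not a multiple of 8.
  (void)S9; (void)S16; (void)V2S9;
}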

llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir

+12 -92
@@ -2,9 +2,11 @@
 # RUN: llc -O0 -mtriple=aarch64 -verify-machineinstrs -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' -o - %s 2> %t.err | FileCheck %s
 # RUN: FileCheck -check-prefix=ERR %s < %t.err

-# ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s128) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<2 x s63>)) (in function: load-narrow-scalar-high-bits)
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %{{[0-9]+}}:_(<8 x s9>), %{{[0-9]+}}:_(p0) :: (store (<8 x s9>), align 16) (in function: store-narrow-non-byte-sized)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<8 x s9>) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<8 x s9>), align 16) (in function: load-narrow-non-byte-sized)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s128) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<2 x s63>)) (in function: load-narrow-scalar-high-bits)

-# FIXME: Scalarized stores for non-byte-sized vector elements store incorrect partial values.
+# FIXME: Non-byte-sized vector elements cause fallback in LegalizerHelper::reduceLoadStoreWidth
 ---
 name: store-narrow-non-byte-sized
 tracksRegLiveness: true
@@ -15,60 +17,10 @@ body: |
     ; CHECK: liveins: $x8
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 511
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY]](p0) :: (store (s16), align 16)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 257
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 1, align 1)
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into unknown-address + 2)
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[COPY7]]
-    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC3]](s16), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 3, align 1)
-    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[COPY9]]
-    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC4]](s16), [[PTR_ADD3]](p0) :: (store (s16) into unknown-address + 4, align 4)
-    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[COPY11]]
-    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[AND5]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC5]](s16), [[PTR_ADD4]](p0) :: (store (s16) into unknown-address + 5, align 1)
-    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[C]], [[COPY12]]
-    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[AND6]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC6]](s16), [[PTR_ADD5]](p0) :: (store (s16) into unknown-address + 6)
-    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[C3]], [[COPY13]]
-    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[AND7]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC7]](s16), [[PTR_ADD6]](p0) :: (store (s16) into unknown-address + 7, align 1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s9) = G_CONSTANT i9 -256
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s9) = G_CONSTANT i9 -255
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s9>) = G_BUILD_VECTOR [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s9>), [[COPY]](p0) :: (store (<8 x s9>), align 16)
     ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x8
     %1:_(s9) = G_CONSTANT i9 256
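
To make the removed (miscompiling) expansion above concrete: it masked each element to 9 bits, truncated it to s16, and stored the result at byte offsets 0..7, so each two-byte store overlaps the next one and the 9th bit of every element but the last gets clobbered. A small standalone sketch (plain C++, not LLVM code; the element values come from the constants in the old CHECK lines) replaying those stores:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Mem[16] = {};
  // Alternating i9 256 / i9 257, as in the old CHECK lines above.
  const uint16_t Elts[8] = {256, 257, 256, 257, 256, 257, 256, 257};
  for (int I = 0; I < 8; ++I) {
    uint16_t V = Elts[I] & 511;  // G_AND with 511, then G_TRUNC to s16
    Mem[I] = V & 0xff;           // little-endian store (s16) at offset I...
    Mem[I + 1] = V >> 8;         // ...whose high byte the next store clobbers
  }
  for (int I = 0; I < 9; ++I)
    std::printf("%02x ", Mem[I]);  // prints: 00 01 00 01 00 01 00 01 01
  std::printf("\n");
  return 0;
}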
@@ -153,7 +105,7 @@ body: |
 ...


-# FIXME: Scalarized loads for non-byte-sized vector elements load incorrect partial values.
+# FIXME: Non-byte-sized vector elements cause fallback in LegalizerHelper::reduceLoadStoreWidth
 ---
 name: load-narrow-non-byte-sized
 tracksRegLiveness: true
@@ -164,41 +116,9 @@ body: |
     ; CHECK: liveins: $x8
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8
-    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load (s16), align 16)
-    ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD]], 9
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 1, align 1)
-    ; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD1]], 9
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 2)
-    ; CHECK-NEXT: [[ASSERT_ZEXT2:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD2]], 9
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 3, align 1)
-    ; CHECK-NEXT: [[ASSERT_ZEXT3:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD3]], 9
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 4, align 4)
-    ; CHECK-NEXT: [[ASSERT_ZEXT4:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD4]], 9
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 5, align 1)
-    ; CHECK-NEXT: [[ASSERT_ZEXT5:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD5]], 9
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 6)
-    ; CHECK-NEXT: [[ASSERT_ZEXT6:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD6]], 9
-    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 7, align 1)
-    ; CHECK-NEXT: [[ASSERT_ZEXT7:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD7]], 9
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[ASSERT_ZEXT]](s16), [[ASSERT_ZEXT1]](s16), [[ASSERT_ZEXT2]](s16), [[ASSERT_ZEXT3]](s16), [[ASSERT_ZEXT4]](s16), [[ASSERT_ZEXT5]](s16), [[ASSERT_ZEXT6]](s16), [[ASSERT_ZEXT7]](s16)
-    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 511
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s16>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: $q0 = COPY [[AND]](<8 x s16>)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s9>) = G_LOAD [[COPY]](p0) :: (load (<8 x s9>), align 16)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<8 x s16>) = G_ZEXT [[LOAD]](<8 x s9>)
+    ; CHECK-NEXT: $q0 = COPY [[ZEXT]](<8 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(p0) = COPY $x8
     %2:_(<8 x s9>) = G_LOAD %0(p0) :: (load (<8 x s9>), align 16)
