Skip to content

Commit a69c770

Browse files
committed
[llvm] Ensure propagated constants in the vtable are aligned
1 parent 3ca2fa7 commit a69c770

File tree

7 files changed

+882
-56
lines changed

7 files changed

+882
-56
lines changed

llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,10 @@ struct PatternList {
226226
return false;
227227
}
228228
};
229+
230+
// Returns the smallest multiple of Align that is greater than or equal to
// Size. Align must be nonzero because the computation divides by it.
inline uint64_t RoundUp(uint64_t Size, uint64_t Align) {
231+
return (Size + (Align - 1)) / Align * Align;
232+
}
229233
} // namespace
230234

231235
// Find the minimum offset that we may store a value of size Size bits at. If
@@ -298,7 +302,9 @@ wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
298302
++Byte;
299303
}
300304
}
301-
return (MinByte + I) * 8;
305+
// Rounding up ensures the constant is always stored at an address we
306+
// can directly load from without misalignment.
307+
return RoundUp((MinByte + I) * 8, Size);
302308
NextI:;
303309
}
304310
}
@@ -1834,9 +1840,19 @@ bool DevirtModule::tryVirtualConstProp(
18341840
if (!RetType)
18351841
return false;
18361842
unsigned BitWidth = RetType->getBitWidth();
1843+
1844+
// TODO: Since we can evaluate these constants at compile-time, we can save
1845+
// some space by calculating the smallest range of values that all these
1846+
// constants can fit in, then only allocate enough space to fit those values.
1847+
// At each callsite, we can get the original type by doing a sign/zero
1848+
// extension. For example, if we would store an i64, but we can see that all
1849+
// the values fit into an i16, then we can store an i16 before/after the
1850+
// vtable and at each callsite do a s/zext.
18371851
if (BitWidth > 64)
18381852
return false;
18391853

1854+
Align TypeAlignment = M.getDataLayout().getPrefTypeAlign(RetType);
1855+
18401856
// Make sure that each function is defined, does not access memory, takes at
18411857
// least one argument, does not use its first argument (which we assume is
18421858
// 'this'), and has the same return type.
@@ -1861,6 +1877,18 @@ bool DevirtModule::tryVirtualConstProp(
18611877
Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
18621878
Fn->getReturnType() != RetType)
18631879
return false;
1880+
1881+
// This only works if the integer type's alignment is at most that of the
1882+
// vtable. If the table is underaligned, then we can't guarantee that the
1883+
// constant will always be aligned to the integer type alignment. For
1884+
// example, if the table is `align 1`, we can never guarantee that an i32
1885+
// stored before/after the vtable is 32-bit aligned without changing the
1886+
// alignment of the new global.
1887+
GlobalVariable *GV = Target.TM->Bits->GV;
1888+
Align TableAlignment = M.getDataLayout().getValueOrABITypeAlignment(
1889+
GV->getAlign(), GV->getValueType());
1890+
if (TypeAlignment > TableAlignment)
1891+
return false;
18641892
}
18651893

18661894
for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
@@ -1880,6 +1908,12 @@ bool DevirtModule::tryVirtualConstProp(
18801908

18811909
// Find an allocation offset in bits in all vtables associated with the
18821910
// type.
1911+
// TODO: If a series of i1s would be placed after the vtable, it would
1912+
// help save some space if they were placed at the very end after all
1913+
// other larger-size constants. Having these i1s anywhere in the middle
1914+
// of the allocation would mean extra padding is needed for any subsequent
1915+
// constants, but having them at the end wouldn't require this padding
1916+
// at the very end.
18831917
uint64_t AllocBefore =
18841918
findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
18851919
uint64_t AllocAfter =
@@ -1911,6 +1945,14 @@ bool DevirtModule::tryVirtualConstProp(
19111945
setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
19121946
OffsetBit);
19131947

1948+
// In an earlier check we forbade constant propagation from operating on
1949+
// tables whose alignment is less than the alignment needed for loading
1950+
// the constant. Thus, the address we take the offset from will always be
1951+
// aligned to at least this integer alignment. Now, we need to ensure that
1952+
// the offset is also aligned to this integer alignment to ensure we always
1953+
// have an aligned load.
1954+
assert(OffsetByte % TypeAlignment.value() == 0);
1955+
19141956
if (RemarksEnabled || AreStatisticsEnabled())
19151957
for (auto &&Target : TargetsForSlot)
19161958
Target.WasDevirt = true;
Lines changed: 96 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,35 @@
11
; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
22

33
target datalayout = "e-p:64:64"
4-
target triple = "x86_64-unknown-linux-gnu"
54

6-
; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\01\00\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf1i32], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]]
5+
;; Note that i16 is used here such that we can ensure all constants for "typeid"
6+
;; can come before the vtable.
7+
; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\03\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf1i16], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]]
78
@vt1 = constant [3 x ptr] [
89
ptr @vf0i1,
910
ptr @vf1i1,
10-
ptr @vf1i32
11+
ptr @vf1i16
1112
], section "vt1sec", !type !0
1213

13-
; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\02\00\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf2i32], [0 x i8] zeroinitializer }, !type [[T8]]
14+
; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\04\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf2i16], [0 x i8] zeroinitializer }, !type [[T8]]
1415
@vt2 = constant [3 x ptr] [
1516
ptr @vf1i1,
1617
ptr @vf0i1,
17-
ptr @vf2i32
18+
ptr @vf2i16
1819
], !type !0
1920

20-
; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [5 x i8], [3 x ptr], [0 x i8] } { [5 x i8] c"\03\00\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf3i32], [0 x i8] zeroinitializer }, align 1, !type [[T5:![0-9]+]]
21+
; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [4 x i8], [3 x ptr], [0 x i8] } { [4 x i8] c"\05\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf3i16], [0 x i8] zeroinitializer }, align 2, !type [[T5:![0-9]+]]
2122
@vt3 = constant [3 x ptr] [
2223
ptr @vf0i1,
2324
ptr @vf1i1,
24-
ptr @vf3i32
25-
], align 1, !type !0
25+
ptr @vf3i16
26+
], align 2, !type !0
2627

27-
; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [16 x i8], [3 x ptr], [0 x i8] } { [16 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\04\00\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf4i32], [0 x i8] zeroinitializer }, align 16, !type [[T16:![0-9]+]]
28+
; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [16 x i8], [3 x ptr], [0 x i8] } { [16 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\06\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf4i16], [0 x i8] zeroinitializer }, align 16, !type [[T16:![0-9]+]]
2829
@vt4 = constant [3 x ptr] [
2930
ptr @vf1i1,
3031
ptr @vf0i1,
31-
ptr @vf4i32
32+
ptr @vf4i16
3233
], align 16, !type !0
3334

3435
; CHECK: @vt5 = {{.*}}, !type [[T0:![0-9]+]]
@@ -38,10 +39,35 @@ ptr @__cxa_pure_virtual,
3839
ptr @__cxa_pure_virtual
3940
], !type !0
4041

42+
;; Test relative vtables
43+
; CHECK: [[VT6RELDATA:@[^ ]*]] = private constant { [4 x i8], [3 x i32], [0 x i8] } { [4 x i8] c"\00\00\03\00", [3 x i32] [
44+
; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf0i1 to i64), i64 ptrtoint (ptr @vt6_rel to i64)) to i32),
45+
; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1i1 to i64), i64 ptrtoint (ptr @vt6_rel to i64)) to i32),
46+
; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1i16 to i64), i64 ptrtoint (ptr @vt6_rel to i64)) to i32)
47+
; CHECK-SAME: ], [0 x i8] zeroinitializer }, !type [[TREL:![0-9]+]]
48+
@vt6_rel = constant [3 x i32] [
49+
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf0i1 to i64), i64 ptrtoint (ptr @vt6_rel to i64)) to i32),
50+
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1i1 to i64), i64 ptrtoint (ptr @vt6_rel to i64)) to i32),
51+
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1i16 to i64), i64 ptrtoint (ptr @vt6_rel to i64)) to i32)
52+
], !type !2
53+
54+
; CHECK: [[VT7RELDATA:@[^ ]*]] = private constant { [4 x i8], [3 x i32], [0 x i8] } { [4 x i8] c"\00\00\04\00", [3 x i32] [
55+
; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1i1 to i64), i64 ptrtoint (ptr @vt7_rel to i64)) to i32),
56+
; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf0i1 to i64), i64 ptrtoint (ptr @vt7_rel to i64)) to i32),
57+
; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2i16 to i64), i64 ptrtoint (ptr @vt7_rel to i64)) to i32)
58+
; CHECK-SAME: ], [0 x i8] zeroinitializer }, !type [[TREL]]
59+
@vt7_rel = constant [3 x i32] [
60+
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1i1 to i64), i64 ptrtoint (ptr @vt7_rel to i64)) to i32),
61+
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf0i1 to i64), i64 ptrtoint (ptr @vt7_rel to i64)) to i32),
62+
i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2i16 to i64), i64 ptrtoint (ptr @vt7_rel to i64)) to i32)
63+
], !type !2
64+
4165
; CHECK: @vt1 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [0 x i8] }, ptr [[VT1DATA]], i32 0, i32 1)
4266
; CHECK: @vt2 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [0 x i8] }, ptr [[VT2DATA]], i32 0, i32 1)
43-
; CHECK: @vt3 = alias [3 x ptr], getelementptr inbounds ({ [5 x i8], [3 x ptr], [0 x i8] }, ptr [[VT3DATA]], i32 0, i32 1)
67+
; CHECK: @vt3 = alias [3 x ptr], getelementptr inbounds ({ [4 x i8], [3 x ptr], [0 x i8] }, ptr [[VT3DATA]], i32 0, i32 1)
4468
; CHECK: @vt4 = alias [3 x ptr], getelementptr inbounds ({ [16 x i8], [3 x ptr], [0 x i8] }, ptr [[VT4DATA]], i32 0, i32 1)
69+
; CHECK: @vt6_rel = alias [3 x i32], getelementptr inbounds ({ [4 x i8], [3 x i32], [0 x i8] }, ptr [[VT6RELDATA]], i32 0, i32 1)
70+
; CHECK: @vt7_rel = alias [3 x i32], getelementptr inbounds ({ [4 x i8], [3 x i32], [0 x i8] }, ptr [[VT7RELDATA]], i32 0, i32 1)
4571

4672
define i1 @vf0i1(ptr %this) readnone {
4773
ret i1 0
@@ -51,20 +77,20 @@ define i1 @vf1i1(ptr %this) readnone {
5177
ret i1 1
5278
}
5379

54-
define i32 @vf1i32(ptr %this) readnone {
55-
ret i32 1
80+
define i16 @vf1i16(ptr %this) readnone {
81+
ret i16 3
5682
}
5783

58-
define i32 @vf2i32(ptr %this) readnone {
59-
ret i32 2
84+
define i16 @vf2i16(ptr %this) readnone {
85+
ret i16 4
6086
}
6187

62-
define i32 @vf3i32(ptr %this) readnone {
63-
ret i32 3
88+
define i16 @vf3i16(ptr %this) readnone {
89+
ret i16 5
6490
}
6591

66-
define i32 @vf4i32(ptr %this) readnone {
67-
ret i32 4
92+
define i16 @vf4i16(ptr %this) readnone {
93+
ret i16 6
6894
}
6995

7096
; CHECK: define i1 @call1(
@@ -87,7 +113,7 @@ define i1 @call2(ptr %obj) {
87113
%vtable = load ptr, ptr %obj
88114
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid")
89115
call void @llvm.assume(i1 %p)
90-
%fptrptr = getelementptr [3 x ptr], ptr %vtable, i32 0, i32 1
116+
%fptrptr = getelementptr [3 x ptr], ptr %vtable, i16 0, i16 1
91117
%fptr = load ptr, ptr %fptrptr
92118
; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 -1
93119
; CHECK: [[VTLOAD2:%[^ ]*]] = load i8, ptr [[VTGEP2]]
@@ -98,27 +124,68 @@ define i1 @call2(ptr %obj) {
98124
ret i1 %result
99125
}
100126

101-
; CHECK: define i32 @call3(
102-
define i32 @call3(ptr %obj) {
127+
; CHECK: define i16 @call3(
128+
define i16 @call3(ptr %obj) {
103129
%vtable = load ptr, ptr %obj
104130
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid")
105131
call void @llvm.assume(i1 %p)
106-
%fptrptr = getelementptr [3 x ptr], ptr %vtable, i32 0, i32 2
132+
%fptrptr = getelementptr [3 x ptr], ptr %vtable, i16 0, i16 2
107133
%fptr = load ptr, ptr %fptrptr
108-
; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 -5
109-
; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, ptr [[VTGEP3]]
110-
%result = call i32 %fptr(ptr %obj)
111-
; CHECK: ret i32 [[VTLOAD3]]
112-
ret i32 %result
134+
; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 -4
135+
; CHECK: [[VTLOAD3:%[^ ]*]] = load i16, ptr [[VTGEP3]]
136+
%result = call i16 %fptr(ptr %obj)
137+
; CHECK: ret i16 [[VTLOAD3]]
138+
ret i16 %result
139+
}
140+
141+
; CHECK: define i1 @call1_rel(
142+
define i1 @call1_rel(ptr %obj) {
143+
%vtable = load ptr, ptr %obj
144+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3")
145+
call void @llvm.assume(i1 %p)
146+
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
147+
%result = call i1 %fptr(ptr %obj)
148+
ret i1 %result
149+
; CHECK: [[RES:%[^ ]*]] = icmp eq ptr %vtable, @vt7_rel
150+
; CHECK: ret i1 [[RES]]
151+
}
152+
153+
; CHECK: define i1 @call2_rel(
154+
define i1 @call2_rel(ptr %obj) {
155+
%vtable = load ptr, ptr %obj
156+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3")
157+
call void @llvm.assume(i1 %p)
158+
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 4)
159+
%result = call i1 %fptr(ptr %obj)
160+
ret i1 %result
161+
; CHECK: [[RES:%[^ ]*]] = icmp eq ptr %vtable, @vt6_rel
162+
; CHECK: ret i1 [[RES]]
163+
}
164+
165+
; CHECK: define i16 @call3_rel(
166+
define i16 @call3_rel(ptr %obj) {
167+
%vtable = load ptr, ptr %obj
168+
%p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3")
169+
call void @llvm.assume(i1 %p)
170+
%fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8)
171+
; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 -2
172+
; CHECK: [[VTLOAD3:%[^ ]*]] = load i16, ptr [[VTGEP3]]
173+
%result = call i16 %fptr(ptr %obj)
174+
; CHECK: ret i16 [[VTLOAD3]]
175+
ret i16 %result
113176
}
114177

115178
declare i1 @llvm.type.test(ptr, metadata)
116179
declare void @llvm.assume(i1)
117180
declare void @__cxa_pure_virtual()
181+
declare ptr @llvm.load.relative.i32(ptr, i32)
118182

119183
; CHECK: [[T8]] = !{i32 8, !"typeid"}
120-
; CHECK: [[T5]] = !{i32 5, !"typeid"}
184+
; CHECK: [[T5]] = !{i32 4, !"typeid"}
121185
; CHECK: [[T16]] = !{i32 16, !"typeid"}
122186
; CHECK: [[T0]] = !{i32 0, !"typeid"}
187+
; CHECK: [[TREL]] = !{i32 4, !"typeid3"}
123188

124189
!0 = !{i32 0, !"typeid"}
190+
!1 = !{i32 0, !"typeid2"}
191+
!2 = !{i32 0, !"typeid3"}

0 commit comments

Comments
 (0)