Skip to content

Commit fbc7d4f

Browse files
wsmosesvchuravygiordanogbaraldi
committed
AllocOpt: Fix stack lowering where alloca continas boxed and unboxed data (#55306)
Co-authored-by: Valentin Churavy <[email protected]> Co-authored-by: Mosè Giordano <[email protected]> Co-authored-by: Gabriel Baraldi <[email protected]>
1 parent 0be37db commit fbc7d4f

File tree

4 files changed

+72
-1
lines changed

4 files changed

+72
-1
lines changed

src/llvm-alloc-helpers.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset,
8888
memop.isaggr = isa<StructType>(elty) || isa<ArrayType>(elty) || isa<VectorType>(elty);
8989
memop.isobjref = hasObjref(elty);
9090
auto &field = getField(offset, size, elty);
91+
field.second.hasunboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
92+
9193
if (field.second.hasobjref != memop.isobjref)
9294
field.second.multiloc = true; // can't split this field, since it contains a mix of references and bits
9395
if (!isstore)
@@ -189,6 +191,7 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
189191
auto elty = inst->getType();
190192
required.use_info.has_unknown_objref |= hasObjref(elty);
191193
required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
194+
required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
192195
required.use_info.hasunknownmem = true;
193196
} else if (!required.use_info.addMemOp(inst, 0, cur.offset,
194197
inst->getType(),
@@ -280,6 +283,7 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
280283
auto elty = storev->getType();
281284
required.use_info.has_unknown_objref |= hasObjref(elty);
282285
required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
286+
required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
283287
required.use_info.hasunknownmem = true;
284288
} else if (!required.use_info.addMemOp(inst, use->getOperandNo(),
285289
cur.offset, storev->getType(),
@@ -301,10 +305,14 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
301305
}
302306
required.use_info.hasload = true;
303307
auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
308+
Type *elty = storev->getType();
304309
if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
305-
cur.offset, storev->getType(),
310+
cur.offset, elty,
306311
true, required.DL)) {
307312
LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n");
313+
required.use_info.has_unknown_objref |= hasObjref(elty);
314+
required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
315+
required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
308316
required.use_info.hasunknownmem = true;
309317
}
310318
required.use_info.refload = true;

src/llvm-alloc-helpers.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ namespace jl_alloc {
4646
bool hasaggr:1;
4747
bool multiloc:1;
4848
bool hasload:1;
49+
// The alloc has a unboxed object at this offset.
50+
bool hasunboxed:1;
4951
llvm::Type *elty;
5052
llvm::SmallVector<MemOp,4> accesses;
5153
Field(uint32_t size, llvm::Type *elty)
@@ -54,6 +56,7 @@ namespace jl_alloc {
5456
hasaggr(false),
5557
multiloc(false),
5658
hasload(false),
59+
hasunboxed(false),
5760
elty(elty)
5861
{
5962
}
@@ -93,6 +96,9 @@ namespace jl_alloc {
9396
// The alloc has an aggregate Julia object reference not in an explicit field.
9497
bool has_unknown_objrefaggr:1;
9598

99+
// The alloc has an unboxed object at an unknown offset.
100+
bool has_unknown_unboxed:1;
101+
96102
void reset()
97103
{
98104
escaped = false;
@@ -107,6 +113,7 @@ namespace jl_alloc {
107113
haserror = false;
108114
has_unknown_objref = false;
109115
has_unknown_objrefaggr = false;
116+
has_unknown_unboxed = false;
110117
uses.clear();
111118
preserves.clear();
112119
memops.clear();

src/llvm-alloc-opt.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,10 +252,12 @@ void Optimizer::optimizeAll()
252252
removeAlloc(orig);
253253
continue;
254254
}
255+
bool has_unboxed = use_info.has_unknown_unboxed;
255256
bool has_ref = use_info.has_unknown_objref;
256257
bool has_refaggr = use_info.has_unknown_objrefaggr;
257258
for (auto memop: use_info.memops) {
258259
auto &field = memop.second;
260+
has_unboxed |= field.hasunboxed;
259261
if (field.hasobjref) {
260262
has_ref = true;
261263
// This can be relaxed a little based on hasload
@@ -284,6 +286,19 @@ void Optimizer::optimizeAll()
284286
splitOnStack(orig);
285287
continue;
286288
}
289+
// The move to stack code below, if has_ref is set, changes the allocation to an array of jlvalue_t's. This is fine
290+
// if all objects are jlvalue_t's. However, if part of the allocation is an unboxed value (e.g. it is a { float, jlvaluet }),
291+
// then moveToStack will create a [2 x jlvaluet] bitcast to { float, jlvaluet }.
292+
// This later causes the GC rooting pass, to miss-characterize the float as a pointer to a GC value
293+
if (has_unboxed && has_ref) {
294+
REMARK([&]() {
295+
return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
296+
<< "GC allocation could not be split since it contains both boxed and unboxed values, unable to move to stack " << ore::NV("GC Allocation", orig);
297+
});
298+
if (use_info.hastypeof)
299+
optimizeTag(orig);
300+
continue;
301+
}
287302
REMARK([&](){
288303
return OptimizationRemark(DEBUG_TYPE, "Stack Move Allocation", orig)
289304
<< "GC allocation moved to stack " << ore::NV("GC Allocation", orig);

test/llvmpasses/alloc-opt-bits.ll

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
4+
; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
5+
; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED
6+
7+
; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
8+
; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
9+
10+
11+
@tag = external addrspace(10) global {}
12+
13+
@glob = external addrspace(10) global {}
14+
15+
; Test that the gc_preserve intrinsics are deleted directly.
16+
17+
; CHECK-LABEL: @ptr_and_bits
18+
; CHECK-NOT: alloca
19+
; OPAQUE: call noalias {} addrspace(10) @julia.gc_alloc_obj
20+
; TYPED: call noalias ptr addrspace(10) @julia.gc_alloc_obj
21+
22+
define void @ptr_and_bits(i8* %fptr, i1 %b, i1 %b2, i32 %idx) {
23+
%pgcstack = call {}*** @julia.get_pgcstack()
24+
%gcstack = bitcast {}*** %pgcstack to {}**
25+
%current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
26+
%v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 16, {} addrspace(10) @tag)
27+
%v2 = bitcast {} addrspace(10)* to { i64, {} addrspace(10)* } addrspace(10)*
28+
%g0 = getelementptr { i64, {} addrspace(10)* }, { i64, {} addrspace(10)* }* %v2, i32 %idx, i32 1
29+
store {} addrspace(10)* @glob, {} addrspace(10)* addrspace(10)* %g0
30+
31+
%g1 = getelementptr { i64, {} addrspace(10)* }, { i64, {} addrspace(10)* }* %v2, i32 %idx, i32 0
32+
store i64 7, i75 addrspace(10) %g1
33+
34+
%res = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %g0
35+
%res2 = load i64, i64 addrspace(10) %g1
36+
ret void
37+
}
38+
39+
declare noalias {} addrspace(10) @julia.gc_alloc_obj({}**, i64, {} addrspace(10))
40+
41+
declare {}*** @julia.get_pgcstack()

0 commit comments

Comments
 (0)