Skip to content

Commit bf1c43e

Browse files
authored
Allow skip inlined fastpath (#19)
1 parent 0d8bbd9 commit bf1c43e

File tree

1 file changed

+78
-68
lines changed

1 file changed

+78
-68
lines changed

src/llvm-final-gc-lowering.cpp

Lines changed: 78 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -281,74 +281,84 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
281281
auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
282282
auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize);
283283

284-
// Assuming we use the first immix allocator.
285-
// FIXME: We should get the allocator index and type from MMTk.
286-
auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix);
287-
288-
auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor));
289-
auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit));
290-
291-
auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
292-
auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr");
293-
auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor");
294-
295-
// offset = 8
296-
auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
297-
auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
298-
auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
299-
// alignment 16 (15 = 16 - 1)
300-
auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
301-
auto result = builder.CreateNSWAdd(cursor, delta, "result");
302-
303-
auto new_cursor = builder.CreateNSWAdd(result, pool_osize);
304-
305-
auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
306-
auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr");
307-
auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit");
308-
309-
auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);
310-
311-
auto current_block = target->getParent();
312-
builder.SetInsertPoint(target->getNextNode());
313-
auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow");
314-
auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont");
315-
316-
auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
317-
auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont);
318-
319-
auto next_br = current_block->getTerminator();
320-
next_br->eraseFromParent();
321-
builder.SetInsertPoint(current_block);
322-
builder.CreateCondBr(gt_limit, slowpath, fastpath);
323-
324-
// slowpath
325-
builder.SetInsertPoint(slowpath);
326-
auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
327-
auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
328-
new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
329-
builder.CreateBr(top_cont);
330-
331-
// // fastpath
332-
builder.SetInsertPoint(fastpath);
333-
builder.CreateStore(new_cursor, cursor_ptr);
334-
335-
// ptls->gc_num.allocd += osize;
336-
auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
337-
auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
338-
auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
339-
auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
340-
auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
341-
builder.CreateStore(pool_allocd_total, pool_alloc_tls);
342-
343-
auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
344-
auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
345-
builder.CreateBr(top_cont);
346-
347-
phiNode->addIncoming(new_call, slowpath);
348-
phiNode->addIncoming(v_as_ptr, fastpath);
349-
phiNode->takeName(target);
350-
351-
return phiNode;
284+
// Whether to emit the inlined fastpath allocation sequence here. This should always be enabled for MMTk.
285+
// Setting this to false greatly increases allocation overhead and should only be done for debugging.
286+
const bool INLINE_FASTPATH_ALLOCATION = true;
287+
288+
if (INLINE_FASTPATH_ALLOCATION) {
289+
// Assuming we use the first immix allocator.
290+
// FIXME: We should get the allocator index and type from MMTk.
291+
auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix);
292+
293+
auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor));
294+
auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit));
295+
296+
auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos);
297+
auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr");
298+
auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor");
299+
300+
// offset = 8
301+
auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8));
302+
auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor);
303+
auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor);
304+
// alignment 16 (15 = 16 - 1)
305+
auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta");
306+
auto result = builder.CreateNSWAdd(cursor, delta, "result");
307+
308+
auto new_cursor = builder.CreateNSWAdd(result, pool_osize);
309+
310+
auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos);
311+
auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr");
312+
auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit");
313+
314+
auto gt_limit = builder.CreateICmpSGT(new_cursor, limit);
315+
316+
auto current_block = target->getParent();
317+
builder.SetInsertPoint(target->getNextNode());
318+
auto phiNode = builder.CreatePHI(poolAllocFunc->getReturnType(), 2, "phi_fast_slow");
319+
auto top_cont = current_block->splitBasicBlock(target->getNextNode(), "top_cont");
320+
321+
auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction());
322+
auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction(), top_cont);
323+
324+
auto next_br = current_block->getTerminator();
325+
next_br->eraseFromParent();
326+
builder.SetInsertPoint(current_block);
327+
builder.CreateCondBr(gt_limit, slowpath, fastpath);
328+
329+
// slowpath
330+
builder.SetInsertPoint(slowpath);
331+
auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
332+
auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
333+
new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
334+
builder.CreateBr(top_cont);
335+
336+
// fastpath
337+
builder.SetInsertPoint(fastpath);
338+
builder.CreateStore(new_cursor, cursor_ptr);
339+
340+
// ptls->gc_num.allocd += osize;
341+
auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_num));
342+
auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
343+
auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
344+
auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
345+
auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
346+
builder.CreateStore(pool_allocd_total, pool_alloc_tls);
347+
348+
auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
349+
auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
350+
builder.CreateBr(top_cont);
351+
352+
phiNode->addIncoming(new_call, slowpath);
353+
phiNode->addIncoming(v_as_ptr, fastpath);
354+
phiNode->takeName(target);
355+
356+
return phiNode;
357+
} else {
358+
auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
359+
newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
360+
derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
361+
}
352362
#endif // MMTK_GC
353363
}
354364
} else {

0 commit comments

Comments
 (0)