@@ -281,74 +281,84 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
281281 auto pool_osize_i32 = ConstantInt::get (Type::getInt32Ty (F.getContext ()), osize);
282282 auto pool_osize = ConstantInt::get (Type::getInt64Ty (F.getContext ()), osize);
283283
284- // Assuming we use the first immix allocator.
285- // FIXME: We should get the allocator index and type from MMTk.
286- auto allocator_offset = offsetof (jl_tls_states_t , mmtk_mutator) + offsetof (MMTkMutatorContext, allocators) + offsetof (Allocators, immix);
287-
288- auto cursor_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), allocator_offset + offsetof (ImmixAllocator, cursor));
289- auto limit_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), allocator_offset + offsetof (ImmixAllocator, limit));
290-
291- auto cursor_tls_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, cursor_pos);
292- auto cursor_ptr = builder.CreateBitCast (cursor_tls_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " cursor_ptr" );
293- auto cursor = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), cursor_ptr, " cursor" );
294-
295- // offset = 8
296- auto delta_offset = builder.CreateNSWSub (ConstantInt::get (Type::getInt64Ty (target->getContext ()), 0 ), ConstantInt::get (Type::getInt64Ty (target->getContext ()), 8 ));
297- auto delta_cursor = builder.CreateNSWSub (ConstantInt::get (Type::getInt64Ty (target->getContext ()), 0 ), cursor);
298- auto delta_op = builder.CreateNSWAdd (delta_offset, delta_cursor);
299- // alignment 16 (15 = 16 - 1)
300- auto delta = builder.CreateAnd (delta_op, ConstantInt::get (Type::getInt64Ty (target->getContext ()), 15 ), " delta" );
301- auto result = builder.CreateNSWAdd (cursor, delta, " result" );
302-
303- auto new_cursor = builder.CreateNSWAdd (result, pool_osize);
304-
305- auto limit_tls_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, limit_pos);
306- auto limit_ptr = builder.CreateBitCast (limit_tls_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " limit_ptr" );
307- auto limit = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), limit_ptr, " limit" );
308-
309- auto gt_limit = builder.CreateICmpSGT (new_cursor, limit);
310-
311- auto current_block = target->getParent ();
312- builder.SetInsertPoint (target->getNextNode ());
313- auto phiNode = builder.CreatePHI (poolAllocFunc->getReturnType (), 2 , " phi_fast_slow" );
314- auto top_cont = current_block->splitBasicBlock (target->getNextNode (), " top_cont" );
315-
316- auto slowpath = BasicBlock::Create (target->getContext (), " slowpath" , target->getFunction ());
317- auto fastpath = BasicBlock::Create (target->getContext (), " fastpath" , target->getFunction (), top_cont);
318-
319- auto next_br = current_block->getTerminator ();
320- next_br->eraseFromParent ();
321- builder.SetInsertPoint (current_block);
322- builder.CreateCondBr (gt_limit, slowpath, fastpath);
323-
324- // slowpath
325- builder.SetInsertPoint (slowpath);
326- auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), 1 );
327- auto new_call = builder.CreateCall (poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
328- new_call->setAttributes (new_call->getCalledFunction ()->getAttributes ());
329- builder.CreateBr (top_cont);
330-
331- // // fastpath
332- builder.SetInsertPoint (fastpath);
333- builder.CreateStore (new_cursor, cursor_ptr);
334-
335- // ptls->gc_num.allocd += osize;
336- auto pool_alloc_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), offsetof (jl_tls_states_t , gc_num));
337- auto pool_alloc_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, pool_alloc_pos);
338- auto pool_alloc_tls = builder.CreateBitCast (pool_alloc_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " pool_alloc" );
339- auto pool_allocd = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), pool_alloc_tls);
340- auto pool_allocd_total = builder.CreateAdd (pool_allocd, pool_osize);
341- builder.CreateStore (pool_allocd_total, pool_alloc_tls);
342-
343- auto v_raw = builder.CreateNSWAdd (result, ConstantInt::get (Type::getInt64Ty (target->getContext ()), sizeof (jl_taggedvalue_t )));
344- auto v_as_ptr = builder.CreateIntToPtr (v_raw, poolAllocFunc->getReturnType ());
345- builder.CreateBr (top_cont);
346-
347- phiNode->addIncoming (new_call, slowpath);
348- phiNode->addIncoming (v_as_ptr, fastpath);
349- phiNode->takeName (target);
350-
351- return phiNode;
284+ // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk.
285+ // Setting this to false will increase allocation overhead a lot, and should only be used for debugging.
286+ const bool INLINE_FASTPATH_ALLOCATION = true ;
287+
288+ if (INLINE_FASTPATH_ALLOCATION) {
289+ // Assuming we use the first immix allocator.
290+ // FIXME: We should get the allocator index and type from MMTk.
291+ auto allocator_offset = offsetof (jl_tls_states_t , mmtk_mutator) + offsetof (MMTkMutatorContext, allocators) + offsetof (Allocators, immix);
292+
293+ auto cursor_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), allocator_offset + offsetof (ImmixAllocator, cursor));
294+ auto limit_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), allocator_offset + offsetof (ImmixAllocator, limit));
295+
296+ auto cursor_tls_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, cursor_pos);
297+ auto cursor_ptr = builder.CreateBitCast (cursor_tls_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " cursor_ptr" );
298+ auto cursor = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), cursor_ptr, " cursor" );
299+
300+ // offset = 8
301+ auto delta_offset = builder.CreateNSWSub (ConstantInt::get (Type::getInt64Ty (target->getContext ()), 0 ), ConstantInt::get (Type::getInt64Ty (target->getContext ()), 8 ));
302+ auto delta_cursor = builder.CreateNSWSub (ConstantInt::get (Type::getInt64Ty (target->getContext ()), 0 ), cursor);
303+ auto delta_op = builder.CreateNSWAdd (delta_offset, delta_cursor);
304+ // alignment 16 (15 = 16 - 1)
305+ auto delta = builder.CreateAnd (delta_op, ConstantInt::get (Type::getInt64Ty (target->getContext ()), 15 ), " delta" );
306+ auto result = builder.CreateNSWAdd (cursor, delta, " result" );
307+
308+ auto new_cursor = builder.CreateNSWAdd (result, pool_osize);
309+
310+ auto limit_tls_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, limit_pos);
311+ auto limit_ptr = builder.CreateBitCast (limit_tls_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " limit_ptr" );
312+ auto limit = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), limit_ptr, " limit" );
313+
314+ auto gt_limit = builder.CreateICmpSGT (new_cursor, limit);
315+
316+ auto current_block = target->getParent ();
317+ builder.SetInsertPoint (target->getNextNode ());
318+ auto phiNode = builder.CreatePHI (poolAllocFunc->getReturnType (), 2 , " phi_fast_slow" );
319+ auto top_cont = current_block->splitBasicBlock (target->getNextNode (), " top_cont" );
320+
321+ auto slowpath = BasicBlock::Create (target->getContext (), " slowpath" , target->getFunction ());
322+ auto fastpath = BasicBlock::Create (target->getContext (), " fastpath" , target->getFunction (), top_cont);
323+
324+ auto next_br = current_block->getTerminator ();
325+ next_br->eraseFromParent ();
326+ builder.SetInsertPoint (current_block);
327+ builder.CreateCondBr (gt_limit, slowpath, fastpath);
328+
329+ // slowpath
330+ builder.SetInsertPoint (slowpath);
331+ auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), 1 );
332+ auto new_call = builder.CreateCall (poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
333+ new_call->setAttributes (new_call->getCalledFunction ()->getAttributes ());
334+ builder.CreateBr (top_cont);
335+
336+ // // fastpath
337+ builder.SetInsertPoint (fastpath);
338+ builder.CreateStore (new_cursor, cursor_ptr);
339+
340+ // ptls->gc_num.allocd += osize;
341+ auto pool_alloc_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), offsetof (jl_tls_states_t , gc_num));
342+ auto pool_alloc_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, pool_alloc_pos);
343+ auto pool_alloc_tls = builder.CreateBitCast (pool_alloc_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " pool_alloc" );
344+ auto pool_allocd = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), pool_alloc_tls);
345+ auto pool_allocd_total = builder.CreateAdd (pool_allocd, pool_osize);
346+ builder.CreateStore (pool_allocd_total, pool_alloc_tls);
347+
348+ auto v_raw = builder.CreateNSWAdd (result, ConstantInt::get (Type::getInt64Ty (target->getContext ()), sizeof (jl_taggedvalue_t )));
349+ auto v_as_ptr = builder.CreateIntToPtr (v_raw, poolAllocFunc->getReturnType ());
350+ builder.CreateBr (top_cont);
351+
352+ phiNode->addIncoming (new_call, slowpath);
353+ phiNode->addIncoming (v_as_ptr, fastpath);
354+ phiNode->takeName (target);
355+
356+ return phiNode;
357+ } else {
358+ auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), 1 );
359+ newI = builder.CreateCall (poolAllocFunc, { ptls, pool_offs, pool_osize_i32 });
360+ derefAttr = Attribute::getWithDereferenceableBytes (F.getContext (), osize);
361+ }
352362 #endif // MMTK_GC
353363 }
354364 } else {
0 commit comments