-use rustc_abi::{Align, Endian, HasDataLayout, Size};
+use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
+use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::OperandRef;
-use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
+use rustc_codegen_ssa::traits::{
+    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
+};
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
@@ -303,6 +306,313 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     bx.load(val_type, val_addr, layout.align.abi)
 }

+fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    list: OperandRef<'tcx, &'ll Value>,
+    target_ty: Ty<'tcx>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    // Implementation of the systemv x86_64 ABI calling convention for va_args, see
+    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
+    // based on the one in clang.
+
+    // We're able to take some shortcuts because the return type of `va_arg` must implement the
+    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.
+
+    // typedef struct __va_list_tag {
+    //     unsigned int gp_offset;
+    //     unsigned int fp_offset;
+    //     void *overflow_arg_area;
+    //     void *reg_save_area;
+    // } va_list[1];
+    let va_list_addr = list.immediate();
+
+    // Peel off any newtype wrappers.
+    //
+    // The "C" ABI does not unwrap newtypes (see `ReprOptions::inhibit_newtype_abi_optimization`).
+    // Here, we do actually want the unwrapped representation, because that is how LLVM/Clang
+    // pass such types to variadic functions.
+    //
+    // An example of a type that must be unwrapped is `Foo` below. Without the unwrapping, it has
+    // `BackendRepr::Memory`, but we need it to be `BackendRepr::Scalar` to generate correct code.
+    //
+    // ```
+    // #[repr(C)]
+    // struct Empty;
+    //
+    // #[repr(C)]
+    // struct Foo([Empty; 8], i32);
+    // ```
+    let layout = {
+        let mut layout = bx.cx.layout_of(target_ty);
+
+        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
+            layout = inner;
+        }
+
+        layout
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
+    // in the registers. If not go to step 7.
+
+    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
+    // general purpose registers needed to pass type and num_fp to hold
+    // the number of floating point registers needed.
+
+    let mut num_gp_registers = 0;
+    let mut num_fp_registers = 0;
+
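+    // Note: in the register save area, each general purpose register occupies an 8-byte slot and
+    // each SSE register a 16-byte slot, which is why the sizes below are divided by 8 and 16
+    // respectively.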
+    let mut registers_for_primitive = |p| match p {
+        Primitive::Int(integer, _is_signed) => {
+            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
+        }
+        Primitive::Float(float) => {
+            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
+        }
+        Primitive::Pointer(_) => {
+            num_gp_registers += 1;
+        }
+    };
+
+    match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => {
+            registers_for_primitive(scalar.primitive());
+        }
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            registers_for_primitive(scalar1.primitive());
+            registers_for_primitive(scalar2.primitive());
+        }
+        BackendRepr::SimdVector { .. } => {
+            // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
+            unreachable!(
+                "No x86-64 SysV va_arg implementation for {:?}",
+                layout.layout.backend_repr()
+            )
+        }
+        BackendRepr::Memory { .. } => {
+            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
+        }
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
+    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
+    // l->fp_offset > 176 - num_fp * 16 go to step 7.
+
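+    // Byte offsets of the `__va_list_tag` fields: `gp_offset` at 0, `fp_offset` at 4,
+    // `overflow_arg_area` at 8, and `reg_save_area` at 16 (i.e. 2 * 4 + 8).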
+    let unsigned_int_offset = 4;
+    let ptr_offset = 8;
+    let gp_offset_ptr = va_list_addr;
+    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+
+    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
+    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());
+
+    let mut use_regs = bx.const_bool(false);
+
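+    // The register save area stores at most 6 general purpose registers (6 * 8 = 48 bytes)
+    // followed by 8 SSE registers (8 * 16 = 128 bytes, ending at offset 176); offsets past these
+    // bounds mean the argument was passed on the stack instead.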
+    if num_gp_registers > 0 {
+        let max_offset_val = 48u32 - num_gp_registers * 8;
+        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
+        use_regs = fits_in_gp;
+    }
+
+    if num_fp_registers > 0 {
+        let max_offset_val = 176u32 - num_fp_registers * 16;
+        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
+        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
+    }
+
+    let in_reg = bx.append_sibling_block("va_arg.in_reg");
+    let in_mem = bx.append_sibling_block("va_arg.in_mem");
+    let end = bx.append_sibling_block("va_arg.end");
+
+    bx.cond_br(use_regs, in_reg, in_mem);
+
+    // Emit code to load the value if it was passed in a register.
+    bx.switch_to_block(in_reg);
+
+    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
+    // an offset of l->gp_offset and/or l->fp_offset. This may require
+    // copying to a temporary location in case the parameter is passed
+    // in different register classes or requires an alignment greater
+    // than 8 for general purpose registers and 16 for XMM registers.
+    //
+    // FIXME(llvm): This really results in shameful code when we end up needing to
+    // collect arguments from different places; often what should result in a
+    // simple assembling of a structure from scattered addresses has many more
+    // loads than necessary. Can we clean this up?
+    let reg_save_area_ptr =
+        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
+    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);
+
+    let reg_addr = match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => match scalar.primitive() {
+            Primitive::Int(_, _) | Primitive::Pointer(_) => {
+                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                // Copy into a temporary if the type is more aligned than the register save area.
+                let gp_align = Align::from_bytes(8).unwrap();
+                copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
+            }
+            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
+        },
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
+            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);
+
+            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
+            let align_hi = layout.field(bx.cx, 1).layout.align().abi;
+
+            match (scalar1.primitive(), scalar2.primitive()) {
+                (Primitive::Float(_), Primitive::Float(_)) => {
+                    // SSE registers are spaced 16 bytes apart in the register save
+                    // area, we need to collect the two eightbytes together.
+                    // The ABI isn't explicit about this, but it seems reasonable
+                    // to assume that the slots are 16-byte aligned, since the stack is
+                    // naturally 16-byte aligned and the prologue is expected to store
+                    // all the SSE registers to the RSA.
+                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));
+
+                    let align = layout.layout.align().abi;
+                    let tmp = bx.alloca(layout.layout.size(), align);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
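+                    // Offset of the second field within the temporary: the size of the first
+                    // field, rounded up to the alignment of the second.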
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align);
+                    bx.store(reg_hi, field1, align);
+
+                    tmp
+                }
+                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
+                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+
+                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
+                        Primitive::Float(_) => (fp_addr, gp_addr),
+                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
+                    };
+
+                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align_lo);
+                    bx.store(reg_hi, field1, align_hi);
+
+                    tmp
+                }
+                (_, _) => {
+                    // Two integer/pointer values are just contiguous in memory.
+                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                    // Copy into a temporary if the type is more aligned than the register save area.
+                    let gp_align = Align::from_bytes(8).unwrap();
+                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
+                }
+            }
+        }
+        // The previous match on `BackendRepr` means control flow already escaped.
+        BackendRepr::SimdVector { .. } | BackendRepr::Memory { .. } => unreachable!(),
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 5. Set:
+    // l->gp_offset = l->gp_offset + num_gp * 8
+    if num_gp_registers > 0 {
+        let offset = bx.const_u32(num_gp_registers * 8);
+        let sum = bx.add(gp_offset_v, offset);
+        // An alignment of 8 because `__va_list_tag` is 8-aligned and this is its first field.
+        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
+    }
+
+    // l->fp_offset = l->fp_offset + num_fp * 16.
+    if num_fp_registers > 0 {
+        let offset = bx.const_u32(num_fp_registers * 16);
+        let sum = bx.add(fp_offset_v, offset);
+        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
+    }
+
+    bx.br(end);
+
+    bx.switch_to_block(in_mem);
+    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+    bx.br(end);
+
+    bx.switch_to_block(end);
+
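+    // Either way, the value now sits behind a pointer: into the register save area (or a copied
+    // temporary) when passed in registers, or into the overflow area when passed on the stack.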
+    let val_type = layout.llvm_type(bx);
+    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
+
+    bx.load(val_type, val_addr, layout.align.abi)
+}
+
+/// Copy into a temporary if the type is more aligned than the register save area.
+fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    reg_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+    src_align: Align,
+) -> &'ll Value {
+    if layout.layout.align.abi > src_align {
+        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+        bx.memcpy(
+            tmp,
+            layout.layout.align.abi,
+            reg_addr,
+            src_align,
+            bx.const_u32(layout.layout.size().bytes() as u32),
+            MemFlags::empty(),
+        );
+        tmp
+    } else {
+        reg_addr
+    }
+}
+
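+/// Return the address of a `va_arg` value in the `overflow_arg_area` (i.e. on the stack), and
+/// advance `overflow_arg_area` past it (AMD64-ABI 3.5.7p5, steps 8-10).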
+fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    va_list_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));
+
+    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
+    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
+    // byte boundary if alignment needed by type exceeds 8 byte boundary.
+    // It isn't stated explicitly in the standard, but in practice we use
+    // alignment greater than 16 where necessary.
+    if layout.layout.align.abi.bytes() > 8 {
+        unreachable!("all instances of VaArgSafe have an alignment <= 8");
+    }
+
+    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
+    let mem_addr = overflow_arg_area_v;
+
+    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
+    // l->overflow_arg_area + sizeof(type).
+    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
+    // an 8 byte boundary.
+    let size_in_bytes = layout.layout.size().bytes();
+    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
+    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
+    bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);
+
+    mem_addr
+}
+
 fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx: &mut Builder<'_, 'll, 'tcx>,
     list: OperandRef<'tcx, &'ll Value>,
@@ -334,8 +644,7 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     // (*va).va_ndx
     let va_reg_offset = 4;
     let va_ndx_offset = va_reg_offset + 4;
-    let offset_ptr =
-        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);
+    let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));

     let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
     let offset = round_up_to_alignment(bx, offset, layout.align.abi);
@@ -356,11 +665,10 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);

     // (*va).va_reg
-    let regsave_area_ptr =
-        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
+    let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
     let regsave_area =
         bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-    let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
+    let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
     bx.br(end);

     bx.switch_to_block(from_stack);
@@ -381,9 +689,9 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);

     // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
-    let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
+    let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
     let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-    let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
+    let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
     bx.br(end);

     bx.switch_to_block(end);
@@ -449,6 +757,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
                 AllowHigherAlign::No,
             )
         }
+        // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
+        "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
         "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
         // For all other architecture/OS combinations fall back to using
         // the LLVM va_arg instruction.