
[RISCV] Add stack probing in eliminateCallFramePseudoInstr #139731


Open

rzinsly wants to merge 1 commit into main

Conversation

rzinsly (Contributor) commented May 13, 2025

Stack clash protection was missing from RISCVFrameLowering::eliminateCallFramePseudoInstr; calling allocateStack there fixes it.
This patch also fixes the tests in stack-probing-dynamic.ll so that they actually test the stack allocation performed before a function call.
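For context, the shape of IR that reaches the fixed path looks like the following; this is a minimal sketch adapted from no_reserved_call_frame in the updated test below (the test attaches its attributes through a #0 group, so spelling "probe-stack"="inline-asm" inline here is an assumption about what that group enables):

declare void @callee_stack_args(ptr, [518 x i64])

define void @no_reserved_call_frame(i64 %n) "probe-stack"="inline-asm" {
entry:
  ; The variable-sized alloca prevents a reserved call frame, so the
  ; outgoing-argument area for the call is allocated when ADJCALLSTACKDOWN
  ; is eliminated and, with this patch, is probed whenever it is at least
  ; one probe size.
  %v = alloca i32, i64 %n
  call void @callee_stack_args(ptr %v, [518 x i64] poison)
  ret void
}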

Stack clash protection was missing from
RISCVFrameLowering::eliminateCallFramePseudoInstr; calling
allocateStack there fixes it.
This patch also fixes the tests in stack-probing-dynamic.ll so that
they actually test the stack allocation performed before a function call.
llvmbot (Member) commented May 13, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Raphael Moreira Zinsly (rzinsly)

Changes

Stack clash protection was missing from RISCVFrameLowering::eliminateCallFramePseudoInstr; calling allocateStack there fixes it.
This patch also fixes the tests in stack-probing-dynamic.ll so that they actually test the stack allocation performed before a function call.


Full diff: https://github.com/llvm/llvm-project/pull/139731.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVFrameLowering.cpp (+16-3)
  • (modified) llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll (+58-24)
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 72bec74584059..b80608c05ad57 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1813,9 +1813,22 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
       if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
         Amount = -Amount;
 
-      const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
-      RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
-                   MachineInstr::NoFlags, getStackAlign());
+      const RISCVTargetLowering *TLI =
+          MF.getSubtarget<RISCVSubtarget>().getTargetLowering();
+      int64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
+      if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) {
+        // When stack probing is enabled, the decrement of SP may need to be
+        // probed. We can handle both the decrement and the probing in
+        // allocateStack.
+        bool DynAllocation =
+            MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
+        allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
+                      /*NeedProbe=*/true, ProbeSize, DynAllocation);
+      } else {
+        const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
+        RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
+                     MachineInstr::NoFlags, getStackAlign());
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
index c3c1643e6de01..604271702ebad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
@@ -361,7 +361,7 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
 ; If a function has variable-sized stack objects, then any function calls which
 ; need to pass arguments on the stack must allocate the stack space for them
 ; dynamically, to ensure they are at the bottom of the frame.
-define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
+define void @no_reserved_call_frame(i64 %n) #0 {
 ; RV64I-LABEL: no_reserved_call_frame:
 ; RV64I:       # %bb.0: # %entry
 ; RV64I-NEXT:    addi sp, sp, -16
@@ -377,15 +377,20 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
 ; RV64I-NEXT:    addi a0, a0, 15
 ; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
-; RV64I-NEXT:    lui a2, 1
+; RV64I-NEXT:    lui a1, 1
 ; RV64I-NEXT:  .LBB4_1: # %entry
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    sub sp, sp, a2
+; RV64I-NEXT:    sub sp, sp, a1
 ; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    blt a0, sp, .LBB4_1
 ; RV64I-NEXT:  # %bb.2: # %entry
 ; RV64I-NEXT:    mv sp, a0
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    sub sp, sp, a1
+; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    call callee_stack_args
+; RV64I-NEXT:    lui a0, 1
+; RV64I-NEXT:    add sp, sp, a0
 ; RV64I-NEXT:    addi sp, s0, -16
 ; RV64I-NEXT:    .cfi_def_cfa sp, 16
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -407,20 +412,27 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
 ; RV32I-NEXT:    .cfi_offset s0, -8
 ; RV32I-NEXT:    addi s0, sp, 16
 ; RV32I-NEXT:    .cfi_def_cfa s0, 0
-; RV32I-NEXT:    mv a1, a2
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    addi a0, a0, 15
 ; RV32I-NEXT:    andi a0, a0, -16
 ; RV32I-NEXT:    sub a0, sp, a0
-; RV32I-NEXT:    lui a2, 1
+; RV32I-NEXT:    lui a1, 1
 ; RV32I-NEXT:  .LBB4_1: # %entry
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    sub sp, sp, a2
+; RV32I-NEXT:    sub sp, sp, a1
 ; RV32I-NEXT:    sw zero, 0(sp)
 ; RV32I-NEXT:    blt a0, sp, .LBB4_1
 ; RV32I-NEXT:  # %bb.2: # %entry
 ; RV32I-NEXT:    mv sp, a0
+; RV32I-NEXT:    lui a1, 1
+; RV32I-NEXT:    sub sp, sp, a1
+; RV32I-NEXT:    sw zero, 0(sp)
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw zero, 0(sp)
 ; RV32I-NEXT:    call callee_stack_args
+; RV32I-NEXT:    lui a0, 1
+; RV32I-NEXT:    addi a0, a0, 32
+; RV32I-NEXT:    add sp, sp, a0
 ; RV32I-NEXT:    addi sp, s0, -16
 ; RV32I-NEXT:    .cfi_def_cfa sp, 16
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -432,48 +444,70 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
 ; RV32I-NEXT:    ret
 entry:
   %v = alloca i32, i64 %n
-  call void @callee_stack_args(ptr %v, i32 %dummy)
+  call void @callee_stack_args(ptr %v, [518 x i64] poison)
   ret void
 }
 
 ; Same as above but without a variable-sized allocation, so the reserved call
 ; frame can be folded into the fixed-size allocation in the prologue.
-define void @reserved_call_frame(i64 %n, i32 %dummy) #0 {
+define void @reserved_call_frame(i64 %n) #0 {
 ; RV64I-LABEL: reserved_call_frame:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    addi sp, sp, -416
-; RV64I-NEXT:    .cfi_def_cfa_offset 416
-; RV64I-NEXT:    sd ra, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -2032
+; RV64I-NEXT:    .cfi_def_cfa_offset 2032
+; RV64I-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    .cfi_offset ra, -8
-; RV64I-NEXT:    addi a0, sp, 8
+; RV64I-NEXT:    lui a0, 1
+; RV64I-NEXT:    sub sp, sp, a0
+; RV64I-NEXT:    sd zero, 0(sp)
+; RV64I-NEXT:    .cfi_def_cfa_offset 4096
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    .cfi_def_cfa_offset 4144
+; RV64I-NEXT:    lui a0, 1
+; RV64I-NEXT:    add a0, sp, a0
 ; RV64I-NEXT:    call callee_stack_args
-; RV64I-NEXT:    ld ra, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    lui a0, 1
+; RV64I-NEXT:    addiw a0, a0, 48
+; RV64I-NEXT:    add sp, sp, a0
+; RV64I-NEXT:    .cfi_def_cfa_offset 2032
+; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    .cfi_restore ra
-; RV64I-NEXT:    addi sp, sp, 416
+; RV64I-NEXT:    addi sp, sp, 2032
 ; RV64I-NEXT:    .cfi_def_cfa_offset 0
 ; RV64I-NEXT:    ret
 ;
 ; RV32I-LABEL: reserved_call_frame:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    addi sp, sp, -416
-; RV32I-NEXT:    .cfi_def_cfa_offset 416
-; RV32I-NEXT:    sw ra, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    addi sp, sp, -2032
+; RV32I-NEXT:    .cfi_def_cfa_offset 2032
+; RV32I-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    .cfi_offset ra, -4
-; RV32I-NEXT:    mv a1, a2
-; RV32I-NEXT:    addi a0, sp, 12
+; RV32I-NEXT:    lui a0, 1
+; RV32I-NEXT:    sub sp, sp, a0
+; RV32I-NEXT:    sw zero, 0(sp)
+; RV32I-NEXT:    .cfi_def_cfa_offset 4096
+; RV32I-NEXT:    addi sp, sp, -80
+; RV32I-NEXT:    .cfi_def_cfa_offset 4176
+; RV32I-NEXT:    lui a0, 1
+; RV32I-NEXT:    addi a0, a0, 36
+; RV32I-NEXT:    add a0, sp, a0
 ; RV32I-NEXT:    call callee_stack_args
-; RV32I-NEXT:    lw ra, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lui a0, 1
+; RV32I-NEXT:    addi a0, a0, 80
+; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    .cfi_def_cfa_offset 2032
+; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    .cfi_restore ra
-; RV32I-NEXT:    addi sp, sp, 416
+; RV32I-NEXT:    addi sp, sp, 2032
 ; RV32I-NEXT:    .cfi_def_cfa_offset 0
 ; RV32I-NEXT:    ret
 entry:
-  %v = alloca i32, i64 100
-  call void @callee_stack_args(ptr %v, i32 %dummy)
+  %v = alloca i32, i64 518
+  call void @callee_stack_args(ptr %v, [518 x i64] poison)
   ret void
 }
 
-declare void @callee_stack_args(ptr, i32)
+declare void @callee_stack_args(ptr, [518 x i64])
 
 ; Dynamic allocation of vectors
 define void @dynamic_vector(i64 %size, ptr %out) #0 {
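
A back-of-the-envelope check on why the updated tests pass [518 x i64] (assuming the standard RISC-V integer calling convention; the patch itself does not spell this out): on RV64 the pointer occupies a0 and the first seven i64 elements occupy a1-a7, so 511 x 8 = 4088 bytes are passed on the stack, rounded up to 4096 by the 16-byte stack alignment. That matches the 4096-byte probe size used by these tests, so the dynamically allocated call frame itself has to be probed, which is exactly what the new RV64I lines (lui a1, 1 / sub sp, sp, a1 / sd zero, 0(sp)) check. On RV32 fewer of the i64 elements can be passed in registers, so slightly more ends up on the stack, matching the extra 32-byte adjustment in the RV32I lines.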

rzinsly (Contributor, Author) commented May 13, 2025

cc @topperc

topperc self-requested a review May 13, 2025 15:46

topperc (Collaborator) left a comment


LGTM
