-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[RISCV] Add stack probing in eliminateCallFramePseudoInstr #139731
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Stack clash protection code was missing from RISCVFrameLowering::eliminateCallFramePseudoInstr; calling allocateStack fixes it. This patch also fixes the tests in stack-probing-dynamic.ll that should be testing the stack allocation before a function call.
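@llvm/pr-subscribers-backend-risc-v Author: Raphael Moreira Zinsly (rzinsly) Changes: Stack clash protection code was missing from RISCVFrameLowering::eliminateCallFramePseudoInstr; calling allocateStack fixes it. This patch also fixes the tests in stack-probing-dynamic.ll that should be testing the stack allocation before a function call. Full diff: https://github.com/llvm/llvm-project/pull/139731.diff 2 Files Affected: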
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 72bec74584059..b80608c05ad57 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1813,9 +1813,22 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
Amount = -Amount;
- const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
- RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
- MachineInstr::NoFlags, getStackAlign());
+ const RISCVTargetLowering *TLI =
+ MF.getSubtarget<RISCVSubtarget>().getTargetLowering();
+ int64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
+ if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) {
+ // When stack probing is enabled, the decrement of SP may need to be
+ // probed. We can handle both the decrement and the probing in
+ // allocateStack.
+ bool DynAllocation =
+ MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
+ allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
+ /*NeedProbe=*/true, ProbeSize, DynAllocation);
+ } else {
+ const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
+ RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
+ MachineInstr::NoFlags, getStackAlign());
+ }
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
index c3c1643e6de01..604271702ebad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
@@ -361,7 +361,7 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; If a function has variable-sized stack objects, then any function calls which
; need to pass arguments on the stack must allocate the stack space for them
; dynamically, to ensure they are at the bottom of the frame.
-define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
+define void @no_reserved_call_frame(i64 %n) #0 {
; RV64I-LABEL: no_reserved_call_frame:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -16
@@ -377,15 +377,20 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV64I-NEXT: addi a0, a0, 15
; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
-; RV64I-NEXT: lui a2, 1
+; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: .LBB4_1: # %entry
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: sub sp, sp, a2
+; RV64I-NEXT: sub sp, sp, a1
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: blt a0, sp, .LBB4_1
; RV64I-NEXT: # %bb.2: # %entry
; RV64I-NEXT: mv sp, a0
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: sub sp, sp, a1
+; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: call callee_stack_args
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: addi sp, s0, -16
; RV64I-NEXT: .cfi_def_cfa sp, 16
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -407,20 +412,27 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: addi s0, sp, 16
; RV32I-NEXT: .cfi_def_cfa s0, 0
-; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: slli a0, a0, 2
; RV32I-NEXT: addi a0, a0, 15
; RV32I-NEXT: andi a0, a0, -16
; RV32I-NEXT: sub a0, sp, a0
-; RV32I-NEXT: lui a2, 1
+; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: .LBB4_1: # %entry
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: sub sp, sp, a2
+; RV32I-NEXT: sub sp, sp, a1
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: blt a0, sp, .LBB4_1
; RV32I-NEXT: # %bb.2: # %entry
; RV32I-NEXT: mv sp, a0
+; RV32I-NEXT: lui a1, 1
+; RV32I-NEXT: sub sp, sp, a1
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: call callee_stack_args
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: addi sp, s0, -16
; RV32I-NEXT: .cfi_def_cfa sp, 16
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -432,48 +444,70 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV32I-NEXT: ret
entry:
%v = alloca i32, i64 %n
- call void @callee_stack_args(ptr %v, i32 %dummy)
+ call void @callee_stack_args(ptr %v, [518 x i64] poison)
ret void
}
; Same as above but without a variable-sized allocation, so the reserved call
; frame can be folded into the fixed-size allocation in the prologue.
-define void @reserved_call_frame(i64 %n, i32 %dummy) #0 {
+define void @reserved_call_frame(i64 %n) #0 {
; RV64I-LABEL: reserved_call_frame:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: addi sp, sp, -416
-; RV64I-NEXT: .cfi_def_cfa_offset 416
-; RV64I-NEXT: sd ra, 408(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -2032
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
-; RV64I-NEXT: addi a0, sp, 8
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: .cfi_def_cfa_offset 4096
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: .cfi_def_cfa_offset 4144
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: call callee_stack_args
-; RV64I-NEXT: ld ra, 408(sp) # 8-byte Folded Reload
+; RV64I-NEXT: lui a0, 1
+; RV64I-NEXT: addiw a0, a0, 48
+; RV64I-NEXT: add sp, sp, a0
+; RV64I-NEXT: .cfi_def_cfa_offset 2032
+; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
-; RV64I-NEXT: addi sp, sp, 416
+; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-LABEL: reserved_call_frame:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: addi sp, sp, -416
-; RV32I-NEXT: .cfi_def_cfa_offset 416
-; RV32I-NEXT: sw ra, 412(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -2032
+; RV32I-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
-; RV32I-NEXT: mv a1, a2
-; RV32I-NEXT: addi a0, sp, 12
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: .cfi_def_cfa_offset 4096
+; RV32I-NEXT: addi sp, sp, -80
+; RV32I-NEXT: .cfi_def_cfa_offset 4176
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: addi a0, a0, 36
+; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: call callee_stack_args
-; RV32I-NEXT: lw ra, 412(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lui a0, 1
+; RV32I-NEXT: addi a0, a0, 80
+; RV32I-NEXT: add sp, sp, a0
+; RV32I-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
-; RV32I-NEXT: addi sp, sp, 416
+; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
entry:
- %v = alloca i32, i64 100
- call void @callee_stack_args(ptr %v, i32 %dummy)
+ %v = alloca i32, i64 518
+ call void @callee_stack_args(ptr %v, [518 x i64] poison)
ret void
}
-declare void @callee_stack_args(ptr, i32)
+declare void @callee_stack_args(ptr, [518 x i64])
; Dynamic allocation of vectors
define void @dynamic_vector(i64 %size, ptr %out) #0 {
|
cc @topperc |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Stack clash protection code was missing from
RISCVFrameLowering::eliminateCallFramePseudoInstr; calling allocateStack fixes it.
This patch also fixes the tests in stack-probing-dynamic.ll that should be testing the stack allocation before a function call.