-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[NVPTX] Fixup AutoUpgrade of llvm.nvvm.atomic.load.{inc,dec}.32 #138907
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
AlexMaclean
merged 2 commits into
llvm:main
from
AlexMaclean:dev/amaclean/upstream/atom-inc-dec-remove-2
May 8, 2025
Merged
[NVPTX] Fixup AutoUpgrade of llvm.nvvm.atomic.load.{inc,dec}.32 #138907
AlexMaclean
merged 2 commits into
llvm:main
from
AlexMaclean:dev/amaclean/upstream/atom-inc-dec-remove-2
May 8, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) Changes: The previous implementation failed to account for the fact that these intrinsics have an overloaded pointer type. This version handles the pointer type and adds tests for llvm.nvvm.atomic.load.add.{f32,f64}. Full diff: https://github.com/llvm/llvm-project/pull/138907.diff 2 Files Affected:
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 8bf93555b1fd2..93d8b8865eb8a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1353,12 +1353,12 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
// nvvm.{min,max}.{i,ii,ui,ull}
Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
Name == "ui" || Name == "ull";
- else if (Name.consume_front("atomic.load.add."))
- // nvvm.atomic.load.add.{f32.p,f64.p}
- Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
- else if (Name.consume_front("atomic.load.") && Name.consume_back(".32"))
- // nvvm.atomic.load.{inc,dec}.32
- Expand = Name == "inc" || Name == "dec";
+ else if (Name.consume_front("atomic.load."))
+ // nvvm.atomic.load.add.{f32,f64}.p
+ // nvvm.atomic.load.{inc,dec}.32.p
+ Expand = Name.starts_with("add.f32.p") ||
+ Name.starts_with("add.f64.p") ||
+ Name.starts_with("inc.32.p") || Name.starts_with("dec.32.p");
else if (Name.consume_front("bitcast."))
// nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
Expand =
@@ -2383,10 +2383,12 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
Value *Val = CI->getArgOperand(1);
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
AtomicOrdering::SequentiallyConsistent);
- } else if (Name.consume_front("atomic.load.") && Name.consume_back(".32")) {
+ } else if (Name.starts_with("atomic.load.inc.32.p") ||
+ Name.starts_with("atomic.load.dec.32.p")) {
Value *Ptr = CI->getArgOperand(0);
Value *Val = CI->getArgOperand(1);
- auto Op = Name == "inc" ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
+ auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
+ : AtomicRMWInst::UDecWrap;
Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
AtomicOrdering::SequentiallyConsistent);
} else if (Name.consume_front("max.") &&
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 98ffa23fae64b..2bfa1c2dfba7a 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -58,8 +58,10 @@ declare i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr, i32)
declare ptr @llvm.nvvm.ldg.global.p.p0(ptr, i32)
declare float @llvm.nvvm.ldg.global.f.f32.p0(ptr, i32)
-declare i32 @llvm.nvvm.atomic.load.inc.32(ptr, i32)
-declare i32 @llvm.nvvm.atomic.load.dec.32(ptr, i32)
+declare i32 @llvm.nvvm.atomic.load.inc.32.p0(ptr, i32)
+declare i32 @llvm.nvvm.atomic.load.dec.32.p0(ptr, i32)
+declare i32 @llvm.nvvm.atomic.load.add.f32.p0(ptr, float)
+declare i32 @llvm.nvvm.atomic.load.add.f64.p0(ptr, double)
declare ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3), i32)
@@ -267,12 +269,16 @@ define void @ldg(ptr %p0, ptr addrspace(1) %p1) {
}
; CHECK-LABEL: @atomics
-define i32 @atomics(ptr %p0, i32 %a) {
+define i32 @atomics(ptr %p0, i32 %a, float %b, double %c) {
; CHECK: %1 = atomicrmw uinc_wrap ptr %p0, i32 %a seq_cst
; CHECK: %2 = atomicrmw udec_wrap ptr %p0, i32 %a seq_cst
+; CHECK: %3 = atomicrmw fadd ptr %p0, float %b seq_cst
+; CHECK: %4 = atomicrmw fadd ptr %p0, double %c seq_cst
- %r1 = call i32 @llvm.nvvm.atomic.load.inc.32(ptr %p0, i32 %a)
- %r2 = call i32 @llvm.nvvm.atomic.load.dec.32(ptr %p0, i32 %a)
+ %r1 = call i32 @llvm.nvvm.atomic.load.inc.32.p0(ptr %p0, i32 %a)
+ %r2 = call i32 @llvm.nvvm.atomic.load.dec.32.p0(ptr %p0, i32 %a)
+ %r3 = call float @llvm.nvvm.atomic.load.add.f32.p0(ptr %p0, float %b)
+ %r4 = call double @llvm.nvvm.atomic.load.add.f64.p0(ptr %p0, double %c)
ret i32 %r2
}
|
Artem-B
approved these changes
May 7, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
The previous implementation failed to account for the fact that these intrinsics have an overloaded pointer type. This version handles the pointer type and adds tests for llvm.nvvm.atomic.load.add.{f32,f64}.