Skip to content

[Clang][OpenCL][AMDGPU] Add tests for optnone attribute assigned to OpenCL Kernels #138849

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 7, 2025

Conversation

lalaniket8
Copy link
Contributor

OpenCL Kernel stubs should be always inlined #137769
In case optnone is assigned to kernel, respective stub should not be assigned alwaysinline, we add test for the same.

@lalaniket8 lalaniket8 requested a review from arsenm May 7, 2025 11:25
@llvmbot llvmbot added the clang Clang issues not falling into any other category label May 7, 2025
@lalaniket8 lalaniket8 requested a review from yxsamliu May 7, 2025 11:26
@llvmbot
Copy link
Member

llvmbot commented May 7, 2025

@llvm/pr-subscribers-clang

Author: Aniket Lal (lalaniket8)

Changes

OpenCL Kernel stubs should be always inlined #137769
In case optnone is assigned to kernel, respective stub should not be assigned alwaysinline, we add test for the same.


Patch is 48.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138849.diff

1 Files Affected:

  • (modified) clang/test/CodeGenOpenCL/opencl-kernel-call.cl (+194-77)
diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
index a5b2bee127bd0..655baba1531ca 100644
--- a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
+++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
@@ -96,6 +96,14 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
 
 __attribute__((noinline)) kernel void ext_callee_kern(global int *A);
 
+__attribute__((optnone)) kernel void callee_kern1(global int *A){
+  *A = 1;
+}
+
+__attribute__((always_inline)) kernel void callee_kern2(global int *A){
+  *A = 1;
+}
+
 kernel void ext_callee_kern_Mat3X3(global Mat3X3 *in, global Mat4X4 *out);
 
 kernel void ext_callee_kern_Mat32X32(global Mat32X32 *in, global Mat64X64 *out);
@@ -111,7 +119,8 @@ kernel void ext_KernelLargeTwoMember(struct LargeStructTwoMember u);
 kernel void caller_kern(global int* A, global Mat3X3 *mat3X3, global Mat4X4 *mat4X4, global Mat32X32 *mat32X32, global Mat64X64 *mat64X64){
   callee_kern(A);
   ext_callee_kern(A);
-
+  callee_kern1(A);
+  callee_kern2(A);
   callee_kern_Mat3X3(mat3X3, mat4X4);
   callee_kern_Mat32X32(mat32X32, mat64X64);
   ext_callee_kern_Mat3X3(mat3X3, mat4X4);
@@ -214,7 +223,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
 // X86-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4:[0-9]+]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR6:[0-9]+]]
 // X86-NEXT:    ret void
 //
 //
@@ -239,7 +248,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:    store ptr [[OUT]], ptr [[OUT_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
 // X86-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -256,7 +265,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr [[TMP0]], i32 0
 // X86-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
 // X86-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr [[TMP1]], i32 1
-// X86-NEXT:    call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]]
+// X86-NEXT:    call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[ARRAYIDX1]]) #[[ATTR6]]
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
 // X86-NEXT:    ret void
 //
@@ -271,7 +280,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:    store ptr [[OUT]], ptr [[OUT_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
 // X86-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -288,7 +297,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr [[TMP0]], i32 0
 // X86-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
 // X86-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32:%.*]], ptr [[TMP1]], i32 1
-// X86-NEXT:    call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]]
+// X86-NEXT:    call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[ARRAYIDX1]]) #[[ATTR6]]
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
 // X86-NEXT:    ret void
 //
@@ -297,7 +306,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-LABEL: define spir_kernel void @KernelOneMember(
 // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] {
 // X86-NEXT:  entry:
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -307,7 +316,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:  entry:
 // X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false)
-// X86-NEXT:    call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT:    call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -315,7 +324,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-LABEL: define spir_kernel void @KernelLargeOneMember(
 // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] {
 // X86-NEXT:  entry:
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -325,7 +334,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:  entry:
 // X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false)
-// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -333,7 +342,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-LABEL: define spir_kernel void @KernelTwoMember(
 // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] {
 // X86-NEXT:  entry:
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -343,7 +352,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:  entry:
 // X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false)
-// X86-NEXT:    call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT:    call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -351,7 +360,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-LABEL: define spir_kernel void @KernelLargeTwoMember(
 // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] {
 // X86-NEXT:  entry:
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -361,7 +370,54 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:  entry:
 // X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false)
-// X86-NEXT:    call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT:    call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]]
+// X86-NEXT:    ret void
+//
+//
+// X86: Function Attrs: convergent noinline norecurse nounwind optnone
+// X86-LABEL: define spir_kernel void @callee_kern1(
+// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
+// X86-NEXT:  entry:
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern1(ptr noundef align 4 [[TMP0]]) #[[ATTR6]]
+// X86-NEXT:    ret void
+//
+//
+// X86: Function Attrs: convergent noinline norecurse nounwind optnone
+// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern1(
+// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
+// X86-NEXT:  entry:
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT:    store i32 1, ptr [[TMP0]], align 4
+// X86-NEXT:    ret void
+//
+//
+// X86: Function Attrs: alwaysinline convergent norecurse nounwind
+// X86-LABEL: define spir_kernel void @callee_kern2(
+// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
+// X86-NEXT:  entry:
+// X86-NEXT:    [[A_ADDR_I:%.*]] = alloca ptr, align 4
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT:    store ptr [[TMP0]], ptr [[A_ADDR_I]], align 4
+// X86-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_I]], align 4
+// X86-NEXT:    store i32 1, ptr [[TMP1]], align 4
+// X86-NEXT:    ret void
+//
+//
+// X86: Function Attrs: alwaysinline convergent norecurse nounwind
+// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern2(
+// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
+// X86-NEXT:  entry:
+// X86-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 4
+// X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT:    store i32 1, ptr [[TMP0]], align 4
 // X86-NEXT:    ret void
 //
 //
@@ -384,7 +440,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
 // X86-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
 // X86-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_caller_kern(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]], ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]], ptr noundef align 4 [[TMP4]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_caller_kern(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]], ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]], ptr noundef align 4 [[TMP4]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -392,6 +448,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern(
 // X86-SAME: ptr noundef align 4 [[A:%.*]], ptr noundef align 4 [[MAT3X3:%.*]], ptr noundef align 4 [[MAT4X4:%.*]], ptr noundef align 4 [[MAT32X32:%.*]], ptr noundef align 4 [[MAT64X64:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] {
 // X86-NEXT:  entry:
+// X86-NEXT:    [[A_ADDR_I:%.*]] = alloca ptr, align 4
 // X86-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 4
 // X86-NEXT:    [[MAT3X3_ADDR:%.*]] = alloca ptr, align 4
 // X86-NEXT:    [[MAT4X4_ADDR:%.*]] = alloca ptr, align 4
@@ -403,21 +460,27 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:    store ptr [[MAT32X32]], ptr [[MAT32X32_ADDR]], align 4
 // X86-NEXT:    store ptr [[MAT64X64]], ptr [[MAT64X64_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR6]]
 // X86-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_callee_kern(ptr noundef align 4 [[TMP1]]) #[[ATTR4]]
-// X86-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
-// X86-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]]) #[[ATTR4]]
-// X86-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
-// X86-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP4]], ptr noundef align 4 [[TMP5]]) #[[ATTR4]]
-// X86-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
-// X86-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr noundef align 4 [[TMP6]], ptr noundef align 4 [[TMP7]]) #[[ATTR4]]
-// X86-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
-// X86-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr noundef align 4 [[TMP8]], ptr noundef align 4 [[TMP9]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_callee_kern(ptr noundef align 4 [[TMP1]]) #[[ATTR6]]
+// X86-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern1(ptr noundef align 4 [[TMP2]]) #[[ATTR6]]
+// X86-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT:    store ptr [[TMP3]], ptr [[A_ADDR_I]], align 4
+// X86-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_I]], align 4
+// X86-NEXT:    store i32 1, ptr [[TMP4]], align 4
+// X86-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
+// X86-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
+// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP5]], ptr noundef align 4 [[TMP6]]) #[[ATTR6]]
+// X86-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
+// X86-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
+// X86-NEXT:    call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP7]], ptr noundef align 4 [[TMP8]]) #[[ATTR6]]
+// X86-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
+// X86-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
+// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr noundef align 4 [[TMP9]], ptr noundef align 4 [[TMP10]]) #[[ATTR6]]
+// X86-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
+// X86-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
+// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr noundef align 4 [[TMP11]], ptr noundef align 4 [[TMP12]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -428,7 +491,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:    [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr, align 4
 // X86-NEXT:    store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_caller_kern2(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]], ptr noundef align 8 [[TMP0]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_caller_kern2(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]], ptr noundef align 8 [[TMP0]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]]
 // X86-NEXT:    ret void
 //
 //
@@ -442,10 +505,10 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTONEMEM]], ptr align 4 [[TMP0]], i32 8, i1 false)
 // X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 16, i1 false)
 // X86-NEXT:    store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]]
-// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]]
-// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]]
-// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR6]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR6]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]]
+// X86-NEXT:    call void @__clang_ocl_kern_imp_ext_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]]
 // X8...
[truncated]

@lalaniket8 lalaniket8 requested a review from cdevadas May 7, 2025 11:26
@lalaniket8 lalaniket8 changed the title [Clang][OpenCL][AMDGPU] Add tests for optnone and alwaysinline attributes assigned to OpenCL Kernels [Clang][OpenCL][AMDGPU] Add tests for optnone attribute assigned to OpenCL Kernels May 7, 2025
Comment on lines 99 to 103
__attribute__((optnone)) kernel void callee_kern1(global int *A){
*A = 1;
}

__attribute__((always_inline)) kernel void callee_kern2(global int *A){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Give these more meaningful names, include the optnone / always inline

@lalaniket8 lalaniket8 force-pushed the users/lalaniket8/alwaysinline-opencl-kernel-stubs branch from 1aac168 to 4ade2b3 Compare May 7, 2025 11:38
@lalaniket8 lalaniket8 requested a review from arsenm May 7, 2025 11:45
@lalaniket8 lalaniket8 merged commit 17b2b6d into main May 7, 2025
8 of 10 checks passed
@lalaniket8 lalaniket8 deleted the users/lalaniket8/alwaysinline-opencl-kernel-stubs branch May 7, 2025 12:39
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants