-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[Clang][OpenCL][AMDGPU] Add tests for optnone attribute assigned to OpenCL Kernels #138849
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang][OpenCL][AMDGPU] Add tests for optnone attribute assigned to OpenCL Kernels #138849
Conversation
@llvm/pr-subscribers-clang Author: Aniket Lal (lalaniket8) Changes: OpenCL Kernel stubs should be always inlined #137769. Patch is 48.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138849.diff 1 File Affected:
diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
index a5b2bee127bd0..655baba1531ca 100644
--- a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
+++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
@@ -96,6 +96,14 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
__attribute__((noinline)) kernel void ext_callee_kern(global int *A);
+__attribute__((optnone)) kernel void callee_kern1(global int *A){
+ *A = 1;
+}
+
+__attribute__((always_inline)) kernel void callee_kern2(global int *A){
+ *A = 1;
+}
+
kernel void ext_callee_kern_Mat3X3(global Mat3X3 *in, global Mat4X4 *out);
kernel void ext_callee_kern_Mat32X32(global Mat32X32 *in, global Mat64X64 *out);
@@ -111,7 +119,8 @@ kernel void ext_KernelLargeTwoMember(struct LargeStructTwoMember u);
kernel void caller_kern(global int* A, global Mat3X3 *mat3X3, global Mat4X4 *mat4X4, global Mat32X32 *mat32X32, global Mat64X64 *mat64X64){
callee_kern(A);
ext_callee_kern(A);
-
+ callee_kern1(A);
+ callee_kern2(A);
callee_kern_Mat3X3(mat3X3, mat4X4);
callee_kern_Mat32X32(mat32X32, mat64X64);
ext_callee_kern_Mat3X3(mat3X3, mat4X4);
@@ -214,7 +223,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4:[0-9]+]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR6:[0-9]+]]
// X86-NEXT: ret void
//
//
@@ -239,7 +248,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -256,7 +265,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr [[TMP0]], i32 0
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr [[TMP1]], i32 1
-// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]]
+// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[ARRAYIDX1]]) #[[ATTR6]]
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
// X86-NEXT: ret void
//
@@ -271,7 +280,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -288,7 +297,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr [[TMP0]], i32 0
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4
// X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32:%.*]], ptr [[TMP1]], i32 1
-// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]]
+// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[ARRAYIDX1]]) #[[ATTR6]]
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
// X86-NEXT: ret void
//
@@ -297,7 +306,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-LABEL: define spir_kernel void @KernelOneMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
-// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -307,7 +316,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: entry:
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false)
-// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -315,7 +324,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-LABEL: define spir_kernel void @KernelLargeOneMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
-// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -325,7 +334,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: entry:
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false)
-// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -333,7 +342,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-LABEL: define spir_kernel void @KernelTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
-// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -343,7 +352,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: entry:
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false)
-// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -351,7 +360,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-LABEL: define spir_kernel void @KernelLargeTwoMember(
// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] {
// X86-NEXT: entry:
-// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -361,7 +370,54 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: entry:
// X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false)
-// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]]
+// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]]
+// X86-NEXT: ret void
+//
+//
+// X86: Function Attrs: convergent noinline norecurse nounwind optnone
+// X86-LABEL: define spir_kernel void @callee_kern1(
+// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
+// X86-NEXT: entry:
+// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern1(ptr noundef align 4 [[TMP0]]) #[[ATTR6]]
+// X86-NEXT: ret void
+//
+//
+// X86: Function Attrs: convergent noinline norecurse nounwind optnone
+// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern1(
+// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
+// X86-NEXT: entry:
+// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT: store i32 1, ptr [[TMP0]], align 4
+// X86-NEXT: ret void
+//
+//
+// X86: Function Attrs: alwaysinline convergent norecurse nounwind
+// X86-LABEL: define spir_kernel void @callee_kern2(
+// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
+// X86-NEXT: entry:
+// X86-NEXT: [[A_ADDR_I:%.*]] = alloca ptr, align 4
+// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT: store ptr [[TMP0]], ptr [[A_ADDR_I]], align 4
+// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_I]], align 4
+// X86-NEXT: store i32 1, ptr [[TMP1]], align 4
+// X86-NEXT: ret void
+//
+//
+// X86: Function Attrs: alwaysinline convergent norecurse nounwind
+// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern2(
+// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] {
+// X86-NEXT: entry:
+// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT: store i32 1, ptr [[TMP0]], align 4
// X86-NEXT: ret void
//
//
@@ -384,7 +440,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
// X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]], ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]], ptr noundef align 4 [[TMP4]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]], ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]], ptr noundef align 4 [[TMP4]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -392,6 +448,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern(
// X86-SAME: ptr noundef align 4 [[A:%.*]], ptr noundef align 4 [[MAT3X3:%.*]], ptr noundef align 4 [[MAT4X4:%.*]], ptr noundef align 4 [[MAT32X32:%.*]], ptr noundef align 4 [[MAT64X64:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] {
// X86-NEXT: entry:
+// X86-NEXT: [[A_ADDR_I:%.*]] = alloca ptr, align 4
// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr, align 4
@@ -403,21 +460,27 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: store ptr [[MAT32X32]], ptr [[MAT32X32_ADDR]], align 4
// X86-NEXT: store ptr [[MAT64X64]], ptr [[MAT64X64_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR6]]
// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr noundef align 4 [[TMP1]]) #[[ATTR4]]
-// X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
-// X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]]) #[[ATTR4]]
-// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
-// X86-NEXT: [[TMP5:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP4]], ptr noundef align 4 [[TMP5]]) #[[ATTR4]]
-// X86-NEXT: [[TMP6:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
-// X86-NEXT: [[TMP7:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr noundef align 4 [[TMP6]], ptr noundef align 4 [[TMP7]]) #[[ATTR4]]
-// X86-NEXT: [[TMP8:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
-// X86-NEXT: [[TMP9:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr noundef align 4 [[TMP8]], ptr noundef align 4 [[TMP9]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr noundef align 4 [[TMP1]]) #[[ATTR6]]
+// X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern1(ptr noundef align 4 [[TMP2]]) #[[ATTR6]]
+// X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// X86-NEXT: store ptr [[TMP3]], ptr [[A_ADDR_I]], align 4
+// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_I]], align 4
+// X86-NEXT: store i32 1, ptr [[TMP4]], align 4
+// X86-NEXT: [[TMP5:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
+// X86-NEXT: [[TMP6:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
+// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP5]], ptr noundef align 4 [[TMP6]]) #[[ATTR6]]
+// X86-NEXT: [[TMP7:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
+// X86-NEXT: [[TMP8:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
+// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP7]], ptr noundef align 4 [[TMP8]]) #[[ATTR6]]
+// X86-NEXT: [[TMP9:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4
+// X86-NEXT: [[TMP10:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4
+// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr noundef align 4 [[TMP9]], ptr noundef align 4 [[TMP10]]) #[[ATTR6]]
+// X86-NEXT: [[TMP11:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4
+// X86-NEXT: [[TMP12:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4
+// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr noundef align 4 [[TMP11]], ptr noundef align 4 [[TMP12]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -428,7 +491,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr, align 4
// X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4
// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]], ptr noundef align 8 [[TMP0]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]], ptr noundef align 8 [[TMP0]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]]
// X86-NEXT: ret void
//
//
@@ -442,10 +505,10 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTONEMEM]], ptr align 4 [[TMP0]], i32 8, i1 false)
// X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 16, i1 false)
// X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4
-// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]]
-// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]]
-// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]]
-// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR6]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR6]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]]
+// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]]
// X8...
[truncated]
|
__attribute__((optnone)) kernel void callee_kern1(global int *A){ | ||
*A = 1; | ||
} | ||
|
||
__attribute__((always_inline)) kernel void callee_kern2(global int *A){ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Give these more meaningful names that include the attribute being tested (optnone / always_inline).
1aac168
to
4ade2b3
Compare
OpenCL Kernel stubs should be always inlined #137769
When optnone is assigned to a kernel, the respective stub should not be assigned alwaysinline; this PR adds a test for that case.