diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h index fa4ce5efc39ad..ef4f64a167742 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h @@ -392,6 +392,38 @@ class OpenACCClauseCIREmitter final return clauseNotImplemented(clause); } } + + void VisitWorkerClause(const OpenACCWorkerClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) { + if (clause.hasIntExpr()) + operation.addWorkerNumOperand(builder.getContext(), + createIntExpr(clause.getIntExpr()), + lastDeviceTypeValues); + else + operation.addEmptyWorker(builder.getContext(), lastDeviceTypeValues); + + } else { + // TODO: When we've implemented this for everything, switch this to an + // unreachable. Combined constructs remain. + return clauseNotImplemented(clause); + } + } + + void VisitVectorClause(const OpenACCVectorClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) { + if (clause.hasIntExpr()) + operation.addVectorOperand(builder.getContext(), + createIntExpr(clause.getIntExpr()), + lastDeviceTypeValues); + else + operation.addEmptyVector(builder.getContext(), lastDeviceTypeValues); + + } else { + // TODO: When we've implemented this for everything, switch this to an + // unreachable. Combined constructs remain. + return clauseNotImplemented(clause); + } + } }; template <typename OpTy> diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp index b255a01adda0e..d636d1b37d969 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp @@ -193,4 +193,134 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { // CHECK: acc.yield // CHECK-NEXT: } loc + +#pragma acc kernels + { + +#pragma acc loop worker + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.loop worker { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop worker(N) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop worker device_type(nvidia, radeon) worker + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop worker(N) device_type(nvidia, radeon) worker + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.loop worker([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop worker device_type(nvidia, radeon) worker(N) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.loop worker([#acc.device_type<none>], %[[N_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_CONV]] : si32 [#acc.device_type<radeon>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop worker(N) device_type(nvidia, radeon) worker(N + 1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop device_type(nvidia, radeon) worker(num:N + 1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: acc.loop worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) { + +#pragma acc loop vector + for(unsigned I = 0; I < N; ++I); + // CHECK: acc.loop vector { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop vector(N) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop vector device_type(nvidia, radeon) vector + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: acc.loop vector([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop vector(N) device_type(nvidia, radeon) vector + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: acc.loop vector([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop vector(N) device_type(nvidia, radeon) vector(N + 1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop device_type(nvidia, radeon) vector(length:N + 1) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: acc.loop vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop worker vector device_type(nvidia) worker vector + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>]) vector([#acc.device_type<none>, #acc.device_type<nvidia>]) + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop worker(N) vector(N) device_type(nvidia) worker(N) vector(N) + for(unsigned I = 0; I < N; ++I); + // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32 + // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32 + // CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i + // CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32 + // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type<nvidia>]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type<nvidia>]) { + // CHECK: acc.yield + // CHECK-NEXT: } loc + } } diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index 41b01a14a6498..ca564037fad19 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -2216,6 +2216,21 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", // values should be integral constants, with the '*' represented as a '-1'. void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>, mlir::ValueRange); + + // Add a value to the 'vector' list with a current list of device_types. + void addVectorOperand(MLIRContext *, mlir::Value, + llvm::ArrayRef<DeviceType>); + // Add an empty value to the 'vector' list with a current list of + // device_types. This is for the case where there is no expression specified + // in a 'vector'. + void addEmptyVector(MLIRContext *, llvm::ArrayRef<DeviceType>); + // Add a value to the 'worker' list with a current list of device_types. + void addWorkerNumOperand(MLIRContext *, mlir::Value, + llvm::ArrayRef<DeviceType>); + // Add an empty value to the 'worker' list with a current list of + // device_types. This is for the case where there is no expression specified + // in a 'worker'. + void addEmptyWorker(MLIRContext *, llvm::ArrayRef<DeviceType>); }]; let hasCustomAssemblyFormat = 1; diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index f26b3a5143c0b..9f4645a4a7ca8 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -2720,6 +2720,34 @@ void acc::LoopOp::setTileForDeviceTypes( setTileOperandsSegments(segments); } +void acc::LoopOp::addVectorOperand( + MLIRContext *context, mlir::Value newValue, + llvm::ArrayRef<DeviceType> effectiveDeviceTypes) { + setVectorOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper( + context, getVectorOperandsDeviceTypeAttr(), effectiveDeviceTypes, + newValue, getVectorOperandsMutable())); +} + +void acc::LoopOp::addEmptyVector( + MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes) { + setVectorAttr(addDeviceTypeAffectedOperandHelper(context, getVectorAttr(), + effectiveDeviceTypes)); +} + +void acc::LoopOp::addWorkerNumOperand( + MLIRContext *context, mlir::Value newValue, + llvm::ArrayRef<DeviceType> effectiveDeviceTypes) { + setWorkerNumOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper( + context, getWorkerNumOperandsDeviceTypeAttr(), effectiveDeviceTypes, + newValue, getWorkerNumOperandsMutable())); +} + +void acc::LoopOp::addEmptyWorker( + MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes) { + setWorkerAttr(addDeviceTypeAffectedOperandHelper(context, getWorkerAttr(), + effectiveDeviceTypes)); +} + //===----------------------------------------------------------------------===// // DataOp //===----------------------------------------------------------------------===//