
Commit 30cd230

Authored by mbaret, leo-blonk, and tristan-arm

[BYOC][ETHOSN] Add support for quantized convolution (apache#6335)

* [BYOC][ETHOSN] Add support for quantized convolution

  This PR adds support for quantized convolution. This includes mapping it
  via a composite function, plus all the necessary methods to convert from
  Relay to the Support Library APIs.

  Co-authored-by: Leo Blonk <[email protected]>
  Co-authored-by: Tristan O'Connor <[email protected]>

* Fix padding change

  Change-Id: I0794b0ac6190478e2d1b858ad0dd90f37fc0207b

* Add docs to Tvm2Npu methods

  Change-Id: Iab865619b449a3d0dd6bb0dbdcb198acd529fc4e

* Remove generated tests

  Change-Id: I51f90499f7ce82a1ce49f0731d3d50627e1d0225

Co-authored-by: Leo Blonk <[email protected]>
Co-authored-by: Tristan O'Connor <[email protected]>

1 parent e35b7fc / commit 30cd230

File tree

7 files changed: +486 -2 lines

python/tvm/relay/op/contrib/ethosn.py (26 additions & 0 deletions)
@@ -18,7 +18,9 @@
 """Arm(R) Ethos(TM) -N NPU supported operators."""
 from enum import Enum
 import tvm.ir
+from ...dataflow_pattern import wildcard, is_op, is_constant
 from ... import qnn as _qnn
+from .register import register_pattern_table
 from . import _ethosn as support
 
@@ -40,6 +42,30 @@ def ethosn_available():
     return Available.SW_AND_HW if hw else Available.SW_ONLY
 
 
+@register_pattern_table("ethos-n")
+def pattern_table():
+    """Get the Ethos-N compiler pattern table."""
+    def qnn_conv_pattern():
+        pattern = is_op('nn.pad')(wildcard()) | wildcard()
+        pattern = is_op('qnn.conv2d')(
+            pattern, is_constant(), is_constant(), is_constant(), is_constant(), is_constant())
+        pattern = is_op('nn.bias_add')(pattern, is_constant())
+        pattern = is_op('qnn.requantize')(
+            pattern, is_constant(), is_constant(), is_constant(), is_constant())
+        return pattern
+
+    def check_conv2d(extract):
+        """Check if a conv2d is supported by Ethos-N."""
+        if not ethosn_available():
+            return False
+
+        return support.conv2d(extract)
+
+    return [
+        ("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d),
+    ]
+
+
 @tvm.ir.register_op_attr("qnn.concatenate", "target.ethos-n")
 def qnn_concatenate(attrs, args):
     """Check if a concatenate is supported by Ethos-N."""

src/relay/backend/contrib/ethosn/codegen.cc (41 additions & 2 deletions)
@@ -50,6 +50,16 @@ bool IsEthosnOp(const Call& call, const std::string& op_name) {
   }
 }
 
+bool IsEthosnFunc(const Call& call, const std::string& op_name) {
+  if (call->op->IsInstance<FunctionNode>()) {
+    Function func = Downcast<Function>(call->op);
+    CHECK(func.defined());
+    auto name_node = func->GetAttr<String>(attr::kComposite);
+    return name_node.value() == op_name;
+  }
+  return false;
+}
+
 std::map<Expr, std::vector<sl::TensorInfo>> InferTensorsVisitor::Infer(const Expr& expr) {
   tensor_table_.clear();
   CHECK(expr->checked_type().defined());
@@ -69,7 +79,11 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) {
   EthosnError err;
   Call call = GetRef<Call>(cn);
   // Determine call -> NPU mapping
-  if (IsEthosnOp(call, "qnn.concatenate")) {
+  if (IsEthosnFunc(call, "ethos-n.qnn_conv2d")) {
+    ConvolutionParams params;
+    err += EthosnAPI::QnnConv2d(cn->op.as<FunctionNode>()->body, &params);
+    tensor_table_[cn->args[0]] = {params.activation_info};
+  } else if (IsEthosnOp(call, "qnn.concatenate")) {
     ConcatenateParams params;
     err = EthosnAPI::Concatenate(call, &params);
     tensor_table_[cn->args[0]] = params.input_infos;
@@ -181,7 +195,10 @@ sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) {
   sl::TensorAndId<sl::Operand> tensor;
   sl::TensorsAndId tensors;
   // Determine call -> NPU mapping
-  if (IsEthosnOp(call, "qnn.concatenate")) {
+  if (IsEthosnFunc(call, "ethos-n.qnn_conv2d")) {
+    if ((err = MakeConvolutionLayer(call, &tensor))) ReportFatalError(call, err);
+    return MakeOps(tensor);
+  } else if (IsEthosnOp(call, "qnn.concatenate")) {
     if ((err = MakeConcatenateLayer(call, &tensor))) ReportFatalError(call, err);
     return MakeOps(tensor);
   } else if (IsEthosnOp(call, "split")) {
@@ -227,6 +244,28 @@ void ConstructNetworkVisitor::VisitLeaf(const Expr& expr) {
   if (!expr->IsInstance<FunctionNode>()) MixedModeVisitor::VisitLeaf(expr);
 }
 
+EthosnError ConstructNetworkVisitor::MakeConvolutionLayer(const Call& call,
+                                                          sl::TensorAndId<sl::Operand>* out) {
+  ConvolutionParams params;
+  if (auto err = EthosnAPI::QnnConv2d(call->op.as<FunctionNode>()->body, &params)) {
+    return err;
+  }
+
+  auto activation = operand_table_[call->args[0]][0];
+  auto weights = AddConstant(network_, params.weights_info, params.raw_weights).tensor;
+  auto bias = AddConstant(network_, params.bias_info, params.raw_bias).tensor;
+  try {
+    if (params.is_depthwise) {
+      *out = AddDepthwiseConvolution(network_, *activation, *bias, *weights, params.conv_info);
+    } else {
+      *out = AddConvolution(network_, *activation, *bias, *weights, params.conv_info);
+    }
+  } catch (const sl::NotSupportedException& e) {
+    return EthosnError(e.what());
+  }
+  return EthosnError();
+}
+
 EthosnError ConstructNetworkVisitor::MakeConcatenateLayer(const Call& call,
                                                           sl::TensorAndId<sl::Operand>* out) {
   ConcatenateParams params;
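
Unlike IsEthosnOp, which matches a call to a named operator, IsEthosnFunc matches a call whose op is the composite *function* produced by MergeComposite, identified by its "Composite" attribute. Continuing the sketch above (and assuming the module contains exactly the one matched chain), this is what the codegen dispatches on:

```python
main = mod["main"]
call = main.body                           # call to the outlined composite function
assert not isinstance(call.op, tvm.ir.Op)  # the op is a relay.Function, not an Op
print(call.op.attrs["Composite"])          # -> "ethos-n.qnn_conv2d"
```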

src/relay/backend/contrib/ethosn/codegen_ethosn.h (1 addition & 0 deletions)
@@ -197,6 +197,7 @@ class ConstructNetworkVisitor : public MixedModeVisitor, private ErrorReportingP
   void VisitLeaf(const Expr& expr) final;
 
   // Make a support library operand from a Call
+  EthosnError MakeConvolutionLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
   EthosnError MakeConcatenateLayer(const Call& call, sl::TensorAndId<sl::Operand>* out);
   EthosnError MakeSplitLayer(const Call& call, sl::TensorsAndId* outs);

src/relay/backend/contrib/ethosn/ethosn_api.cc (190 additions & 0 deletions)
@@ -40,6 +40,105 @@ namespace relay {
 namespace contrib {
 namespace ethosn {
 
+EthosnError EthosnAPI::QnnConv2d(const Expr& expr, ConvolutionParams* params) {
+  Call requantize = Downcast<Call>(expr);
+  Call bias_add = Downcast<Call>(requantize->args[0]);
+  Call conv = Downcast<Call>(bias_add->args[0]);
+  Call pad;
+  if (conv->args[0]->IsInstance<CallNode>() &&
+      Downcast<Call>(conv->args[0])->op == Op::Get("nn.pad"))
+    pad = Downcast<Call>(conv->args[0]);
+  const auto& conv_attr = conv->attrs.as<Conv2DAttrs>();
+
+  // Extract the quantization params from the arguments
+  int input_zero_point;
+  int kernel_zero_point;
+  int output_zero_point;
+  float input_scale;
+  float kernel_scale;
+  float output_scale;
+  EthosnError err = AsConstant<int>(conv->args[2], &input_zero_point);
+  err += AsConstant<int>(conv->args[3], &kernel_zero_point);
+  err += AsConstant<int>(requantize->args[4], &output_zero_point);
+  err += AsConstant<float>(conv->args[4], &input_scale);
+  err += AsConstant<float>(conv->args[5], &kernel_scale);
+  err += AsConstant<float>(requantize->args[3], &output_scale);
+
+  // Convert quantization params
+  sl::QuantizationInfo data_q_info;
+  sl::QuantizationInfo weights_q_info;
+  sl::QuantizationInfo bias_q_info;
+  sl::QuantizationInfo output_q_info;
+  err += Tvm2Npu(input_zero_point, input_scale, &data_q_info);
+  err += Tvm2Npu(kernel_zero_point, kernel_scale, &weights_q_info);
+  err += Tvm2Npu(0, data_q_info.m_Scale * weights_q_info.m_Scale, &bias_q_info);
+  err += Tvm2Npu(output_zero_point, output_scale, &output_q_info);
+
+  // Convert convolution attributes
+  sl::Padding padding;
+  if (pad.defined()) {
+    Tvm2Npu(conv_attr->padding, &padding);
+    // Don't support both standalone operator padding and attribute defined padding
+    if (padding != sl::Padding({0, 0, 0, 0})) {
+      err += EthosnError(
+          ErrStrm() << "both op and attr padding exist, must be either op/attr only or no padding");
+    }
+    err += Tvm2Npu(pad->attrs.as<PadAttrs>()->pad_width, &padding);
+  } else {
+    err += Tvm2Npu(conv_attr->padding, &padding);
+  }
+  sl::Stride stride;
+  err += Tvm2Npu(conv_attr->strides, &stride);
+  // Dilation is not supported
+  std::array<uint32_t, 4> dilation = {1, 1, 1, 1};
+  AsArray(conv_attr->dilation, &dilation);
+  if (conv_attr->dilation.size() != 2 || dilation[0] != 1 || dilation[1] != 1) {
+    err +=
+        EthosnError(ErrStrm() << "dilation=" << conv_attr->dilation << ", dilation must = [1, 1]");
+  }
+  // Create convolution info
+  params->conv_info = sl::ConvolutionInfo(padding, stride, output_q_info);
+
+  // Create data info
+  const TensorTypeNode* data_dtype;
+  if (pad.defined()) {
+    data_dtype = pad->args[0]->checked_type().as<TensorTypeNode>();
+  } else {
+    data_dtype = conv->args[0]->checked_type().as<TensorTypeNode>();
+  }
+  sl::TensorShape activation_tensor_shape;
+  sl::DataType activation_data_type;
+  err += Tvm2Npu(data_dtype->shape, &activation_tensor_shape);
+  err += Tvm2Npu(data_dtype->dtype, &activation_data_type);
+  params->activation_info = sl::TensorInfo(activation_tensor_shape, activation_data_type,
+                                           sl::DataFormat::NHWC, data_q_info);
+
+  // Create weights info
+  params->is_depthwise = conv_attr->channels.defined() &&
+                         tvm::tir::ExprDeepEqual()(conv_attr->channels, conv_attr->groups) &&
+                         conv_attr->groups != 1;
+
+  const auto* weights_dtype = conv->args[1]->checked_type().as<TensorTypeNode>();
+  sl::TensorShape weights_tensor_shape;
+  sl::DataType weights_data_type;
+  sl::DataFormat weights_data_format;
+  // Ignore the error here because weights don't have a batch axis
+  Tvm2Npu(weights_dtype->shape, &weights_tensor_shape);
+  err += Tvm2Npu(weights_dtype->dtype, &weights_data_type);
+  err += Tvm2Npu(params->is_depthwise ? "HWIM" : "HWIO", &weights_data_format);
+  params->weights_info =
+      sl::TensorInfo(weights_tensor_shape, weights_data_type, weights_data_format, weights_q_info);
+  params->raw_weights = conv->args[1].as<ConstantNode>()->data->data;
+
+  // Create bias info
+  params->bias_info = sl::TensorInfo(
+      {1, 1, 1, params->is_depthwise ? weights_tensor_shape[2] : weights_tensor_shape[3]},
+      sl::DataType::INT32_QUANTIZED, sl::DataFormat::NHWC, bias_q_info);
+  params->raw_bias = bias_add->args[1].as<ConstantNode>()->data->data;
+
+  return err;
+}
+
 EthosnError EthosnAPI::Concatenate(const Expr& expr, ConcatenateParams* params) {
   Call call = Downcast<Call>(expr);
   const auto& attrs = call->attrs.as<ConcatenateAttrs>();
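
One detail worth noting in QnnConv2d: whether the convolution is depthwise is inferred purely from the Relay attributes, as channels == groups != 1, and that flag then selects the HWIM weight format and the depthwise Support Library calls. A plain-Python restatement of the rule, with hypothetical attribute values:

```python
def is_depthwise(channels, groups):
    # Mirrors the C++ check: channels defined, equal to groups, and groups != 1
    return channels is not None and channels == groups and groups != 1

assert is_depthwise(channels=8, groups=8)      # one filter per input channel
assert not is_depthwise(channels=8, groups=1)  # ordinary convolution
```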
@@ -107,6 +206,60 @@ EthosnError EthosnAPI::Split(const Expr& expr, SplitParams* params) {
   return err;
 }
 
+EthosnError EthosnAPI::Tvm2Npu(const Array<IndexExpr>& padding, sl::Padding* npu_padding) {
+  std::array<uint32_t, 4> dim;
+  if (EthosnError err = AsArray<IndexExpr, uint32_t>(padding, &dim)) {
+    return err;
+  }
+  switch (padding.size()) {
+    case 1:
+      *npu_padding = sl::Padding(dim[0], dim[0], dim[0], dim[0]);
+      break;
+    case 2:
+      // Height, width -> top, bottom, left, right
+      *npu_padding = sl::Padding(dim[0], dim[0], dim[1], dim[1]);
+      break;
+    case 4:
+      // Top, left, bottom, right -> top, bottom, left, right
+      *npu_padding = sl::Padding(dim[0], dim[2], dim[1], dim[3]);
+      break;
+    default:
+      return EthosnError(ErrStrm() << "padding tuple size=" << padding.size()
+                                   << ", padding tuple size must be {1, 2, 4}");
+  }
+  return EthosnError();
+}
+
+EthosnError EthosnAPI::Tvm2Npu(const Array<IndexExpr>& strides, sl::Stride* npu_stride) {
+  if (strides.size() != 2) {
+    return EthosnError(ErrStrm() << "stride size=" << strides.size() << ", stride size must = 2");
+  }
+  std::array<uint32_t, 4> dim;
+  if (EthosnError err = AsArray<IndexExpr, uint32_t>(strides, &dim)) {
+    return err;
+  }
+  *npu_stride = sl::Stride(dim[1], dim[0]);
+  return EthosnError();
+}
+
+EthosnError EthosnAPI::Tvm2Npu(const std::string& dformat, sl::DataFormat* data_format) {
+  if (dformat == "NCHW") {
+    *data_format = sl::DataFormat::NCHW;
+    return EthosnError();
+  } else if (dformat == "NHWC") {
+    *data_format = sl::DataFormat::NHWC;
+    return EthosnError();
+  } else if (dformat == "HWIO") {
+    *data_format = sl::DataFormat::HWIO;
+    return EthosnError();
+  } else if (dformat == "HWIM") {
+    *data_format = sl::DataFormat::HWIM;
+    return EthosnError();
+  }
+  return EthosnError(ErrStrm() << "format=" << dformat
+                               << ", format must be {NCHW, NHWC, HWIO, HWIM}");
+}
+
 EthosnError EthosnAPI::Tvm2Npu(const Array<IndexExpr>& shape, sl::TensorShape* npu_shape) {
   EthosnError err = AsArray<IndexExpr, uint32_t>(shape, npu_shape);
   if (npu_shape->front() != 1) {
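
These overloads normalise Relay's flexible conventions to the Support Library's fixed ones: padding always ends up as (top, bottom, left, right), and strides swap from Relay's (height, width) order to SL's (x, y), hence sl::Stride(dim[1], dim[0]). A pure-Python sketch of the padding mapping:

```python
def relay_padding_to_npu(padding):
    """Convert 1-, 2- or 4-element Relay conv2d padding to
    (top, bottom, left, right), as the Tvm2Npu overload above does."""
    if len(padding) == 1:       # same padding on every side
        return (padding[0],) * 4
    if len(padding) == 2:       # (height, width)
        h, w = padding
        return (h, h, w, w)
    if len(padding) == 4:       # (top, left, bottom, right)
        t, l, b, r = padding
        return (t, b, l, r)
    raise ValueError("padding tuple size must be 1, 2 or 4")

assert relay_padding_to_npu((1, 2)) == (1, 1, 2, 2)
assert relay_padding_to_npu((1, 2, 3, 4)) == (1, 3, 2, 4)
```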
@@ -128,6 +281,29 @@ EthosnError EthosnAPI::Tvm2Npu(const tvm::DataType& dtype, sl::DataType* data_ty
   return EthosnError(ErrStrm() << "dtype=\'" << dtype << "\', dtype must be either uint8 or int32");
 }
 
+EthosnError EthosnAPI::Tvm2Npu(int32_t zero_point, float scale, sl::QuantizationInfo* npu_qinfo) {
+  *npu_qinfo = sl::QuantizationInfo(zero_point, scale);
+  return EthosnError();
+}
+
+EthosnError EthosnAPI::Tvm2Npu(const Array<Array<Integer>>& padding, sl::Padding* npu_padding) {
+  if (padding.size() != 4) {
+    return EthosnError(ErrStrm() << "padding tuple size=" << padding.size()
+                                 << ", padding tuple size must = 4");
+  }
+  Array<IndexExpr> reduced_padding;
+  reduced_padding.push_back(padding[1][0]);
+  reduced_padding.push_back(padding[1][1]);
+  reduced_padding.push_back(padding[2][0]);
+  reduced_padding.push_back(padding[2][1]);
+  std::array<uint32_t, 4> dim;
+  if (EthosnError err = AsArray<IndexExpr, uint32_t>(reduced_padding, &dim)) {
+    return err;
+  }
+  *npu_padding = sl::Padding(dim[0], dim[1], dim[2], dim[3]);
+  return EthosnError();
+}
+
 // Convert an array of IntImmNodes into ValueT
 // IndexT type of Array indexing variable
 // ValueT type of resulting value
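
The Array<Array<Integer>> overload handles nn.pad's 4x2 pad_width, which for NHWC data is [[N_before, N_after], [H_before, H_after], [W_before, W_after], [C_before, C_after]]; only the height and width rows carry over to SL padding, which is why the code reads padding[1] and padding[2]. For example (hypothetical values):

```python
pad_width = ((0, 0), (1, 1), (2, 2), (0, 0))  # N, H, W, C (before, after)
(top, bottom), (left, right) = pad_width[1], pad_width[2]
assert (top, bottom, left, right) == (1, 1, 2, 2)
```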
@@ -158,6 +334,20 @@ EthosnError EthosnAPI::AsConstant(const Expr& expr, T* out) {
   return EthosnError();
 }
 
+TVM_REGISTER_GLOBAL("relay.ethos-n.support.conv2d")
+    .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
+      Call call = args[0];
+      ConvolutionParams params;
+      auto err = EthosnAPI::QnnConv2d(call, &params);
+      if (params.is_depthwise) {
+        *rv = !err && sl::IsDepthwiseConvolutionSupported(params.bias_info, params.weights_info,
+                                                          params.conv_info, params.activation_info);
+      } else {
+        *rv = !err && sl::IsConvolutionSupported(params.bias_info, params.weights_info,
+                                                 params.conv_info, params.activation_info);
+      }
+    });
+
 TVM_REGISTER_GLOBAL("relay.ethos-n.support.concatenate")
     .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
       Call call = args[0];
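
This registered global is what support.conv2d resolves to in check_conv2d (the _ethosn module initialises the relay.ethos-n.support FFI namespace). A rough sketch of calling it directly; 'extract' stands in for a matched composite expression, i.e. the qnn.requantize call at the top of the pattern:

```python
# Assumes a TVM build with the Ethos-N Support Library enabled
f = tvm.get_global_func("relay.ethos-n.support.conv2d")
supported = bool(f(extract))  # 'extract' is a hypothetical match from the pattern table
```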

src/relay/backend/contrib/ethosn/ethosn_api.h (22 additions & 0 deletions)
@@ -44,6 +44,16 @@ namespace ethosn {
 
 namespace sl = ::ethosn::support_library;
 
+struct ConvolutionParams {
+  sl::ConvolutionInfo conv_info;
+  sl::TensorInfo activation_info;
+  sl::TensorInfo weights_info;
+  sl::TensorInfo bias_info;
+  void* raw_weights = nullptr;
+  void* raw_bias = nullptr;
+  bool is_depthwise = false;
+};
+
 struct ConcatenateParams {
   sl::QuantizationInfo qInfo;
   sl::ConcatenationInfo concat_info = sl::ConcatenationInfo(1, qInfo);
@@ -115,6 +125,8 @@ class EthosnError {
  */
 class EthosnAPI {
  public:
+  /*! \brief Extract the Support Library convolution params from an ethos-n.qnn_conv2d func */
+  static EthosnError QnnConv2d(const Expr& expr, ConvolutionParams* params);
   /*! \brief Extract the Support Library concatenate params from a Relay qnn.concatenate call */
   static EthosnError Concatenate(const Expr& expr, ConcatenateParams* params);
   /*! \brief Extract the Support Library split params from a Relay split call */
@@ -125,6 +137,16 @@ class EthosnAPI {
   static EthosnError Tvm2Npu(const Array<IndexExpr>& shape, sl::TensorShape* npu_shape);
   /*! \brief Convert a TVM data type to a SL data type */
   static EthosnError Tvm2Npu(const tvm::DataType& dtype, sl::DataType* data_type);
+  /*! \brief Convert TVM 1D padding to SL padding */
+  static EthosnError Tvm2Npu(const Array<IndexExpr>& padding, sl::Padding* npu_padding);
+  /*! \brief Convert TVM 1D striding to SL striding */
+  static EthosnError Tvm2Npu(const Array<IndexExpr>& strides, sl::Stride* npu_stride);
+  /*! \brief Convert TVM data format to SL data format */
+  static EthosnError Tvm2Npu(const std::string& dformat, sl::DataFormat* data_format);
+  /*! \brief Convert TVM quantization info to SL quantization info */
+  static EthosnError Tvm2Npu(int32_t zero_point, float scale, sl::QuantizationInfo* npu_qinfo);
+  /*! \brief Convert TVM 2D padding to SL padding */
+  static EthosnError Tvm2Npu(const Array<Array<Integer>>& padding, sl::Padding* npu_padding);
 
 // Convert an array of IntImmNodes into ValueT
 // IndexT type of Array indexing variable

tests/python/contrib/test_ethosn/infrastructure.py (2 additions & 0 deletions)
@@ -94,6 +94,8 @@ def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1):
     f = relay.build_module.bind_params_by_name(mod["main"], params)
     mod = tvm.IRModule()
     mod["main"] = f
+    pattern = get_pattern_table("ethos-n")
+    mod = relay.transform.MergeComposite(pattern)(mod)
     mod = relay.transform.AnnotateTarget("ethos-n")(mod)
     mod = relay.transform.MergeCompilerRegions()(mod)
     mod = relay.transform.PartitionGraph()(mod)
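
Note the ordering here: MergeComposite must run before AnnotateTarget, so that the fused ethos-n.qnn_conv2d function already exists when operators and composites are annotated for the "ethos-n" target and carved out by PartitionGraph.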
