Commit 2cfbd09 (1 parent: 0535fd1)

[BYOC][ETHOSN] Fix tests for new module API (apache#6560)

* [BYOC][ETHOSN] Fix tests for new module API

  Some of the downstream variants of our tests had been broken by a recent
  change to the API of build. This both fixes that and refactors a couple of
  tests so that they will run entirely in upstream CI and we won't see this
  sort of failure again.

  Change-Id: I841266eef0e2e89cc76e0526fc6cd3fc8d1326d8

* Only run mobilenet

  Change-Id: Ie41c6d2c13c4473ecaa5c50c33d2c1589c742796

* Improve docs

  Change-Id: I2c8bde44278e4cbc9cea5c5cbd4bb3c316ec37ae

* More docs

  Change-Id: Ia9973915eecea647689535cc1e6eef9228111324

File tree: 4 files changed (107 additions, 37 deletions)


src/runtime/contrib/ethosn/ethosn_device.cc (1 addition, 2 deletions)

@@ -174,7 +174,6 @@ bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network,
  * it's called.
  */
 
-#include <tvm/runtime/ndarray.h>
 #include <tvm/runtime/registry.h>
 
 namespace tvm {
@@ -188,7 +187,7 @@ std::vector<tvm::runtime::NDArray> test_outputs;
 TVM_REGISTER_GLOBAL("relay.ethos-n.test.infra.inference_result")
     .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) {
       test_outputs.clear();
-      for (int argc = 1; argc < args.size(); argc++) {
+      for (int argc = 0; argc < args.size(); argc++) {
         const DLTensor* tensor = args[argc];
         auto shape = std::vector<int64_t>(tensor->shape, tensor->shape + tensor->ndim);
         test_outputs.emplace_back(tvm::runtime::NDArray::Empty(shape, tensor->dtype, tensor->ctx));
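For reference, a hedged Python-side sketch of how this registered packed function is looked up; the lookup call mirrors the one in infrastructure.py below, while the commented invocation and the output_ndarrays name are illustrative only:

import tvm

# Look up the C++ global registered above. Passing True (allow_missing)
# returns None when the Ethos-N test infrastructure is not compiled in,
# which is how the Python helpers guard the call.
func = tvm.get_global_func("relay.ethos-n.test.infra.inference_result", True)
if func is not None:
    # After this change every positional argument is treated as an output
    # tensor; previously argument 0 carried a checksum and was skipped.
    # func(*output_ndarrays)  # illustrative call; output_ndarrays is hypothetical
    pass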

tests/python/contrib/test_ethosn/infrastructure.py (66 additions, 6 deletions)

@@ -43,11 +43,34 @@ def get_real_image(im_height, im_width):
 
 
 def assert_lib_hash(lib, golden):
+    """Check that the Ethos-N runtime modules in a library hash to the same values
+    as given by the golden hash(es).
+
+    If there's only one Ethos-N module, the golden hash may be provided as a str.
+    If there's multiple, a set of golden hashes should be provided to correspond
+    with each Ethos-N module that is expected.
+
+    This function is used to ensure that no change is made which alters the output
+    of a compilation. If such a change is made deliberately (eg. to fix a bug) then
+    the golden hash should be updated after verifying on hardware that the behaviour
+    is still correct.
+
+    This method is used because of the lack of hardware availability in upstream CI.
+    """
+    # Convert str hash into a set of hashes
+    if isinstance(golden, str):
+        golden = {golden}
+
     temp = util.tempdir()
     path = temp.relpath("lib.cmm")
-    lib.imported_modules[1].save(path)
-    lib_hash = md5(open(path, "rb").read()).hexdigest()
-    assert lib_hash == golden, "Expected hash: {} Got hash: {}".format(golden, lib_hash)
+    hash_set = set()
+    for mod in lib.imported_modules:
+        if mod.type_key == "ethos-n":
+            mod.save(path)
+            lib_hash = md5(open(path, "rb").read()).hexdigest()
+            hash_set.add(lib_hash)
+
+    assert hash_set == golden, "Expected hash: {} Got hash: {}".format(golden, hash_set)
 
 
 def make_module(func, params):
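As a minimal, self-contained illustration of the comparison logic introduced here, with in-memory byte strings standing in for the saved Ethos-N command-stream files and a purely illustrative golden value:

from hashlib import md5

def hashes_of(blobs):
    # Hash each stand-in "module" the way assert_lib_hash hashes each saved
    # Ethos-N module, collecting the digests into a set.
    return {md5(blob).hexdigest() for blob in blobs}

golden = "900150983cd24fb0d6963f7d28e17f72"  # md5(b"abc"), illustrative only
if isinstance(golden, str):  # a single str golden is promoted to a set
    golden = {golden}

assert hashes_of([b"abc"]) == golden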
@@ -102,6 +125,21 @@ def visit_call(self, call):
 
 
 def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1):
+    """Build a network with or without Ethos-N offloading.
+
+    Parameters
+    ----------
+    mod : IRModule
+        The Relay module to build.
+    params : dict of str to NDArray
+        The weights to build with.
+    npu : bool, optional
+        Whether to build with Ethos-N offloading.
+    expected_host_ops : int, optional
+        The number of ops expected to remain on the host.
+    npu_partitions : int, optional
+        The number of Ethos-N partitions expected.
+    """
     relay.backend.compile_engine.get().clear()
     with tvm.transform.PassContext(
         opt_level=3, config={"relay.ext.ethos-n.options": {"variant": 0}}
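A hedged usage sketch of this helper, mirroring the calls the network tests make later in this change; mod and params are assumed to already exist (e.g. from tei.make_module or a TFLite import) and the hash value is illustrative:

# mod (IRModule) and params (dict of str to NDArray) are assumed to exist.
m = tei.build(mod, params, npu=True, expected_host_ops=3, npu_partitions=1)
# The returned module exposes the compiled library for golden-hash checking.
tei.assert_lib_hash(m.get_lib(), "81637c89339201a07dc96e3b5dbf836a")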
@@ -133,6 +171,28 @@ def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1):
 
 
 def run(lib, inputs, outputs, npu=True):
+    """Run a module with specified inputs.
+
+    Parameters
+    ----------
+    lib : runtime.Module
+        The runtime module.
+    inputs : dict of str to NDArray
+        The input dictionary.
+    outputs : int
+        The expected number of outputs.
+    npu : bool
+        Whether or not any part of the lib is offloaded to Ethos-N.
+        If it's false (i.e. it's all running on the CPU), we set
+        the mocked result equal to the output so that a subsequent
+        mocked run on the NPU returns the same value.
+
+    Returns
+    -------
+    out : list of NDArray
+        The results.
+
+    """
     # Export and load lib to confirm this works
     lib_name = "mod.so"
     temp = util.tempdir()
@@ -144,7 +204,7 @@ def run(lib, inputs, outputs, npu=True):
     module.run()
     out = [module.get_output(i) for i in range(outputs)]
     if not npu:
-        inference_result(0, out)
+        inference_result(out)
     return out
 
 
@@ -171,12 +231,12 @@ def verify(answers, atol, rtol=1e-07, verify_saturation=True):
         tvm.testing.assert_allclose(outs[0].asnumpy(), outs[1].asnumpy(), rtol=rtol, atol=atol)
 
 
-def inference_result(checksum, outputs):
+def inference_result(outputs):
     """Set the expected results of an Ethos inference, if the testing
     infrastructure is available. This assumes that the entire graph
     was offloaded to the neural processor."""
    if tvm.get_global_func("relay.ethos-n.test.infra.inference_result", True):
-        return _infrastructure.inference_result(checksum, *outputs)
+        return _infrastructure.inference_result(*outputs)
     return False
 
 
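The run/inference_result pair implements the mocking flow described in run()'s docstring: a CPU run records its outputs as the expected inference result, so a later mocked NPU run reproduces them and the runtime flow can be exercised without hardware. A standalone sketch of that pattern, using hypothetical stand-ins rather than the real TVM runtime:

_mocked_outputs = []  # stands in for the test_outputs buffer kept by the C++ side

def inference_result(outputs):
    # Record the expected results of a (mocked) Ethos-N inference.
    _mocked_outputs[:] = list(outputs)
    return True

def run(compute, npu):
    if not npu:
        out = compute()           # genuine CPU execution
        inference_result(out)     # stash the result as the mocked NPU answer
        return out
    return list(_mocked_outputs)  # a mocked NPU run replays the stashed result

cpu_out = run(lambda: [1, 2, 3], npu=False)
npu_out = run(lambda: None, npu=True)
assert cpu_out == npu_out  # the flow is testable without hardware, not accurate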

tests/python/contrib/test_ethosn/test_networks.py (34 additions, 20 deletions)

@@ -23,7 +23,7 @@
 pytest.importorskip("tensorflow")
 
 from tvm import relay
-from tvm.relay.op.contrib.ethosn import ethosn_available, Available
+from tvm.relay.op.contrib.ethosn import ethosn_available
 from tvm.contrib import download
 import tvm.relay.testing.tf as tf_testing
 import tflite.Model
@@ -58,10 +58,36 @@ def _test_image_network(
     input_dict,
     compile_hash,
     output_count,
-    run=True,
     host_ops=0,
     npu_partitions=1,
+    run=False,
 ):
+    """Test an image network.
+
+    Parameters
+    ----------
+    model_url : str
+        The URL to the model.
+    model_sub_path : str
+        The name of the model file.
+    input_dict : dict
+        The input dict.
+    compile_hash : str, set
+        The compile hash(es) to check the compilation output against.
+    output_count : int
+        The expected number of outputs.
+    host_ops : int
+        The expected number of host operators.
+    npu_partitions : int
+        The expected number of Ethos-N partitions.
+    run : bool
+        Whether or not to try running the network. If hardware isn't
+        available, the run will still take place but with a mocked
+        inference function, so the results will be incorrect. This is
+        therefore just to test the runtime flow is working rather than
+        to check the correctness/accuracy.
+
+    """
     if not ethosn_available():
         return
 
@@ -78,24 +104,16 @@ def get_model():
         )
         return _get_tflite_model(model_path, input_dict, "uint8")
 
-    outputs = []
     inputs = {}
     for input_name in input_dict:
         input_shape = input_dict[input_name]
         inputs[input_name] = tei.get_real_image(input_shape[1], input_shape[2])
 
-    for npu in [False, True]:
-        mod, params = get_model()
-        graph, lib, params = tei.build(
-            mod, params, npu=npu, expected_host_ops=host_ops, npu_partitions=npu_partitions
-        )
-        if npu:
-            tei.assert_lib_hash(lib, compile_hash)
-        if run:
-            outputs.append(tei.run(graph, lib, params, inputs, output_count, npu=npu))
-
+    mod, params = get_model()
+    m = tei.build(mod, params, npu=True, expected_host_ops=host_ops, npu_partitions=npu_partitions)
+    tei.assert_lib_hash(m.get_lib(), compile_hash)
     if run:
-        tei.verify(outputs, 1, verify_saturation=False)
+        tei.run(m, inputs, output_count, npu=True)
 
 
 def test_mobilenet_v1():
@@ -104,17 +122,16 @@ def test_mobilenet_v1():
     # codegen, which could come about from either a change in Support Library
     # version or a change in the Ethos-N codegen. To update this requires running
     # on hardware that isn't available in CI.
-    hw = ethosn_available()
     _test_image_network(
         model_url="https://storage.googleapis.com/download.tensorflow.org/"
         "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
         model_sub_path="mobilenet_v1_1.0_224_quant.tflite",
         input_dict={"input": (1, 224, 224, 3)},
         compile_hash="81637c89339201a07dc96e3b5dbf836a",
         output_count=1,
-        run=(hw == Available.SW_AND_HW),
         host_ops=3,
         npu_partitions=1,
+        run=True,
     )
 
 
@@ -131,7 +148,6 @@ def test_inception_v3():
         input_dict={"input": (1, 299, 299, 3)},
         compile_hash="de0e175af610ebd45ccb03d170dc9664",
         output_count=1,
-        run=False,
         host_ops=0,
         npu_partitions=1,
     )
@@ -150,7 +166,6 @@ def test_inception_v4():
         input_dict={"input": (1, 299, 299, 3)},
         compile_hash="06bf6cb56344f3904bcb108e54edfe87",
         output_count=1,
-        run=False,
         host_ops=3,
         npu_partitions=1,
     )
@@ -167,9 +182,8 @@ def test_ssd_mobilenet_v1():
         "models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip",
         model_sub_path="detect.tflite",
         input_dict={"normalized_input_image_tensor": (1, 300, 300, 3)},
-        compile_hash="6211d96103880b016baa85e638abddef",
+        compile_hash={"29aec6b184b09454b4323271aadf89b1", "6211d96103880b016baa85e638abddef"},
         output_count=4,
-        run=False,
         host_ops=28,
         npu_partitions=2,
     )

tests/python/contrib/test_ethosn/test_topologies.py (6 additions, 9 deletions)

@@ -80,7 +80,7 @@ def test_multiple_command_streams():
     simple graph which creates two Ethos-N partitions and checks the result
     against an 'all-CPU' run through TVM.
     """
-    if ethosn_available() != Available.SW_AND_HW:
+    if not ethosn_available():
         return
 
     def get_model():
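The new guard relies on ethosn_available() returning a value that is falsy only when Ethos-N support is entirely absent. A hedged sketch of that truthiness pattern follows; SW_AND_HW appears in the removed line above, but the other member names, the values, and the __bool__ implementation are assumptions rather than something taken from this diff:

from enum import Enum

class Available(Enum):
    # Illustrative availability states; UNAVAILABLE is falsy so that
    # "if not ethosn_available():" works as a simple guard.
    UNAVAILABLE = 0
    SW_ONLY = 1
    SW_AND_HW = 2

    def __bool__(self):
        return self != Available.UNAVAILABLE

assert not Available.UNAVAILABLE
assert Available.SW_ONLY and Available.SW_AND_HW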
@@ -100,14 +100,11 @@ def get_model():
     np.random.seed(0)
     outputs = []
     inputs = {"x": tvm.nd.array(np.random.randint(0, high=256, size=(1, 4, 4, 4), dtype="uint8"))}
-    for npu in [False, True]:
-        model = get_model()
-        mod = tei.make_module(model, {})
-        outputs.append(
-            tei.build_and_run(mod, inputs, 1, {}, npu=npu, expected_host_ops=1, npu_partitions=2)
-        )
-
-    tei.verify(outputs, 0)
+    model = get_model()
+    mod = tei.make_module(model, {})
+    outputs.append(
+        tei.build_and_run(mod, inputs, 1, {}, npu=True, expected_host_ops=1, npu_partitions=2)
+    )
 
 
 def test_output_order():
