From 5da0b7bfa7511b0e452de5ad9f20f04c9aa9d0ea Mon Sep 17 00:00:00 2001 From: Michael Sharp Date: Tue, 2 Jun 2020 12:39:09 -0700 Subject: [PATCH 1/5] fixes the mapping so its correct with the output columns between ML.Net and ONNX itself. --- .../OnnxTransform.cs | 11 +-- .../OnnxTransformTests.cs | 72 +++++++++++++++++++ 2 files changed, 78 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index cc0d782e66..f3e6dd369f 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -467,8 +467,9 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput(i)).ToArray(); + var activeIndex = _parent.Model.ModelInfo.OutputNames.IndexOf(_parent.Outputs[iinfo]); - if (_parent.Model.ModelInfo.OutputsInfo[iinfo].DataViewType is VectorDataViewType vectorType) + if (_parent.Model.ModelInfo.OutputsInfo[activeIndex].DataViewType is VectorDataViewType vectorType) { var elemRawType = vectorType.ItemType.RawType; var srcNamedValueGetters = GetNamedOnnxValueGetters(input, _inputColIndices, _inputOnnxTypes, _inputTensorShapes); @@ -479,9 +480,9 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func, type, input, iinfo, srcNamedValueGetters, activeOutputColNames); + return Utils.MarshalInvoke(MakeObjectGetter, type, input, iinfo, srcNamedValueGetters, activeOutputColNames, activeIndex); } } @@ -558,7 +559,7 @@ private Delegate MakeStringTensorGetter(DataViewRow input, int iinfo, INamedOnnx return valueGetter; } - private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames) + private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames, int activeIndex) { Host.AssertValue(input); var outputCache = new OnnxRuntimeOutputCacher(); @@ -567,7 +568,7 @@ private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxVal UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCache); var namedOnnxValue = outputCache.Outputs[_parent.Outputs[iinfo]]; var trueValue = namedOnnxValue.AsEnumerable().Select(value => value.AsDictionary()); - var caster = _parent.Model.ModelInfo.OutputsInfo[iinfo].Caster; + var caster = _parent.Model.ModelInfo.OutputsInfo[activeIndex].Caster; dst = (T)caster(namedOnnxValue); }; return valueGetter; diff --git a/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs b/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs index 1230288504..b535be28e2 100644 --- a/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs +++ b/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs @@ -332,6 +332,78 @@ public void OnnxModelMultiInput() } } + [OnnxFact] + public void OnnxModelOutputDifferentOrder() + { + var modelFile = Path.Combine(Directory.GetCurrentDirectory(), "twoinput", "twoinput.onnx"); + var env = new ConsoleEnvironment(seed: 1); + var samplevector = GetSampleArrayData(); + + var dataView = ML.Data.LoadFromEnumerable( + new TestDataMulti[] { + new TestDataMulti() + { + ina = new float[] {1,2,3,4,5}, + inb = new float[] {1,2,3,4,5} + } + }); + // The model returns the output columns in the order outa, outb. We are doing the opposite here, making sure the name mapping is correct. + var onnx = ML.Transforms.ApplyOnnxModel(new[] { "outb", "outa" }, new[] { "ina", "inb" }, modelFile).Fit(dataView).Transform(dataView); + + var outaCol = onnx.Schema["outa"]; + var outbCol = onnx.Schema["outb"]; + using (var curs = onnx.GetRowCursor(outaCol, onnx.Schema["outb"])) + { + var getScoresa = curs.GetGetter>(outaCol); + var getScoresb = curs.GetGetter>(outbCol); + var buffera = default(VBuffer); + var bufferb = default(VBuffer); + + while (curs.MoveNext()) + { + getScoresa(ref buffera); + getScoresb(ref bufferb); + Assert.Equal(5, buffera.Length); + Assert.Equal(5, bufferb.Length); + Assert.Equal(0, buffera.GetValues().ToArray().Sum()); + Assert.Equal(30, bufferb.GetValues().ToArray().Sum()); + } + } + } + + [OnnxFact] + public void OnnxModelOutputSubset() + { + var modelFile = Path.Combine(Directory.GetCurrentDirectory(), "twoinput", "twoinput.onnx"); + var env = new ConsoleEnvironment(seed: 1); + var samplevector = GetSampleArrayData(); + + var dataView = ML.Data.LoadFromEnumerable( + new TestDataMulti[] { + new TestDataMulti() + { + ina = new float[] {1,2,3,4,5}, + inb = new float[] {1,2,3,4,5} + } + }); + // The model returns the output columns in the order outa, outb. We are doing only a subset, outb, to make sure the mapping works. + var onnx = ML.Transforms.ApplyOnnxModel(new[] { "outb"}, new[] { "ina", "inb" }, modelFile).Fit(dataView).Transform(dataView); + + var outbCol = onnx.Schema["outb"]; + using (var curs = onnx.GetRowCursor(outbCol)) + { + var getScoresb = curs.GetGetter>(outbCol); + var bufferb = default(VBuffer); + + while (curs.MoveNext()) + { + getScoresb(ref bufferb); + Assert.Equal(5, bufferb.Length); + Assert.Equal(30, bufferb.GetValues().ToArray().Sum()); + } + } + } + [OnnxFact] public void TestUnknownDimensions() { From 1815de69f093d059f17a308de4fd1c7d4da9a9fc Mon Sep 17 00:00:00 2001 From: Michael Sharp Date: Tue, 2 Jun 2020 14:41:17 -0700 Subject: [PATCH 2/5] updates based on PR comments --- .../OnnxTransformTests.cs | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs b/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs index b535be28e2..2a25c85909 100644 --- a/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs +++ b/test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs @@ -336,8 +336,6 @@ public void OnnxModelMultiInput() public void OnnxModelOutputDifferentOrder() { var modelFile = Path.Combine(Directory.GetCurrentDirectory(), "twoinput", "twoinput.onnx"); - var env = new ConsoleEnvironment(seed: 1); - var samplevector = GetSampleArrayData(); var dataView = ML.Data.LoadFromEnumerable( new TestDataMulti[] { @@ -369,27 +367,11 @@ public void OnnxModelOutputDifferentOrder() Assert.Equal(30, bufferb.GetValues().ToArray().Sum()); } } - } - - [OnnxFact] - public void OnnxModelOutputSubset() - { - var modelFile = Path.Combine(Directory.GetCurrentDirectory(), "twoinput", "twoinput.onnx"); - var env = new ConsoleEnvironment(seed: 1); - var samplevector = GetSampleArrayData(); - var dataView = ML.Data.LoadFromEnumerable( - new TestDataMulti[] { - new TestDataMulti() - { - ina = new float[] {1,2,3,4,5}, - inb = new float[] {1,2,3,4,5} - } - }); // The model returns the output columns in the order outa, outb. We are doing only a subset, outb, to make sure the mapping works. - var onnx = ML.Transforms.ApplyOnnxModel(new[] { "outb"}, new[] { "ina", "inb" }, modelFile).Fit(dataView).Transform(dataView); + onnx = ML.Transforms.ApplyOnnxModel(new[] { "outb" }, new[] { "ina", "inb" }, modelFile).Fit(dataView).Transform(dataView); - var outbCol = onnx.Schema["outb"]; + outbCol = onnx.Schema["outb"]; using (var curs = onnx.GetRowCursor(outbCol)) { var getScoresb = curs.GetGetter>(outbCol); From 4cbd19f4d724c5053d1d3f76840c25df018248a8 Mon Sep 17 00:00:00 2001 From: Michael Sharp Date: Tue, 2 Jun 2020 15:12:14 -0700 Subject: [PATCH 3/5] added in method to get mapping and added more comments --- .../OnnxTransform.cs | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index f3e6dd369f..bd2a86f09e 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -340,6 +340,17 @@ private static IEnumerable AdjustDimensions(OnnxShape shape) return new[] { 1 }; } + /// + /// In the case that the ML.Net user wants a subset of columns or lists the columns in a different order then specified in the ONNX model, + /// we need to map from the ONNX model index to the ML.Net column index. This method does that mapping for us. + /// + /// The index of the ML.Net column requested. + /// The index o fht e + internal int MapDataViewColumnToOnnxOutputTensor(int iinfo) + { + return Model.ModelInfo.OutputNames.IndexOf(Outputs[iinfo]); + } + private sealed class Mapper : MapperBase { private readonly OnnxTransformer _parent; @@ -467,9 +478,11 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput(i)).ToArray(); - var activeIndex = _parent.Model.ModelInfo.OutputNames.IndexOf(_parent.Outputs[iinfo]); - if (_parent.Model.ModelInfo.OutputsInfo[activeIndex].DataViewType is VectorDataViewType vectorType) + // Determine the mapping from the ML.Net column index to the ONNX output tensor index. + var mlnetOnnxMapping = _parent.MapDataViewColumnToOnnxOutputTensor(iinfo); + + if (_parent.Model.ModelInfo.OutputsInfo[mlnetOnnxMapping].DataViewType is VectorDataViewType vectorType) { var elemRawType = vectorType.ItemType.RawType; var srcNamedValueGetters = GetNamedOnnxValueGetters(input, _inputColIndices, _inputOnnxTypes, _inputTensorShapes); @@ -480,9 +493,9 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func, type, input, iinfo, srcNamedValueGetters, activeOutputColNames, activeIndex); + return Utils.MarshalInvoke(MakeObjectGetter, type, input, iinfo, srcNamedValueGetters, activeOutputColNames, mlnetOnnxMapping); } } @@ -559,7 +572,7 @@ private Delegate MakeStringTensorGetter(DataViewRow input, int iinfo, INamedOnnx return valueGetter; } - private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames, int activeIndex) + private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames, int mlnetOnnxMapping) { Host.AssertValue(input); var outputCache = new OnnxRuntimeOutputCacher(); @@ -568,7 +581,7 @@ private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxVal UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCache); var namedOnnxValue = outputCache.Outputs[_parent.Outputs[iinfo]]; var trueValue = namedOnnxValue.AsEnumerable().Select(value => value.AsDictionary()); - var caster = _parent.Model.ModelInfo.OutputsInfo[activeIndex].Caster; + var caster = _parent.Model.ModelInfo.OutputsInfo[mlnetOnnxMapping].Caster; dst = (T)caster(namedOnnxValue); }; return valueGetter; From d363cc7bc3f835a58d53f090a2d8b418a4200181 Mon Sep 17 00:00:00 2001 From: Michael Sharp Date: Wed, 3 Jun 2020 09:06:46 -0700 Subject: [PATCH 4/5] changes based on pr comments --- src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index bd2a86f09e..e1b9e5ffea 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -479,10 +479,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput(i)).ToArray(); - // Determine the mapping from the ML.Net column index to the ONNX output tensor index. - var mlnetOnnxMapping = _parent.MapDataViewColumnToOnnxOutputTensor(iinfo); - - if (_parent.Model.ModelInfo.OutputsInfo[mlnetOnnxMapping].DataViewType is VectorDataViewType vectorType) + if (_parent.Model.ModelInfo.OutputsInfo[_parent.MapDataViewColumnToOnnxOutputTensor(iinfo)].DataViewType is VectorDataViewType vectorType) { var elemRawType = vectorType.ItemType.RawType; var srcNamedValueGetters = GetNamedOnnxValueGetters(input, _inputColIndices, _inputOnnxTypes, _inputTensorShapes); @@ -493,9 +490,9 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func, type, input, iinfo, srcNamedValueGetters, activeOutputColNames, mlnetOnnxMapping); + return Utils.MarshalInvoke(MakeObjectGetter, type, input, iinfo, srcNamedValueGetters, activeOutputColNames); } } @@ -572,7 +569,7 @@ private Delegate MakeStringTensorGetter(DataViewRow input, int iinfo, INamedOnnx return valueGetter; } - private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames, int mlnetOnnxMapping) + private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxValueGetter[] srcNamedValueGetters, string[] activeOutputColNames) { Host.AssertValue(input); var outputCache = new OnnxRuntimeOutputCacher(); @@ -581,7 +578,7 @@ private Delegate MakeObjectGetter(DataViewRow input, int iinfo, INamedOnnxVal UpdateCacheIfNeeded(input.Position, srcNamedValueGetters, activeOutputColNames, outputCache); var namedOnnxValue = outputCache.Outputs[_parent.Outputs[iinfo]]; var trueValue = namedOnnxValue.AsEnumerable().Select(value => value.AsDictionary()); - var caster = _parent.Model.ModelInfo.OutputsInfo[mlnetOnnxMapping].Caster; + var caster = _parent.Model.ModelInfo.OutputsInfo[_parent.MapDataViewColumnToOnnxOutputTensor(iinfo)].Caster; dst = (T)caster(namedOnnxValue); }; return valueGetter; From 50c979f2fa1ffd8b832d1050189e2fba4d4d3725 Mon Sep 17 00:00:00 2001 From: Michael Sharp Date: Wed, 3 Jun 2020 10:44:42 -0700 Subject: [PATCH 5/5] comment changes due to pr comments --- src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index e1b9e5ffea..2cfb8f6366 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -342,10 +342,10 @@ private static IEnumerable AdjustDimensions(OnnxShape shape) /// /// In the case that the ML.Net user wants a subset of columns or lists the columns in a different order then specified in the ONNX model, - /// we need to map from the ONNX model index to the ML.Net column index. This method does that mapping for us. + /// we need to map from the ML.Net dataview column index to the ONNX model output index. This method does that mapping. /// /// The index of the ML.Net column requested. - /// The index o fht e + /// The index of ONNX output. internal int MapDataViewColumnToOnnxOutputTensor(int iinfo) { return Model.ModelInfo.OutputNames.IndexOf(Outputs[iinfo]);