Skip to content

Commit 6ccf479

Browse files
ptelman, justinormont, antoniovs1029
authored
Auto.ML: Fix issue when parsing float string fails on pl-PL culture set using Regression Experiment (#5163)
* Fix issue when parsing float string fails on pl-PL culture set
* Added InvariantCulture float parsing as per CodeReview request
* Update src/Microsoft.ML.AutoML/Sweepers/SweeperProbabilityUtils.cs
  Co-authored-by: Justin Ormont <[email protected]>
* Update Parameters.cs
* Added PL test
* Added multiple cultures
* debugging CI failure
* Debug runSpecific
* Revert "Debug runSpecific"
  This reverts commit 95b7280.
* Removed LightGBM and addressed comments
* Increased time
* Increase time
* Increased time

Co-authored-by: Justin Ormont <[email protected]>
Co-authored-by: Antonio Velazquez <[email protected]>
1 parent a9ab7fc commit 6ccf479

File tree

3 files changed

+61
-18
lines changed

3 files changed

+61
-18
lines changed

src/Microsoft.ML.AutoML/Sweepers/Parameters.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ public LongParameterValue(string name, long value)
8383
{
8484
_name = name;
8585
_value = value;
86-
_valueText = _value.ToString("D");
86+
_valueText = _value.ToString("D", CultureInfo.InvariantCulture);
8787
}
8888

8989
public bool Equals(IParameterValue other)

src/Microsoft.ML.AutoML/Sweepers/SweeperProbabilityUtils.cs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.Collections.Generic;
7+
using System.Globalization;
78
using Microsoft.ML.Internal.CpuMath;
89

910
namespace Microsoft.ML.AutoML
@@ -98,13 +99,15 @@ public static float[] ParameterSetAsFloatArray(IValueGenerator[] sweepParams, Pa
9899
}
99100
else if (sweepParam is LongValueGenerator lvg)
100101
{
102+
var longValue = GetIfIParameterValueOfT<long>(pset) ?? long.Parse(pset.ValueText, CultureInfo.InvariantCulture);
101103
// Normalizing all numeric parameters to [0,1] range.
102-
result.Add(lvg.NormalizeValue(new LongParameterValue(pset.Name, long.Parse(pset.ValueText))));
104+
result.Add(lvg.NormalizeValue(new LongParameterValue(pset.Name, longValue)));
103105
}
104106
else if (sweepParam is FloatValueGenerator fvg)
105107
{
108+
var floatValue = GetIfIParameterValueOfT<float>(pset) ?? float.Parse(pset.ValueText, CultureInfo.InvariantCulture);
106109
// Normalizing all numeric parameters to [0,1] range.
107-
result.Add(fvg.NormalizeValue(new FloatParameterValue(pset.Name, float.Parse(pset.ValueText))));
110+
result.Add(fvg.NormalizeValue(new FloatParameterValue(pset.Name, floatValue)));
108111
}
109112
else
110113
{
@@ -115,6 +118,10 @@ public static float[] ParameterSetAsFloatArray(IValueGenerator[] sweepParams, Pa
115118
return result.ToArray();
116119
}
117120

121+
private static T? GetIfIParameterValueOfT<T>(IParameterValue parameterValue)
122+
where T : struct =>
123+
parameterValue is IParameterValue<T> pvt ? pvt.Value : default(T?);
124+
118125
public static ParameterSet FloatArrayAsParameterSet(IValueGenerator[] sweepParams, float[] array, bool expandedCategoricals = true)
119126
{
120127
Runtime.Contracts.Assert(array.Length == sweepParams.Length);

test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System;
6+
using System.Globalization;
57
using System.Linq;
8+
using System.Threading;
69
using Microsoft.ML.Data;
710
using Microsoft.ML.TestFramework;
811
using Microsoft.ML.TestFramework.Attributes;
@@ -102,22 +105,55 @@ private void Context_Log(object sender, LoggingEventArgs e)
102105
//throw new NotImplementedException();
103106
}
104107

105-
[Fact]
106-
public void AutoFitRegressionTest()
108+
[Theory]
109+
[InlineData("en-US")]
110+
[InlineData("ar-SA")]
111+
[InlineData("pl-PL")]
112+
public void AutoFitRegressionTest(string culture)
107113
{
108-
var context = new MLContext(1);
109-
var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
110-
var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
111-
var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
112-
var trainData = textLoader.Load(dataPath);
113-
var validationData = context.Data.TakeRows(trainData, 20);
114-
trainData = context.Data.SkipRows(trainData, 20);
115-
var result = context.Auto()
116-
.CreateRegressionExperiment(0)
117-
.Execute(trainData, validationData,
118-
new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });
114+
var originalCulture = Thread.CurrentThread.CurrentCulture;
115+
try
116+
{
117+
Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);
118+
119+
// If users run AutoML with a different locale, sometimes
120+
// the sweeper encounters problems when parsing some strings.
121+
// So testing in another culture is necessary.
122+
// Furthermore, these issues might only occur after ~70
123+
// iterations, so more experiment time is needed for this to
124+
// occur.
125+
uint experimentTime = (uint) (culture == "en-US" ? 0 : 180);
126+
127+
var experimentSettings = new RegressionExperimentSettings { MaxExperimentTimeInSeconds = experimentTime};
128+
if (!Environment.Is64BitProcess)
129+
{
130+
// LightGBM isn't available on x86 machines
131+
experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
132+
}
133+
134+
var context = new MLContext(1);
135+
var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
136+
var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
137+
var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
138+
var trainData = textLoader.Load(dataPath);
139+
var validationData = context.Data.TakeRows(trainData, 20);
140+
trainData = context.Data.SkipRows(trainData, 20);
141+
var result = context.Auto()
142+
.CreateRegressionExperiment(experimentSettings)
143+
.Execute(trainData, validationData,
144+
new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });
145+
146+
Assert.True(result.RunDetails.Max(i => i.ValidationMetrics.RSquared > 0.9));
147+
148+
// Ensure experimentTime allows enough iterations to fully test the internationalization code
149+
// If the below assertion fails, increase the experiment time so the number of iterations is met
150+
Assert.True(culture == "en-US" || result.RunDetails.Count() >= 75, $"RunDetails.Count() = {result.RunDetails.Count()}, below 75");
119151

120-
Assert.True(result.RunDetails.Max(i => i.ValidationMetrics.RSquared > 0.9));
152+
}
153+
finally
154+
{
155+
Thread.CurrentThread.CurrentCulture = originalCulture;
156+
}
121157
}
122158

123159
[LightGBMFact]
@@ -351,4 +387,4 @@ private TextLoader.Options GetLoaderArgsRank(string labelColumnName, string grou
351387
};
352388
}
353389
}
354-
}
390+
}

0 commit comments

Comments
 (0)