Skip to content

Remove obsolete code in BinaryClassifierEvaluator #4694

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 27, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 0 additions & 175 deletions src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1457,184 +1457,9 @@ private bool TryGetPrMetrics(Dictionary<string, IDataView>[] metrics, out IDataV
if (metrics.Length != 1)
pr = AppendRowsDataView.Create(Host, prList[0].Schema, prList.ToArray());

#if !CORECLR
SavePrPlots(prList);
#endif
return true;
}

#if !CORECLR
// Vertical averaging.
/// <summary>
/// Plots the averaged precision/recall curve and the averaged ROC curve computed from
/// <paramref name="prList"/> and saves them as JPEG files next to <c>_prFileName</c>
/// (base name + ".pr.jpg" and base name + ".roc.jpg").
/// </summary>
/// <param name="prList">The data views holding one curve each; asserted to be non-empty.</param>
private void SavePrPlots(List<IDataView> prList)
{
    Host.AssertNonEmpty(prList);

    // Markers are only added when more than one curve was averaged.
    bool addMarkers = prList.Count > 1;
    const double markerIncrement = 0.1;

    // PR curve. The y values of the per-curve "previous point" start at 1 for this curve.
    var prPlot = CreateUnitPlot("Recall", "Precision");
    var avgPoints = GetCurve(prList, BinaryClassifierEvaluator.Recall, BinaryClassifierEvaluator.Precision, 1);
    prPlot.AddCurveXY(avgPoints, "");
    if (addMarkers)
        prPlot.AddMarkerXYErr(DecimateByX(avgPoints, markerIncrement), "");

    // Drop a trailing three-character extension (a '.' in the fourth position from the end), if any.
    string basename = _prFileName;
    if (basename.Length > 4 && basename[basename.Length - 4] == '.')
        basename = basename.Substring(0, basename.Length - 4);

    prPlot.Save(basename + ".pr.jpg");

    // ROC curve.
    avgPoints = GetCurve(prList, BinaryClassifierEvaluator.FalsePositiveRate, BinaryClassifierEvaluator.Recall);
    var rocPlot = CreateUnitPlot("FPR", "Recall=TPR");
    rocPlot.AddCurveXY(avgPoints, "");
    if (addMarkers)
        rocPlot.AddMarkerXYErr(DecimateByX(avgPoints, markerIncrement), "");

    rocPlot.Save(basename + ".roc.jpg");
}

/// <summary>
/// Creates a plot whose axes both span [0, 1], with the given axis legends, and initializes
/// its chart without a legend box.
/// </summary>
private static XYPlot CreateUnitPlot(string legendX, string legendY)
{
    var plot = new XYPlot();
    plot.LegendX = legendX;
    plot.LegendY = legendY;
    plot.MinX = 0;
    plot.MaxX = 1;
    plot.MinY = 0;
    plot.MaxY = 1;
    plot.InitializeChart(addLegend: false);
    return plot;
}

/// <summary>
/// Thins out <paramref name="points"/>: walking the points in increasing X order, a point is kept
/// when its X has reached the current threshold, and the threshold then moves up by
/// <paramref name="increment"/>. The threshold starts at 0.
/// </summary>
private static List<XYPlot.XYPoint> DecimateByX(List<XYPlot.XYPoint> points, double increment)
{
    var decimated = new List<XYPlot.XYPoint>();
    double threshold = 0.0;
    foreach (var point in points.OrderBy(q => q.X))
    {
        if (point.X >= threshold)
        {
            decimated.Add(point);
            threshold += increment;
        }
    }
    return decimated;
}

/// <summary>
/// Merges the curves stored in <paramref name="prList"/> into a single averaged curve.
/// Each data view is read through a cursor over its <paramref name="xAxisName"/> and
/// <paramref name="yAxisName"/> columns. Points are emitted in order of increasing x: at each
/// step the smallest current x among the unfinished cursors is selected, every other curve is
/// linearly interpolated at that x, and the mean and standard error of the y values are recorded.
/// </summary>
/// <param name="prList">The data views, one curve each.</param>
/// <param name="xAxisName">Name of the column that provides x values.</param>
/// <param name="yAxisName">Name of the column that provides y values.</param>
/// <param name="yInit">The y value assumed for every curve before its first row (paired with x = 0).</param>
/// <returns>The averaged points, each carrying x, mean y and the standard error of y.</returns>
/// <exception cref="System.Exception">Thrown via <c>Host.Except</c> when a data view lacks one of the two columns.</exception>
private List<XYPlot.XYPoint> GetCurve(List<IDataView> prList, string xAxisName, string yAxisName, Double yInit = 0)
{
    var cursors = new IRowCursor[prList.Count];
    try
    {
        var xGetters = new ValueGetter<Double>[prList.Count];
        var yGetters = new ValueGetter<Double>[prList.Count];
        for (int i = 0; i < prList.Count; i++)
        {
            int xIndex;
            if (!prList[i].Schema.TryGetColumnIndex(xAxisName, out xIndex))
                throw Host.Except("Data view does not contain column '{0}'", xAxisName);
            int yIndex;
            if (!prList[i].Schema.TryGetColumnIndex(yAxisName, out yIndex))
                throw Host.Except("Data view does not contain column '{0}'", yAxisName);

            cursors[i] = prList[i].GetRowCursor(col => col == xIndex || col == yIndex);
            xGetters[i] = cursors[i].GetGetter<Double>(xIndex);
            yGetters[i] = cursors[i].GetGetter<Double>(yIndex);
        }

        var avgPoints = new List<XYPlot.XYPoint>();

        // For every curve: the last consumed point (prev) and the point currently under the cursor (cur).
        var xPrev = new Double[prList.Count];
        var xCur = new Double[prList.Count];
        var yPrev = new Double[prList.Count];
        var yCur = new Double[prList.Count];
        if (yInit != 0)
        {
            for (int i = 0; i < yPrev.Length; i++)
                yPrev[i] = yInit;
        }

        // Get the first points in all the curves.
        for (int i = 0; i < cursors.Length; i++)
        {
            if (cursors[i].MoveNext())
            {
                xGetters[i](ref xCur[i]);
                yGetters[i](ref yCur[i]);
            }
        }

        while (true)
        {
            // Find the next point as the point with the smallest x value, among the cursors that are not done.
            // NOTE(review): the sentinel 2 presumes x values lie in [0, 1]; a current x >= 2 (or NaN)
            // is never selected, which ends the loop. Confirm against the producers of these columns.
            int argMin = -1;
            Double min = 2;
            for (int i = 0; i < cursors.Length; i++)
            {
                if (cursors[i].State == CursorState.Done)
                    continue;

                if (xCur[i] < min)
                {
                    min = xCur[i];
                    argMin = i;
                }
            }

            // We stop when all the cursors are done.
            if (argMin < 0)
                break;

            // Calculate the average and std deviation of y value at x=min.
            // Use StdDev = Sqrt(Avg(y^2)-Avg(y)^2), then stdErr = stdDev/Sqrt(sample size)
            var yAvg = yCur[argMin];
            var yVar = yCur[argMin] * yCur[argMin];
            for (int i = 0; i < yCur.Length; i++)
            {
                if (i == argMin)
                    continue;

                // Linear interpolation of curve i between its previous and current points.
                var deltaPos = xCur[i] - xCur[argMin];
                var deltaNeg = xCur[argMin] - xPrev[i];
                var span = deltaPos + deltaNeg;
                // span is xCur[i] - xPrev[i]; it is zero when the segment is degenerate, which
                // happens for a finished cursor (prev == cur) and when the first point sits at x = 0.
                // Dividing would yield NaN, so fall back to the current y of that curve.
                var currentY = span != 0
                    ? (deltaPos * yPrev[i] + deltaNeg * yCur[i]) / span
                    : yCur[i];
                yAvg += currentY;
                yVar += currentY * currentY;
            }
            yAvg /= prList.Count;
            yVar = yVar / prList.Count - yAvg * yAvg;
            // Clamp at zero: rounding can make the computed variance slightly negative.
            var yStdErr = Math.Sqrt(Math.Max(0.0, yVar)) / Math.Sqrt(prList.Count);
            avgPoints.Add(new XYPlot.XYPoint(min, yAvg, yStdErr));

            // Advance the cursor whose x value was used for the current point. The cursor is moved
            // exactly once per emitted point so that no row of the curve is skipped.
            xPrev[argMin] = xCur[argMin];
            yPrev[argMin] = yCur[argMin];
            if (cursors[argMin].MoveNext())
            {
                xGetters[argMin](ref xCur[argMin]);
                yGetters[argMin](ref yCur[argMin]);
            }
        }

        return avgPoints;
    }
    finally
    {
        // Dispose on every exit path; entries are null if an exception interrupted cursor creation.
        foreach (var curs in cursors)
        {
            if (curs != null)
                curs.Dispose();
        }
    }
}
#endif
private protected override IEnumerable<string> GetPerInstanceColumnsToSave(RoleMappedSchema schema)
{
Host.CheckValue(schema, nameof(schema));
Expand Down