Skip to content

[SrCnnEntireAnomalyDetector] Upgrade boundary calculation and expected value calculation #5436

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 95 additions & 49 deletions src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ internal static class Defaults
{
public const double Threshold = 0.3;
public const int BatchSize = 2000;
public const double Sensitivity = 55;
public const double Sensitivity = 70;
public const SrCnnDetectMode DetectMode = SrCnnDetectMode.AnomalyOnly;
public const int Period = 0;
public const SrCnnDeseasonalityMode DeseasonalityMode = SrCnnDeseasonalityMode.Stl;
Expand Down Expand Up @@ -349,36 +349,55 @@ internal sealed class SrCnnEntireModeler
private static readonly int _judgementWindowSize = 40;
private static readonly double _eps = 1e-8;
private static readonly double _deanomalyThreshold = 0.35;
private static readonly double _boundSensitivity = 70.0;

// A fixed lookup table which returns factor using sensitivity as index.
// Since Margin = BoundaryUnit * factor, this factor is calculated to make sure Margin == Boundary when sensitivity is 50,
// and increases/decreases exponentially as sensitivity increases/decreases.
// The factor array is generated by formula:
// f(x)=1, if x=50;
// f(x)=f(x+1)*(1.25+0.001*x), if 0<=x<50;
// f(x)=f(x+1)/(1.25+0.001*(x-50)), if 50<x<60;
// f(x)=f(x+1)/(1.15+0.001*(x-50)), if 60<=x<=100.
private static readonly double _boundSensitivity = 93.0;
private static readonly double _unitForZero = 0.3;

// pseudo-code to generate the factors.
// factors = []
// for i in range(0, 30):
// sen = 0.8 * (i - 30) ** 2 + 32
// factors.append(sen)
// for i in range(30, 50):
// sen = -1.25 * i + 67.5
// factors.append(sen)
// for i in range(50, 60):
// sen = -0.4 * i + 25
// factors.append(sen)
// for i in range(60, 70):
// sen = -0.04 * i + 3.4
// factors.append(sen)
// for i in range(70, 80):
// sen = -0.03 * i + 2.7
// factors.append(sen)
// for i in range(80, 90):
// sen = -0.015 * i + 1.4999999999999998
// factors.append(sen)
// for i in range(90, 98):
// sen = -0.011818181818181818 * i + 1.2136363636363636
// factors.append(sen)
// factors.append(-0.011818181818181818 * 99 + 1.2136363636363636)
// factors.append(0.01200000000000001)
// for i in range(5):
// sen = -0.001925*i + 0.008
// factors.append(sen)
// factors.append(0)
// factors = factors[5:]
private static readonly double[] _factors = new double[]{
184331.62871148242, 141902.71648305038, 109324.12672037778, 84289.9974713784, 65038.57829581667, 50222.84038287002,
38812.08684920403, 30017.081863266845, 23233.035497884553, 17996.15452973242, 13950.50738738947, 10822.736530170265,
8402.745753237783, 6528.939979205737, 5076.93622022219, 3950.92312857758, 3077.042935029268, 2398.318733460069,
1870.7634426365591, 1460.393007522685, 1140.9320371270976, 892.0500681212648, 698.0047481387048, 546.5972968979678,
428.36778753759233, 335.97473532360186, 263.71643275007995, 207.16137686573444, 162.8627176617409, 128.13746472206208,
100.8956415134347, 79.50799173635517, 62.70346351447568, 49.48971074544253, 39.09139869308257, 30.90229145698227,
24.448015393182175, 19.35709849024717, 15.338429865489042, 12.163703303322, 9.653732780414286, 7.667778221139226,
6.095213212352326, 4.8490160798347866, 3.8606815922251485, 3.076240312529999, 2.4531421949999994, 1.9578149999999996,
1.5637499999999998, 1.25, 1.0, 0.8695652173913044, 0.7554867223208555, 0.655804446459076, 0.5687809596349316,
0.4928777813127657, 0.4267340097946024, 0.36914706729636887, 0.3190553736355825, 0.27552277516026125, 0.23772456873189068,
0.20493497304473338, 0.17651591132190647, 0.1519069804835684, 0.13061649224726435, 0.11221348131208278, 0.09632058481723846,
0.08260770567516164, 0.0707863801843716, 0.06060477755511267, 0.051843265658779024, 0.0443104834690419, 0.03783986632710667,
0.03228657536442549, 0.027524787181948417, 0.02344530424356765, 0.019953450420057577, 0.01696721974494692, 0.014415649740821513,
0.012237393667929978, 0.010379468759906684, 0.008796159966022614, 0.0074480609365136455, 0.006301235986898177,
0.00532648857725966, 0.004498723460523362, 0.0037963911059268884, 0.0032010043051660104, 0.002696718032995797,
0.0022699646742388863, 0.0019091376570554135, 0.0011570531254881296, 0.000697019955113331, 0.00041737721863073713,
0.000248438820613534, 0.00014700521929794912, 8.647365841055832e-05, 5.056939088336744e-05, 2.9400808653120604e-05,
1.6994687082728674e-05, 9.767061541798089e-06
};
532.0, 492.8, 455.20000000000005, 419.20000000000005, 384.8, 352.0, 320.8, 291.2, 263.20000000000005,
236.8, 212.0, 188.8, 167.20000000000002, 147.2, 128.8, 112.0, 96.8, 83.2, 71.2, 60.8, 52.0, 44.8, 39.2,
35.2, 32.8, 30.0, 28.75, 27.5, 26.25, 25.0, 23.75, 22.5, 21.25, 20.0, 18.75, 17.5, 16.25, 15.0, 13.75,
12.5, 11.25, 10.0, 8.75, 7.5, 6.25, 5.0, 4.599999999999998, 4.199999999999999, 3.799999999999997,
3.3999999999999986, 3.0, 2.599999999999998, 2.1999999999999993, 1.7999999999999972, 1.3999999999999986,
1.0, 0.96, 0.9199999999999999, 0.8799999999999999, 0.8399999999999999, 0.7999999999999998,
0.7599999999999998, 0.7199999999999998, 0.6799999999999997, 0.6399999999999997, 0.6000000000000001,
0.5700000000000003, 0.54, 0.5100000000000002, 0.4800000000000004, 0.4500000000000002, 0.4200000000000004,
0.3900000000000001, 0.3600000000000003, 0.33000000000000007, 0.2999999999999998, 0.2849999999999999,
0.2699999999999998, 0.2549999999999999, 0.23999999999999977, 0.22499999999999987, 0.20999999999999974,
0.19499999999999984, 0.17999999999999994, 0.1649999999999998, 0.1499999999999999, 0.13818181818181818,
0.12636363636363646, 0.1145454545454545, 0.10272727272727278, 0.09090909090909083, 0.0790909090909091,
0.06727272727272737, 0.043636363636363695, 0.01200000000000001, 0.008, 0.0060750000000000005, 0.00415,
0.0022249999999999995, 0.0002999999999999999, 0.0
};

private readonly double _threshold;
private readonly double _sensitivity;
Expand All @@ -387,6 +406,8 @@ internal sealed class SrCnnEntireModeler
private readonly IDeseasonality _deseasonalityFunction;

//used in all modes
private double _minimumOriginValue;
private double _maximumOriginValue;
private readonly double[] _predictArray;
private double[] _backAddArray;
private double[] _fftRe;
Expand Down Expand Up @@ -449,10 +470,15 @@ public void Train(double[] values, ref double[][] results)
Array.Resize<double[]>(ref results, values.Length);
}

_minimumOriginValue = Double.MaxValue;
_maximumOriginValue = Double.MinValue;

Array.Resize(ref _seriesToDetect, values.Length);
for (int i = 0; i < values.Length; ++i)
{
_seriesToDetect[i] = values[i];
_minimumOriginValue = Math.Min(_minimumOriginValue, values[i]);
_maximumOriginValue = Math.Max(_maximumOriginValue, values[i]);
}

if (_period > 0)
Expand Down Expand Up @@ -641,7 +667,7 @@ private void GetExpectedValue(double[] values, double[][] results)

for (int i = 0; i < results.Length; ++i)
{
results[i][3] = _ifftRe[i];
results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(_ifftRe[i]);
}
}

Expand All @@ -650,7 +676,7 @@ private void GetExpectedValuePeriod(double[] values, double[][] results, IReadOn
//Step 8: Calculate Expected Value
for (int i = 0; i < values.Length; ++i)
{
results[i][3] = values[i] - residual[i];
results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(values[i] - residual[i]);
}
}

Expand Down Expand Up @@ -762,7 +788,8 @@ private void GetMargin(double[] values, double[][] results, double sensitivity)
{
//Step 10: Calculate UpperBound and LowerBound
var margin = CalculateMargin(_units[i], sensitivity);
results[i][3] = _ifftRe[i];
results[i][3] = AdjustExpectedValueBasedOnOriginalDataRange(_ifftRe[i]);

results[i][4] = _units[i];
results[i][5] = _ifftRe[i] + margin;
results[i][6] = _ifftRe[i] - margin;
Expand All @@ -783,6 +810,21 @@ private void GetMargin(double[] values, double[][] results, double sensitivity)
}
}

// Adjust the expected value if original data range is non-negative or non-positive
private double AdjustExpectedValueBasedOnOriginalDataRange(double expectedValue)
{
if (_minimumOriginValue >= 0 && expectedValue < 0)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the minimum original value > 0 and you set the expected value = 0, couldn't this leave the expected value out of range? Is this adjusted elsewhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't adjust the expected value that much; there are cases where the expected value curve is not smooth and looks odd. Currently, for non-anomaly points, we have logic that ensures the expected value is close to the original value, which guarantees the expected value is not far away from the original data range.


In reply to: 505667560 [](ancestors = 505667560)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious does kensho have similar check in your service?

Copy link
Contributor Author

@guinao guinao Oct 19, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kensho has similar logic to adjust the expected value of anomaly / non-anomaly points, but doesn't implement the data range check, as there has been no DSAT report on this so far.


In reply to: 506583086 [](ancestors = 506583086)

{
expectedValue = 0;
}
else if (_maximumOriginValue <= 0 && expectedValue > 0)
{
expectedValue = 0;
}

return expectedValue;
}

// Adjust the expected value so that it is within the bound margin of value
private double AdjustExpectedValueBasedOnBound(double value, double expectedValue, double unit)
{
Expand Down Expand Up @@ -880,18 +922,20 @@ private void CalculateExpectedValueByFft(double[] data)
FftUtils.ComputeBackwardFft(_fftRe, _fftIm, _ifftRe, _ifftIm, length);
}

private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys)
private void CalculateBoundaryUnit(double[] data, bool[] isAnomalies)
{
int window = Math.Min(data.Length / 3, 512);
double trendFraction = 0.5; // mix trend and average of trend
double trendSum = 0;
int calculationSize = 0;
bool closeToZero = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: so by default closeToZero is true and then closeToZero &= trend < _eps? This seems a bit unintuitive — did you mean closeToZero = false with closeToZero |= trend < _eps, or have I misunderstood this variable?


MedianFilter(data, window, true);
for (int i = 0; i < _trends.Length; ++i)
{
if (!isAnomalys[i])
if (!isAnomalies[i])
{
closeToZero = closeToZero && _trends[i] < _eps;
trendSum += Math.Abs(_trends[i]);
++calculationSize;
}
Expand All @@ -910,10 +954,17 @@ private void CalculateBoundaryUnit(double[] data, bool[] isAnomalys)
Array.Resize(ref _units, _trends.Length);
for (int i = 0; i < _units.Length; ++i)
{
_units[i] = Math.Max(1, averageTrendPart + Math.Abs(_trends[i]) * trendFraction);
if (double.IsInfinity(_units[i]))
if (closeToZero)
{
_units[i] = _unitForZero;
}
else
{
throw new ArithmeticException("Not finite unit value");
_units[i] = averageTrendPart + Math.Abs(_trends[i]) * trendFraction;
if (double.IsInfinity(_units[i]))
{
throw new ArithmeticException("Not finite unit value");
}
}
}
}
Expand Down Expand Up @@ -1031,19 +1082,14 @@ private double CalculateAnomalyScore(double value, double exp, double unit, bool
return anomalyScore;
}

double distance = Math.Abs(exp - value);
List<double> margins = new List<double>();
for (int i = 100; i >= 0; --i)
{
margins.Add(CalculateMargin(unit, i));
}
double distanceFactor = Math.Abs(exp - value) / unit;

int lb = 0;
int ub = 100;
while (lb < ub)
{
int mid = (lb + ub) / 2;
if (margins[mid] < distance)
if (_factors[100 - mid] < distanceFactor)
{
lb = mid + 1;
}
Expand All @@ -1053,15 +1099,15 @@ private double CalculateAnomalyScore(double value, double exp, double unit, bool
}
}

if (Math.Abs(margins[lb] - distance) < _eps || lb == 0)
if (_factors[100 - lb] == distanceFactor || lb == 0)
{
anomalyScore = lb;
}
else
{
double lowerMargin = margins[lb - 1];
double upperMargin = margins[lb];
anomalyScore = lb - 1 + (distance - lowerMargin) / (upperMargin - lowerMargin);
double lowerMargin = _factors[101 - lb];
double upperMargin = _factors[100 - lb];
anomalyScore = lb - 1 + (distanceFactor - lowerMargin) / (upperMargin - lowerMargin);
}

return anomalyScore / 100.0f;
Expand Down
68 changes: 65 additions & 3 deletions test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ public void TestSrCnnBatchAnomalyDetector(

// Do batch anomaly detection
var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName,
threshold: 0.35, batchSize: batchSize, sensitivity: 90.0, mode);
threshold: 0.35, batchSize: batchSize, sensitivity: 98.0, mode);

// Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection.
var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
Expand Down Expand Up @@ -694,7 +694,7 @@ public void TestSrCnnAnomalyDetectorWithSeasonalData(
{
Threshold = 0.3,
BatchSize = -1,
Sensitivity = 53.0,
Sensitivity = 64.0,
DetectMode = SrCnnDetectMode.AnomalyAndMargin,
Period = 288,
DeseasonalityMode = mode
Expand Down Expand Up @@ -741,7 +741,7 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData(
{
Threshold = 0.23,
BatchSize = -1,
Sensitivity = 53.0,
Sensitivity = 63.0,
DetectMode = SrCnnDetectMode.AnomalyAndMargin,
Period = 288,
DeseasonalityMode = mode
Expand Down Expand Up @@ -776,6 +776,68 @@ public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData(
}
}

[Theory, CombinatorialData]
public void TestSrcnnEntireDetectNonnegativeData(
    [CombinatorialValues(true, false)] bool isPositive)
{
    // Verifies that the expected value (Prediction[3]) respects the sign range of the
    // input series: a non-negative series must produce non-negative expected values,
    // and the same series negated (non-positive) must produce non-positive ones.
    var ml = new MLContext(1);

    // Load the non-negative sample series from the test data file.
    var dataPath = GetDataPath("Timeseries", "non_negative_case.csv");
    IDataView dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true);
    List<TimeSeriesDataDouble> data = ml.Data.CreateEnumerable<TimeSeriesDataDouble>(dataView, reuseRowObject: false).ToList();

    // Flip the sign of every point when testing the non-positive case.
    if (!isPositive)
    {
        for (var i = 0; i < data.Count; ++i)
        {
            data[i].Value = -data[i].Value;
        }
    }

    dataView = ml.Data.LoadFromEnumerable<TimeSeriesDataDouble>(data);

    // Setup the detection arguments.
    string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
    string inputColumnName = nameof(TimeSeriesDataDouble.Value);

    var options = new SrCnnEntireAnomalyDetectorOptions()
    {
        Threshold = 0.10,
        BatchSize = -1,
        Sensitivity = 99.0,
        DetectMode = SrCnnDetectMode.AnomalyAndMargin,
        Period = 0,
        DeseasonalityMode = SrCnnDeseasonalityMode.Stl
    };

    // Run batch anomaly detection over the entire series.
    var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options);

    // Read the output column back as an IEnumerable of SrCnnAnomalyDetection
    // and check the sign of every expected value.
    var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
        outputDataView, reuseRowObject: false);

    foreach (var prediction in predictionColumn)
    {
        if (isPositive)
        {
            Assert.True(prediction.Prediction[3] >= 0);
        }
        else
        {
            Assert.True(prediction.Prediction[3] <= 0);
        }
    }
}

[Fact]
public void RootCauseLocalization()
{
Expand Down
Loading