Skip to content

add root cause localization transformer #4925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 51 commits into from
May 11, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
d5ee205
add root cause localization transformer
suxi-ms Mar 10, 2020
f727a79
add test cases
suxi-ms Mar 16, 2020
92de1dc
revert sln changes
suxi-ms Mar 18, 2020
798289c
add evaluation
suxi-ms Mar 18, 2020
f2e128d
temp save for internal review
suxi-ms Mar 20, 2020
51569e3
rename function
suxi-ms Mar 20, 2020
59c6e89
temp save bottom up points for switch desktop
suxi-ms Mar 22, 2020
29216e0
update from laptop
suxi-ms Mar 22, 2020
69da330
save for add test
suxi-ms Mar 23, 2020
e1c5432
add root cause localization algorithm
suxi-ms Mar 23, 2020
3a1d1c5
add root cause localization algorithm
suxi-ms Mar 23, 2020
8f97602
print score, path and directions in sample
suxi-ms Mar 23, 2020
48123f4
merge with master
suxi-ms Mar 23, 2020
c47302f
extract root cause analyzer
suxi-ms Mar 23, 2020
b07ad28
refine code
suxi-ms Mar 23, 2020
c729877
merge with master
suxi-ms Mar 24, 2020
ebbdb0d
update for algorithm
suxi-ms Mar 26, 2020
0d43b0d
add evaluatin
suxi-ms Mar 26, 2020
5778eed
some refine for code
suxi-ms Mar 26, 2020
c9ed044
fix some typo
suxi-ms Mar 27, 2020
e440f25
remove unused code
suxi-ms Mar 27, 2020
feba6f4
reformat code
suxi-ms Mar 27, 2020
686831c
updates
suxi-ms Mar 27, 2020
ddc8a36
update from review
suxi-ms Mar 29, 2020
475ee8a
read double for beta
suxi-ms Apr 1, 2020
8d874ca
remove SignatureDataTransform constructor
suxi-ms Apr 1, 2020
0674ab3
update
suxi-ms Apr 1, 2020
4c5b8fb
update
suxi-ms Apr 1, 2020
08d607c
remove white space
suxi-ms Apr 2, 2020
c688233
refine internal logic
suxi-ms Apr 7, 2020
98637db
update
suxi-ms Apr 8, 2020
4ff2ed1
update
suxi-ms Apr 8, 2020
c22ad50
updated test
suxi-ms Apr 13, 2020
ea7ddbe
update score
suxi-ms Apr 15, 2020
547aef2
update variable name
suxi-ms Apr 17, 2020
8d17c3c
add some comments
suxi-ms Apr 21, 2020
66b614a
refine internal function
suxi-ms Apr 23, 2020
12e7e18
handle for infinity and nan
suxi-ms Apr 24, 2020
e213615
rename the algorithm by removing DT
suxi-ms Apr 26, 2020
30915cd
Update src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs
suxi-ms Apr 27, 2020
fda4ec7
fix type
suxi-ms Apr 27, 2020
620ef58
add an else branch when delta is negative
suxi-ms Apr 27, 2020
ae5722f
Merge branch 'master' of https://github.com/suxi-ms/machinelearning
suxi-ms Apr 27, 2020
7f89fea
update model signature
suxi-ms Apr 28, 2020
42dcbc2
update rca interface by removing transformer
suxi-ms May 7, 2020
9893fad
add more documents
suxi-ms May 7, 2020
c831e43
update
suxi-ms May 8, 2020
16f5b33
update
suxi-ms May 9, 2020
9cd8739
update the constructor
suxi-ms May 9, 2020
f80c200
update comments
suxi-ms May 9, 2020
7c1c348
fix typo
suxi-ms May 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update
  • Loading branch information
suxi-ms committed May 8, 2020
commit c831e439710bc401bac287221a7081a685e6fac9
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ public static void Example()
// exception tracking and logging, as well as the source of randomness.
var mlContext = new MLContext();

// Create an root cause localizatin input instance.
// Create a root cause localization input instance.
DateTime timestamp = GetTimestamp();
var data = new RootCauseLocalizationInput(timestamp, GetAnomalyDimension(), new List<MetricSlice>() { new MetricSlice(timestamp, GetPoints()) }, AggregateType.Sum, AGG_SYMBOL);

// Get the root cause localization result
// Get the root cause localization result.
RootCause prediction = mlContext.AnomalyDetection.LocalizeRootCause(data);

// Print the localization result.
Expand Down
16 changes: 8 additions & 8 deletions src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System;
using System.Reflection;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
using Microsoft.ML.TimeSeries;
using Microsoft.ML.Transforms.TimeSeries;

Expand Down Expand Up @@ -164,12 +165,14 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog
/// </example>
public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5)
{
IHostEnvironment host = CatalogUtils.GetEnvironment(catalog);

//check the root cause input
CheckRootCauseInput(src);
CheckRootCauseInput(host, src);

//check beta
if (beta < 0 || beta > 1) {
throw new ArgumentException("Beta must be in [0,1]");
host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]");
Copy link
Contributor

@harishsk harishsk May 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need this if check. The CheckUserArg is performing the check. #Resolved

Copy link
Member Author

@suxi-ms suxi-ms May 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need this if check. The CheckUserArg is performing the check.

will update #Resolved

}

//find out the root cause
Expand All @@ -178,11 +181,11 @@ public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog,
return dst;
}

private static void CheckRootCauseInput(RootCauseLocalizationInput src)
private static void CheckRootCauseInput(IHostEnvironment host, RootCauseLocalizationInput src)
{
if (src.Slices.Count < 1)
{
throw new ArgumentException("Length of Slices must be larger than 0");
host.CheckUserArg(src.Slices.Count > 1 , nameof(src.Slices), "Must has more than one item");
}

bool containsAnomalyTimestamp = false;
Expand All @@ -193,10 +196,7 @@ private static void CheckRootCauseInput(RootCauseLocalizationInput src)
containsAnomalyTimestamp = true;
}
}
if (!containsAnomalyTimestamp)
{
throw new ArgumentException("Has no points in the given anomaly timestamp");
}
host.CheckUserArg(containsAnomalyTimestamp, nameof(src.Slices), "Has no points in the given anomaly timestamp");
}

/// <summary>
Expand Down
121 changes: 55 additions & 66 deletions src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using Microsoft.ML.Internal.Utilities;

namespace Microsoft.ML.TimeSeries
Expand All @@ -29,7 +30,7 @@ public RootCause Analyze()
return AnalyzeOneLayer(_src);
}

//This is a function for analyze one layer for root cause, we select one dimension with values who contributes the most to the anomaly. For full result, call this function recursively
//This function analyzes one layer for the root cause: we select the one dimension whose values contribute the most to the anomaly.
private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src)
{
RootCause dst = new RootCause();
Expand All @@ -42,11 +43,7 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src)
Dictionary<string, Point> dimPointMapping = pointInfo.Item3;

//This means there is no anomaly point with the anomaly dimension, or no point under the anomaly dimension.
if (anomalyTree.ParentNode == null)
{
return dst;
}
if (dimPointMapping.Count == 0)
if (anomalyTree.ParentNode == null || dimPointMapping.Count == 0)
{
return dst;
}
Expand All @@ -59,16 +56,8 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src)

protected List<Point> GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src)
{
List<Point> points = new List<Point>();
foreach (MetricSlice slice in src.Slices)
{
if (slice.TimeStamp.Equals(src.AnomalyTimestamp))
{
points = slice.Points;
}
}

return points;
MetricSlice slice = src.Slices.Single(slice => slice.TimeStamp.Equals(src.AnomalyTimestamp));
return slice.Points;
}

protected DimensionInfo SeperateDimension(Dictionary<string, Object> dimensions, Object aggSymbol)
Expand All @@ -92,8 +81,8 @@ protected DimensionInfo SeperateDimension(Dictionary<string, Object> dimensions,

protected Tuple<PointTree, PointTree, Dictionary<string, Point>> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo)
{
PointTree pointTree = PointTree.CreateDefaultInstance();
PointTree anomalyTree = PointTree.CreateDefaultInstance();
PointTree pointTree = new PointTree();
PointTree anomalyTree = new PointTree();
Dictionary<string, Point> dimPointMapping = new Dictionary<string, Point>();

List<Point> totalPoints = GetTotalPointsForAnomalyTimestamp(src);
Expand Down Expand Up @@ -125,12 +114,8 @@ protected Tuple<PointTree, PointTree, Dictionary<string, Point>> GetPointsInfo(R

protected Dictionary<string, Object> GetSubDim(Dictionary<string, Object> dimension, List<string> keyList)
{
Dictionary<string, Object> subDim = new Dictionary<string, Object>();
foreach (string dim in keyList)
{
subDim.Add(dim, dimension[dim]);
}
return subDim;
return new Dictionary<string, object>(keyList.Select(dim => new KeyValuePair<string, object>(dim, dimension[dim])).ToDictionary(kvp => kvp.Key, kvp => kvp.Value));

}

protected List<RootCauseItem> LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, Dictionary<string, Object> anomalyDimension, List<string> aggDims)
Copy link
Contributor

@harishsk harishsk May 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Please consider something along the lines of the following for the GetSubDim function.

return new Dictionary<string, object>(keyList.Select(dim => new KeyValuePair<string, object>(dim, dimension[dim])));

#Resolved

Copy link
Member Author

@suxi-ms suxi-ms May 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Please consider something along the lines of the following for the GetSubDim function.

return new Dictionary<string, object>(keyList.Select(dim => new KeyValuePair<string, object>(dim, dimension[dim])));

updated #Resolved

Expand Down Expand Up @@ -203,7 +188,8 @@ protected List<Point> GetTopAnomaly(List<Point> anomalyPoints, Point root, List<
{
anomalyPoints.Reverse();
}
else {
else
{
anomalyPoints = anomalyPoints.FindAll(x => x.Delta < 0);
}
if (anomalyPoints.Count == 1)
Expand Down Expand Up @@ -245,21 +231,23 @@ protected BestDimension SelectBestDimension(List<Point> totalPoints, List<Point>

foreach (string dimKey in aggDim)
{
BestDimension dimension = BestDimension.CreateDefaultInstance();
BestDimension dimension = new BestDimension();
dimension.DimensionKey = dimKey;

UpdateDistribution(dimension.PointDis, totalPoints, dimKey);
UpdateDistribution(dimension.AnomalyDis, anomalyPoints, dimKey);

double relativeEntropy = GetDimensionEntropy(dimension.PointDis, dimension.AnomalyDis);
double gain = totalEntropy - relativeEntropy;
if (Double.IsNaN(gain)) {
if (Double.IsNaN(gain))
{
gain = 0;
}
entroyGainMap.Add(dimension, gain);

double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis);
if (Double.IsInfinity(gainRatio)) {
if (Double.IsInfinity(gainRatio))
{
gainRatio = 0;
}
entroyGainRatioMap.Add(dimension, gainRatio);
Expand All @@ -282,7 +270,7 @@ private BestDimension SelectBestDimension(Dictionary<string, List<Point>> pointC

foreach (string dimKey in aggDim)
{
BestDimension dimension = BestDimension.CreateDefaultInstance();
BestDimension dimension = new BestDimension();
dimension.DimensionKey = dimKey;

if (pointChildren.ContainsKey(dimKey))
Expand All @@ -295,14 +283,16 @@ private BestDimension SelectBestDimension(Dictionary<string, List<Point>> pointC
}

double entropy = GetEntropy(dimension.PointDis.Count, dimension.AnomalyDis.Count);
if (Double.IsNaN(entropy)) {
if (Double.IsNaN(entropy))
{
entropy = Double.MaxValue;
}
entropyMap.Add(dimension, entropy);

double gainRatio = entropy / GetDimensionInstrinsicValue(dimension.PointDis);

if (Double.IsInfinity(gainRatio)) {
if (Double.IsInfinity(gainRatio))
{
gainRatio = 0;
}
entropyRatioMap.Add(dimension, gainRatio);
Expand All @@ -326,7 +316,8 @@ private AnomalyDirection GetRootCauseDirection(Point rootCausePoint)
{
return AnomalyDirection.Down;
}
else {
else
{
return AnomalyDirection.Same;
}
}
Expand Down Expand Up @@ -357,7 +348,8 @@ private void GetRootCauseDirectionAndScore(Dictionary<string, Point> dimPointMap
{
dst.Items[i].Score = 1;
}
else {
else
{
dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise, Math.Abs(scoreList[i].ExplainaryScore), beta);
}
}
Expand All @@ -372,7 +364,8 @@ private void GetRootCauseDirectionAndScore(Dictionary<string, Point> dimPointMap
{
dst.Items[0].Score = 1;
}
else {
else
{
dst.Items[0].Score = GetFinalScore(scores.Item1, scores.Item2, beta);
}
dst.Items[0].Direction = GetRootCauseDirection(rootCausePoint);
Expand Down Expand Up @@ -469,25 +462,33 @@ private BestDimension FindBestDimension(SortedDictionary<BestDimension, double>
{
if (dimension.Key.AnomalyDis.Count == 1 || (isLeavesLevel ? dimension.Value >= meanGain : dimension.Value <= meanGain))
{
if (dimension.Key.AnomalyDis.Count > 1)
if (best == null)
{
if (best == null || (!Double.IsNaN(valueRatioMap[best]) && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))))
{
best = dimension.Key;
}
best = dimension.Key;
}
else
{
if (best == null || best.AnomalyDis.Count > 1)
bool isRatioNan = Double.IsNaN(valueRatioMap[best]);
if (dimension.Key.AnomalyDis.Count > 1)
{
best = dimension.Key;
if (!isRatioNan && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)))
{
best = dimension.Key;
}
}
else
{
if (!Double.IsNaN(valueRatioMap[best]) && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))
if (best.AnomalyDis.Count > 1)
{
best = dimension.Key;
}
else
{
if (!isRatioNan && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))
{
best = dimension.Key;
}
}
}
}
}
Expand Down Expand Up @@ -660,26 +661,19 @@ private void UpdateDistribution(Dictionary<string, int> distribution, List<Point
}
}

private double Log2(double val)
{
if (Double.IsNaN(val))
{
return 0;
}
[MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
private double Log2(double val) => Double.IsNaN(val) ? 0 : Math.Log(val) / Math.Log(2);

return Math.Log(val) / Math.Log(2);
}

private static bool ContainsAll(Dictionary<string, Object> bigDic, Dictionary<string, Object> smallDic)
private static bool ContainsAll(Dictionary<string, Object> bigDictionary, Dictionary<string, Object> smallDictionary)
{
foreach (var item in smallDic)
foreach (var item in smallDictionary)
{
if (!bigDic.ContainsKey(item.Key))
if (!bigDictionary.ContainsKey(item.Key))
{
return false;
}

if (bigDic.ContainsKey(item.Key) && !bigDic[item.Key].Equals(smallDic[item.Key]))
if (bigDictionary.ContainsKey(item.Key) && !bigDictionary[item.Key].Equals(smallDictionary[item.Key]))
{
return false;
}
Expand Down Expand Up @@ -714,12 +708,10 @@ public class PointTree
public Dictionary<string, List<Point>> ChildrenNodes;
public List<Point> Leaves;

public static PointTree CreateDefaultInstance()
public PointTree()
{
PointTree instance = new PointTree();
instance.Leaves = new List<Point>();
instance.ChildrenNodes = new Dictionary<string, List<Point>>();
return instance;
Leaves = new List<Point>();
ChildrenNodes = new Dictionary<string, List<Point>>();
}
}

Expand All @@ -729,13 +721,10 @@ public sealed class BestDimension : IComparable
public Dictionary<string, int> AnomalyDis;
public Dictionary<string, int> PointDis;

public BestDimension() { }
public static BestDimension CreateDefaultInstance()
public BestDimension()
{
BestDimension instance = new BestDimension();
instance.AnomalyDis = new Dictionary<string, int>();
instance.PointDis = new Dictionary<string, int>();
return instance;
AnomalyDis = new Dictionary<string, int>();
PointDis = new Dictionary<string, int>();
}

public int CompareTo(object obj)
Expand Down