Skip to content

Support for Categorical features in CalculateFeatureContribution of LightGBM #5018

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 21, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Updated way of calculating feature contribution for categorical splits
  • Loading branch information
antoniovs1029 committed Apr 15, 2020
commit a2d8779f7e237c89540fca5f4ec918e65af80b8a
46 changes: 35 additions & 11 deletions src/Microsoft.ML.FastTree/TreeEnsemble/InternalRegressionTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1519,28 +1519,52 @@ public void AppendFeatureContributions(in VBuffer<float> src, BufferBuilder<floa
if (CategoricalSplit[node])
{
Contracts.Assert(CategoricalSplitFeatures != null);

int newNode = LteChild[node];
otherWay = GtChild[node];
bool match = false;
int selectedIndex = -1;
int newNode = 0;
foreach (var index in CategoricalSplitFeatures[node])
{
float fv = GetFeatureValue(src.GetItemOrDefault(index), node);
if (fv > 0.0f)
{
newNode = GtChild[node];
otherWay = LteChild[node];
match = true;
selectedIndex = index; // We only expect at most one match
break;
}
}

// What if we went the other way?
var ghostLeaf = GetLeafFrom(in src, otherWay);
var ghostOutput = GetOutput(ghostLeaf);

// If the ghost got a smaller output, the contribution of the categorical features is positive, so
// the contribution is true minus ghost.
foreach(var ifeat in CategoricalSplitFeatures[node])
contributions.AddFeature(ifeat, (float)(trueOutput - ghostOutput));
if (match)
{
newNode = GtChild[node];
otherWay = LteChild[node];

var ghostLeaf = GetLeafFrom(in src, otherWay);
var ghostOutput = GetOutput(ghostLeaf);
var diff = (float)(trueOutput - ghostOutput);
foreach (var index in CategoricalSplitFeatures[node])
{
if (index == selectedIndex) // this index caused the input to go to the GtChild
contributions.AddFeature(index, diff);
else // All of the others wouldn't cause it
contributions.AddFeature(index, -diff);
}
}
else
{
newNode = LteChild[node];
otherWay = GtChild[node];

var ghostLeaf = GetLeafFrom(in src, otherWay);
var ghostOutput = GetOutput(ghostLeaf);
var diff = (float)(trueOutput - ghostOutput);

// None of the indices caused the input to go to the GtChild,
// so all of them caused it to go to the LteChild.
foreach (var index in CategoricalSplitFeatures[node])
contributions.AddFeature(index, diff);
}

node = newNode;
}
Expand Down