Skip to content

Commit 23a1fa5

Browse files
prathyusha12345CESARDELATORRE
prathyusha12345
authored andcommitted
Added code to classify github issues into the best 3 labels. (dotnet#260)
1. Created a class to hold prediction values. 2. Added a Score field to the GitHubIssuePrediction class. 3. Changed the existing code in the Labeler class. 4. Added a new method that finds the best 3 scores and returns their indexes, so that the labels at those indexes can be read from the slot names.
1 parent e6a730a commit 23a1fa5

File tree

4 files changed

+121
-16
lines changed

4 files changed

+121
-16
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
5+
namespace GitHubLabeler.DataStructures
6+
{
7+
/// <summary>
/// Simple holder for one label candidate: the label text, the score it
/// received, and the index it was created with.
/// </summary>
public class FullPrediction
{
    /// <summary>Label text of this candidate.</summary>
    public string PredictedLabel;

    /// <summary>Score associated with <see cref="PredictedLabel"/>.</summary>
    public float Score;

    /// <summary>Index value supplied by the creator of this instance.</summary>
    public int OriginalSchemaIndex;

    /// <summary>
    /// Creates a candidate from its three components.
    /// </summary>
    /// <param name="predictedLabel">Stored in <see cref="PredictedLabel"/>.</param>
    /// <param name="score">Stored in <see cref="Score"/>.</param>
    /// <param name="originalSchemaIndex">Stored in <see cref="OriginalSchemaIndex"/>.</param>
    public FullPrediction(string predictedLabel, float score, int originalSchemaIndex)
    {
        this.OriginalSchemaIndex = originalSchemaIndex;
        this.Score = score;
        this.PredictedLabel = predictedLabel;
    }
}
20+
}

samples/csharp/end-to-end-apps/MulticlassClassification-GitHubLabeler/GitHubLabeler/GitHubLabelerConsoleApp/DataStructures/GitHubIssuePrediction.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,7 @@ internal class GitHubIssuePrediction
99
{
1010
[ColumnName("PredictedLabel")]
1111
public string Area;
12+
13+
public float[] Score;
1214
}
1315
}

samples/csharp/end-to-end-apps/MulticlassClassification-GitHubLabeler/GitHubLabeler/GitHubLabelerConsoleApp/Labeler.cs

Lines changed: 97 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.IO;
99
using GitHubLabeler.DataStructures;
1010
using Common;
11+
using Microsoft.ML.Data;
1112

1213
namespace GitHubLabeler
1314
{
@@ -23,6 +24,8 @@ internal class Labeler
2324
private readonly PredictionEngine<GitHubIssue, GitHubIssuePrediction> _predEngine;
2425
private readonly ITransformer _trainedModel;
2526

27+
private FullPrediction[] _fullPredictions;
28+
2629
public Labeler(string modelPath, string repoOwner = "", string repoName = "", string accessToken = "")
2730
{
2831
_modelPath = modelPath;
@@ -54,22 +57,91 @@ public Labeler(string modelPath, string repoOwner = "", string repoName = "", st
5457

5558
/// <summary>
/// Runs the prediction engine on one hard-coded sample issue and writes the
/// three best label candidates, followed by the engine's predicted area,
/// to the console.
/// </summary>
public void TestPredictionForSingleIssue()
{
    // Hard-coded sample issue used as the single test input.
    var singleIssue = new GitHubIssue();
    singleIssue.ID = "Any-ID";
    singleIssue.Title = "Crash in SqlConnection when using TransactionScope";
    singleIssue.Description = "I'm using SqlClient in netcoreapp2.0. Sqlclient.Close() crashes in Linux but works on Windows";

    // Predict labels and scores for the sample issue.
    var prediction = _predEngine.Predict(singleIssue);
    _fullPredictions = GetBestThreePredictions(prediction);

    // One output line per rank, in rank order.
    string[] rankPrefixes = { "1st Label: ", "2nd Label: ", "3rd Label: " };
    for (int rank = 0; rank < rankPrefixes.Length; rank++)
    {
        FullPrediction candidate = _fullPredictions[rank];
        Console.WriteLine(rankPrefixes[rank] + candidate.PredictedLabel + " with score: " + candidate.Score);
    }

    Console.WriteLine($"=============== Single Prediction - Result: {prediction.Area} ===============");
}
6477

78+
/// <summary>
/// Builds the three best label candidates for a prediction by pairing the
/// indexes chosen by <c>GetIndexesOfTopThreeScores</c> with the slot names
/// of the Score column in the prediction engine's output schema.
/// </summary>
/// <param name="prediction">Prediction whose <c>Score</c> array is ranked.</param>
/// <returns>
/// Three candidates in the order the index helper reports them. The same
/// array is also stored in <c>_fullPredictions</c>.
/// </returns>
private FullPrediction[] GetBestThreePredictions(GitHubIssuePrediction prediction)
{
    float[] allScores = prediction.Score;
    int scoreCount = allScores.Length;

    // Slot names of the Score column, read from the engine's output schema.
    VBuffer<ReadOnlyMemory<char>> labelNames = default;
    _predEngine.OutputSchema[nameof(GitHubIssuePrediction.Score)].GetSlotNames(ref labelNames);

    GetIndexesOfTopThreeScores(allScores, scoreCount, out int top0, out int top1, out int top2);

    int[] topSlots = { top0, top1, top2 };
    var best = new FullPrediction[topSlots.Length];
    for (int rank = 0; rank < topSlots.Length; rank++)
    {
        int slot = topSlots[rank];
        best[rank] = new FullPrediction(labelNames.GetItemOrDefault(slot).ToString(), allScores[slot], slot);
    }

    _fullPredictions = best;
    return _fullPredictions;
}
98+
99+
/// <summary>
/// Finds the positions of the three largest values among the first
/// <paramref name="n"/> entries of <paramref name="scores"/>.
/// </summary>
/// <param name="scores">Score array to rank.</param>
/// <param name="n">Number of leading entries of <paramref name="scores"/> to consider.</param>
/// <param name="index0">Receives the position of the largest score.</param>
/// <param name="index1">Receives the position of the second largest score.</param>
/// <param name="index2">Receives the position of the third largest score.</param>
/// <remarks>
/// When <paramref name="n"/> is less than 3, "Invalid Input" is written to the
/// console and all three indexes are left at 0. Otherwise the three indexes
/// are always distinct and within range; equal scores are ranked by position,
/// earliest first.
/// </remarks>
private void GetIndexesOfTopThreeScores(float[] scores, int n, out int index0, out int index1, out int index2)
{
    index0 = index1 = index2 = 0;
    if (n < 3)
    {
        Console.WriteLine("Invalid Input");
        return;
    }

    // Track positions, not values. Looking the winning values up afterwards
    // returns the same slot twice when scores tie, and a wrong slot (or -1)
    // when a rank was never filled. -1 marks a rank that is still empty.
    int first = -1;
    int second = -1;
    int third = -1;

    for (int i = 0; i < n; i++)
    {
        float current = scores[i];

        if (first < 0 || current > scores[first])
        {
            // New maximum: the previous first and second each move down one rank.
            third = second;
            second = first;
            first = i;
        }
        else if (second < 0 || current > scores[second])
        {
            // Between first and second: the previous second moves down to third.
            third = second;
            second = i;
        }
        else if (third < 0 || current > scores[third])
        {
            third = i;
        }
    }

    index0 = first;
    index1 = second;
    index2 = third;
}
136+
65137
// Label all issues that are not labeled yet
66138
/// <summary>
/// Fetches the new issues and, for each one that has no labels yet,
/// predicts labels and applies them.
/// </summary>
public async Task LabelAllNewIssuesInGitHubRepo()
{
    var newIssues = await GetNewIssues();

    foreach (var issue in newIssues)
    {
        // Issues that already carry at least one label are left untouched.
        if (issue.Labels.Any())
        {
            continue;
        }

        FullPrediction[] predictions = PredictLabels(issue);
        ApplyLabels(issue, predictions);
    }
}
75147

@@ -89,7 +161,7 @@ private async Task<IReadOnlyList<Issue>> GetNewIssues()
89161
.ToList();
90162
}
91163

92-
private string PredictLabel(Octokit.Issue issue)
164+
private FullPrediction[] PredictLabels(Octokit.Issue issue)
93165
{
94166
var corefxIssue = new GitHubIssue
95167
{
@@ -98,26 +170,35 @@ private string PredictLabel(Octokit.Issue issue)
98170
Description = issue.Body
99171
};
100172

101-
var predictedLabel = Predict(corefxIssue);
173+
_fullPredictions = Predict(corefxIssue);
102174

103-
return predictedLabel;
175+
return _fullPredictions;
104176
}
105177

106-
public string Predict(GitHubIssue issue)
107-
{
178+
/// <summary>
/// Runs the prediction engine on <paramref name="issue"/> and returns the
/// three best label candidates for it.
/// </summary>
/// <param name="issue">Issue to classify.</param>
/// <returns>The array produced by <c>GetBestThreePredictions</c>.</returns>
public FullPrediction[] Predict(GitHubIssue issue)
    => GetBestThreePredictions(_predEngine.Predict(issue));
112186

113-
private void ApplyLabel(Issue issue, string label)
187+
/// <summary>
/// Applies to a GitHub issue every predicted label whose score reaches the
/// minimum threshold, and logs each label that was applied.
/// </summary>
/// <param name="issue">Issue to update; its number and title are used.</param>
/// <param name="fullPredictions">Candidate labels with their scores.</param>
private void ApplyLabels(Issue issue, FullPrediction[] fullPredictions)
{
    // A label is assigned in GitHub only when its own score is at least 30%.
    const double minimumScore = 0.3;

    var issueUpdate = new IssueUpdate();
    var appliedLabels = new List<string>();

    foreach (var fullPrediction in fullPredictions)
    {
        if (fullPrediction.Score >= minimumScore)
        {
            issueUpdate.AddLabel(fullPrediction.PredictedLabel);
            appliedLabels.Add(fullPrediction.PredictedLabel);
        }
    }

    // Nothing qualified: leave the issue untouched.
    if (appliedLabels.Count == 0)
    {
        return;
    }

    // Send all qualifying labels in a single update instead of one request per label.
    // NOTE(review): the value returned by Update is discarded, as it was before;
    // confirm whether the call should be awaited.
    _client.Issue.Update(_repoOwner, _repoName, issue.Number, issueUpdate);

    // Report the label that was actually applied, not always the top prediction.
    foreach (var appliedLabel in appliedLabels)
    {
        Console.WriteLine($"Issue {issue.Number} : \"{issue.Title}\" \t was labeled as: {appliedLabel}");
    }
}
122-
}
203+
}
123204
}

samples/csharp/end-to-end-apps/MulticlassClassification-GitHubLabeler/GitHubLabeler/GitHubLabelerConsoleApp/Program.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ private static void TestSingleLabelPrediction(string modelFilePathName)
154154

155155
private static async Task PredictLabelsAndUpdateGitHub(string ModelPath)
156156
{
157+
Console.WriteLine(".............Retrieving Issues from GITHUB repo, predicting label/s and assigning predicted label/s......");
158+
157159
var token = Configuration["GitHubToken"];
158160
var repoOwner = Configuration["GitHubRepoOwner"]; //IMPORTANT: This can be a GitHub User or a GitHub Organization
159161
var repoName = Configuration["GitHubRepoName"];

0 commit comments

Comments
 (0)