dotnet · OliaG · May 22, 2018 · May 23, 2018 · May 24, 2018 · May 24, 2018
diff --git a/Samples/UCI/readme.md b/Samples/UCI/readme.md
diff --git a/build.proj b/build.proj
@@ -65,16 +65,16 @@
   </Target>
 
   <ItemGroup>
-    <TestFile Include="$(MSBuildThisFileDirectory)/test/data/external/winequality-white.csv"
+    <TestFile Include="$(MSBuildThisFileDirectory)/examples/datasets/external/winequality-white.csv"
       Url="https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
-      DestinationFile="$(MSBuildThisFileDirectory)test/data/external/winequality-white.csv" />
+      DestinationFile="$(MSBuildThisFileDirectory)examples/datasets/external/winequality-white.csv" />
   </ItemGroup>
 
   <Target Name="DownloadExternalTestFiles" Inputs="@(TestFile)" Outputs="%(TestFile.DestinationFile)">
     <Message Importance="High" Text="Downloading external test files... %(TestFile.DestinationFile)" />
     <DownloadFilesFromUrl Items="@(TestFile)"
-                         DestinationDir="test/data/external"
-                         TreatErrorsAsWarnings="true"/>
+                          DestinationDir="examples/datasets/external"
+                          TreatErrorsAsWarnings="true"/>
   </Target>
 
   <Target Name="RunTests" Condition="'$(RunTests)'=='true'">

diff --git a/...rted/BinaryClassification_SentimentAnalysis/BinaryClassification_SentimentAnalysis.csproj b/...rted/BinaryClassification_SentimentAnalysis/BinaryClassification_SentimentAnalysis.csproj
@@ -0,0 +1,26 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp2.0</TargetFramework>
+    <LangVersion>latest</LangVersion> <!-- Program.cs declares an async Main, which needs C# 7.1 or later -->
+  </PropertyGroup>
+
+  <ItemGroup> <!-- Shared dataset files: linked under datasets\ and copied to the output directory, where Program.cs looks for them -->
+    <Content Include="..\..\datasets\imdb_labeled.txt" Link="datasets\imdb_labeled.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </Content>
+    <Content Include="..\..\datasets\yelp_labeled.txt" Link="datasets\yelp_labeled.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </Content>
+  </ItemGroup>
+
+  <ItemGroup> <!-- Microsoft.ML provides the ML types used by Program.cs (see its using directives) -->
+    <PackageReference Include="Microsoft.ML" Version="0.1.0" />
+  </ItemGroup>
+
+  <ItemGroup> <!-- NOTE(review): empty folder entry, presumably added by the IDE for the linked files - confirm it is needed -->
+    <Folder Include="datasets\" />
+  </ItemGroup>
+
+</Project>
diff --git a/examples/GettingStarted/BinaryClassification_SentimentAnalysis/Program.cs b/examples/GettingStarted/BinaryClassification_SentimentAnalysis/Program.cs
@@ -0,0 +1,102 @@
+using System;
+using System.IO;
+using System.Linq;
+using System.Threading.Tasks;
+using Microsoft.ML;
+using Microsoft.ML.Models;
+using Microsoft.ML.Runtime.Api;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Transforms;
+
+namespace BinaryClassification_SentimentAnalysis
+{
+    internal static class Program
+    {
+        private static string AppPath => AppContext.BaseDirectory; // the app's own folder; argv[0] may be relative or empty
+        private static string TrainDataPath => Path.Combine(AppPath, "datasets", "imdb_labeled.txt"); // read by TrainAsync
+        private static string TestDataPath => Path.Combine(AppPath, "datasets", "yelp_labeled.txt"); // read by Evaluate
+        private static string ModelPath => Path.Combine(AppPath, "SentimentModel.zip"); // written by TrainAsync
+
+        private static async Task Main(string[] args)
+        {
+            // The sample walks through the three stages of an ML task in order:
+            // train a model, measure its quality, then use it to predict.
+            var trainedModel = await TrainAsync();
+            Evaluate(trainedModel);
+
+            // Score the hand-written samples and pair every input with its prediction.
+            var samples = TestSentimentData.Sentiments;
+            var scored = samples.Zip(trainedModel.Predict(samples), (s, p) => (s, p));
+
+            foreach (var (sample, prediction) in scored)
+            {
+                var verdict = prediction.Sentiment ? "Positive" : "Negative";
+                Console.WriteLine($"Sentiment: {sample.SentimentText} | Prediction: {verdict} sentiment");
+            }
+
+            // Wait for a line of input before exiting (presumably to keep the console window open).
+            Console.ReadLine();
+        }
+
+        public static async Task<PredictionModel<SentimentData, SentimentPrediction>> TrainAsync()
+        {
+            // A LearningPipeline collects every step of the learning process in order: data, transforms, learner.
+            var pipeline = new LearningPipeline();
+
+            // Data: TextLoader reads the tab-separated training file; the SentimentData class
+            // supplies the column names and types that form the schema.
+            var loader = new TextLoader<SentimentData>(TrainDataPath, useHeader: false, separator: "tab");
+            pipeline.Add(loader);
+
+            // Transform: TextFeaturizer featurizes the "SentimentText" column into "Features".
+            var featurizer = new TextFeaturizer("Features", "SentimentText");
+            pipeline.Add(featurizer);
+
+            // Learner: FastTreeBinaryClassifier, with three of its hyperparameters set explicitly.
+            var learner = new FastTreeBinaryClassifier();
+            learner.NumLeaves = 5;
+            learner.NumTrees = 5;
+            learner.MinDocumentsInLeafs = 2;
+            pipeline.Add(learner);
+
+            Console.WriteLine("=============== Training model ===============");
+            var trainedModel = pipeline.Train<SentimentData, SentimentPrediction>();
+            await trainedModel.WriteAsync(ModelPath); // persist the trained model as a .zip file
+            Console.WriteLine("=============== End training ===============");
+            Console.WriteLine("The model is saved to {0}", ModelPath);
+            return trainedModel;
+        }
+
+        private static void Evaluate(PredictionModel<SentimentData, SentimentPrediction> model)
+        {
+            // To evaluate how well the model predicts, it is run against a new set of data (test data)
+            // that was not involved in training. useHeader matches the training loader in TrainAsync:
+            // the labeled-sentence files are assumed to have no header row, so no line may be skipped.
+            var testData = new TextLoader<SentimentData>(TestDataPath, useHeader: false, separator: "tab");
+
+            // BinaryClassificationEvaluator performs evaluation for binary classification ML problems.
+            var evaluator = new BinaryClassificationEvaluator();
+
+            Console.WriteLine("=============== Evaluating model ===============");
+
+            var metrics = evaluator.Evaluate(model, testData);
+            // BinaryClassificationMetrics contains the overall metrics computed by binary classification evaluators.
+            // The Accuracy metric is the proportion of correct predictions in the test set.
+
+            // The Auc metric gets the area under the ROC curve.
+            // The area under the ROC curve is equal to the probability that the classifier ranks
+            // a randomly chosen positive instance higher than a randomly chosen negative one
+            // (assuming 'positive' ranks higher than 'negative').
+
+            // The F1Score metric gets the classifier's F1 score.
+            // The F1 score is the harmonic mean of precision and recall:
+            //  2 * precision * recall / (precision + recall).
+
+            Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
+            Console.WriteLine($"Auc: {metrics.Auc:P2}");
+            Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
+            Console.WriteLine("=============== End evaluating ===============");
+            Console.WriteLine();
+        }
+    }
+}
diff --git a/examples/GettingStarted/BinaryClassification_SentimentAnalysis/README.md b/examples/GettingStarted/BinaryClassification_SentimentAnalysis/README.md
@@ -0,0 +1,19 @@
+## Goal
+This is a getting-started example that shows the simplest way of using ML.NET APIs to solve a binary classification problem, using sentiment analysis as the example.
+
+## Problem
+The task is to build and train an ML model (machine learning model) that predicts whether a text has positive or negative sentiment. For training and evaluating the model we used IMDb and Yelp comments with known sentiments.
+
+## Problem Class - Binary Classification
+The described task is an example of a binary classification problem. 
+> In machine learning, `binary classification` is the problem of classifying instances into one of two classes. (Classifying instances into more than two classes is called `multiclass classification`.)
+
+The machine learning engineering process includes three steps: training an ML model, evaluating how good it is, and, if the quality is acceptable, using this model for predictions. If the quality of the model is not good enough, different algorithms and/or additional data transformations can be applied, and the model should be trained and evaluated again.
+
+1. **Training** the ML model is implemented in the `TrainAsync()` method, which constructs a `LearningPipeline`, trains it and saves the trained model as a .zip file.
+2. **Evaluating** the ML model is implemented in the `Evaluate()` method, which runs the model against test data (new data with known answers that was not involved in training). As a result, it produces a set of metrics describing the quality of the model.
+3. **Predicting** the sentiment is performed in the `Main()` method:
+```CSharp
+var predictions = model.Predict(TestSentimentData.Sentiments);
+```
+where you send each text as a `SentimentData` object. For each one you receive a `SentimentPrediction` object that contains a boolean field `Sentiment`: true for positive, false for negative sentiment.
diff --git a/examples/GettingStarted/BinaryClassification_SentimentAnalysis/SentimentData.cs b/examples/GettingStarted/BinaryClassification_SentimentAnalysis/SentimentData.cs
@@ -0,0 +1,13 @@
+using Microsoft.ML.Runtime.Api;
+
+namespace BinaryClassification_SentimentAnalysis
+{
+    public class SentimentData // one input row: a piece of text plus its sentiment label
+    {
+        [Column("0")]
+        public string SentimentText; // the text to classify; Program.cs featurizes the column of this name
+
+        [Column("1", name: "Label")]
+        public float Sentiment; // exposed under the column name "Label" rather than the field name
+    }
+}
diff --git a/examples/GettingStarted/BinaryClassification_SentimentAnalysis/SentimentPrediction.cs b/examples/GettingStarted/BinaryClassification_SentimentAnalysis/SentimentPrediction.cs
@@ -0,0 +1,10 @@
+using Microsoft.ML.Runtime.Api;
+
+namespace BinaryClassification_SentimentAnalysis
+{
+    public class SentimentPrediction // prediction output type of the sentiment model
+    {
+        [ColumnName("PredictedLabel")]
+        public bool Sentiment; // bound to "PredictedLabel"; Program.cs prints true as "Positive", false as "Negative"
+    }
+}
diff --git a/examples/GettingStarted/BinaryClassification_SentimentAnalysis/TestSentimentData.cs b/examples/GettingStarted/BinaryClassification_SentimentAnalysis/TestSentimentData.cs
@@ -0,0 +1,26 @@
+using System.Collections.Generic;
+
+namespace BinaryClassification_SentimentAnalysis
+{
+    internal class TestSentimentData
+    {
+        // Hand-written sentences that Program.Main passes to the trained model to demonstrate
+        // prediction.
+        //
+        // NOTE(review): every entry carries Sentiment = 0, whatever its text says. Main prints only
+        // the text and the predicted value and never reads this field; if the labels are meant to
+        // be meaningful they need correcting.
+        internal static readonly IEnumerable<SentimentData> Sentiments = new[]
+        {
+            Sample("Contoso's 11 is a wonderful experience"),
+            Sample("The acting in this movie is very bad"),
+            Sample("Joe versus the Volcano Coffee Company is a great film.")
+        };
+
+        // Builds one entry: the given sentence with the Sentiment label set to 0.
+        private static SentimentData Sample(string text)
+        {
+            return new SentimentData { SentimentText = text, Sentiment = 0 };
+        }
+    }
+}
diff --git a/examples/GettingStarted/GettingStarted.sln b/examples/GettingStarted/GettingStarted.sln
@@ -0,0 +1,37 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.27703.2000
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Regression_TaxiFarePrediction", "Regression_TaxiFarePrediction\Regression_TaxiFarePrediction.csproj", "{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BinaryClassification_SentimentAnalysis", "BinaryClassification_SentimentAnalysis\BinaryClassification_SentimentAnalysis.csproj", "{ED877F56-5304-4F0D-A75C-4C77219C8D0E}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MulticlassClassification_Iris", "MulticlassClassification_Iris\MulticlassClassification_Iris.csproj", "{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}.Release|Any CPU.Build.0 = Release|Any CPU
+		{ED877F56-5304-4F0D-A75C-4C77219C8D0E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{ED877F56-5304-4F0D-A75C-4C77219C8D0E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{ED877F56-5304-4F0D-A75C-4C77219C8D0E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{ED877F56-5304-4F0D-A75C-4C77219C8D0E}.Release|Any CPU.Build.0 = Release|Any CPU
+		{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {B84E804C-06CA-45C8-9B9F-8F69CA930535}
+	EndGlobalSection
+EndGlobal
diff --git a/examples/GettingStarted/MulticlassClassification_Iris/IrisData.cs b/examples/GettingStarted/MulticlassClassification_Iris/IrisData.cs
@@ -0,0 +1,22 @@
+using Microsoft.ML.Runtime.Api;
+
+namespace MulticlassClassification_Iris
+{
+    public class IrisData // one iris record: a label and four measurements, tagged with Column attributes "0" to "4"
+    {
+        [Column("0")]
+        public float Label; // presumably the iris class encoded as a number - confirm against the dataset
+
+        [Column("1")]
+        public float SepalLength; // measurement; units are not stated in this file
+
+        [Column("2")]
+        public float SepalWidth; // measurement; units are not stated in this file
+
+        [Column("3")]
+        public float PetalLength; // measurement; units are not stated in this file
+
+        [Column("4")]
+        public float PetalWidth; // measurement; units are not stated in this file
+    }
+}
diff --git a/examples/GettingStarted/MulticlassClassification_Iris/IrisPrediction.cs b/examples/GettingStarted/MulticlassClassification_Iris/IrisPrediction.cs
@@ -0,0 +1,10 @@
+using Microsoft.ML.Runtime.Api;
+
+namespace MulticlassClassification_Iris
+{
+    public class IrisPrediction // prediction output type for the iris sample
+    {
+        [ColumnName("Score")]
+        public float[] Score; // bound to "Score"; presumably one score per iris class - confirm against the trainer used
+    }
+}
diff --git a/examples/GettingStarted/MulticlassClassification_Iris/MulticlassClassification_Iris.csproj b/examples/GettingStarted/MulticlassClassification_Iris/MulticlassClassification_Iris.csproj
@@ -0,0 +1,25 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup> <!-- NOTE(review): the sibling BinaryClassification_SentimentAnalysis project also sets LangVersion=latest; add it here if this project's Program.cs uses an async Main -->
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp2.0</TargetFramework>
+  </PropertyGroup>
+
+  <ItemGroup> <!-- Shared iris dataset files: linked under datasets\ and copied to the output directory -->
+    <Content Include="..\..\datasets\iris_test.txt" Link="datasets\iris_test.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </Content>
+    <Content Include="..\..\datasets\iris_train.txt" Link="datasets\iris_train.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </Content>
+  </ItemGroup>
+
+  <ItemGroup> <!-- Same Microsoft.ML package version as the sentiment analysis sample -->
+    <PackageReference Include="Microsoft.ML" Version="0.1.0" />
+  </ItemGroup>
+
+  <ItemGroup> <!-- NOTE(review): empty folder entry, presumably added by the IDE for the linked files - confirm it is needed -->
+    <Folder Include="datasets\" />
+  </ItemGroup>
+
+</Project>