Skip to content

Commit c67dd08

Browse files
committed
Added functionality to load images as VBuffer<byte> in ImageLoader. If no DataViewType options are provided it defaults to loading images as ImageDataViewType. Made LoadImages a part of the sample in ResnetV2101TransferLearningTrainTestSplit.cs. Addressed some of the comments from Zeeshan and Yael. Added a unit test for testing the API. Added TargetFrameworks to get cross platform functionality for System.IO.Stream.Read(Span<Byte>) which doesn't work for netstandard2.0.
1 parent ae2ac0d commit c67dd08

File tree

9 files changed

+246
-128
lines changed

9 files changed

+246
-128
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs

+24-58
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,22 @@ public static void Example()
5656

5757
IDataView trainDataset = trainTestData.TrainSet;
5858
IDataView testDataset = trainTestData.TestSet;
59-
60-
var pipeline = mlContext.Model.ImageClassification(
61-
"ImageVBuf", "Label",
62-
// Just by changing/selecting InceptionV3 here instead of
63-
// ResnetV2101 you can try a different architecture/pre-trained
64-
// model.
65-
arch: ImageClassificationEstimator.Architecture.ResnetV2101,
66-
epoch: 50,
67-
batchSize: 10,
68-
learningRate: 0.01f,
69-
metricsCallback: (metrics) => Console.WriteLine(metrics),
70-
validationSet: testDataset);
59+
var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, new VectorDataViewType(NumberDataViewType.Byte), "ImagePath")
60+
.Fit(testDataset)
61+
.Transform(testDataset);
62+
63+
var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, new VectorDataViewType(NumberDataViewType.Byte), "ImagePath")
64+
.Append(mlContext.Model.ImageClassification(
65+
"Image", "Label",
66+
// Just by changing/selecting InceptionV3 here instead of
67+
// ResnetV2101 you can try a different architecture/pre-trained
68+
// model.
69+
arch: ImageClassificationEstimator.Architecture.ResnetV2101,
70+
epoch: 50,
71+
batchSize: 10,
72+
learningRate: 0.01f,
73+
metricsCallback: (metrics) => Console.WriteLine(metrics),
74+
validationSet: validationSet));
7175

7276

7377
Console.WriteLine("*** Training the image classification model with " +
@@ -82,7 +86,7 @@ public static void Example()
8286
watch.Stop();
8387
long elapsedMs = watch.ElapsedMilliseconds;
8488

85-
Console.WriteLine("Training with transfer learning took: " +
89+
Console.WriteLine("Training with transfer learning took: " +
8690
(elapsedMs / 1000).ToString() + " seconds");
8791

8892
mlContext.Model.Save(trainedModel, shuffledFullImagesDataset.Schema,
@@ -99,7 +103,7 @@ public static void Example()
99103
loadedModel.GetOutputSchema(schema)["Label"].GetKeyValues(ref keys);
100104

101105
watch = System.Diagnostics.Stopwatch.StartNew();
102-
TrySinglePrediction(fullImagesetFolderPath, mlContext, loadedModel,
106+
TrySinglePrediction(fullImagesetFolderPath, mlContext, loadedModel,
103107
keys.DenseValues().ToArray());
104108

105109
watch.Stop();
@@ -133,8 +137,7 @@ private static void TrySinglePrediction(string imagesForPredictions,
133137

134138
ImageData imageToPredict = new ImageData
135139
{
136-
ImagePath = testImages.First().ImagePath,
137-
ImageVBuf = imgData
140+
ImagePath = testImages.First().ImagePath
138141
};
139142

140143
var prediction = predictionEngine.Predict(imageToPredict);
@@ -168,45 +171,15 @@ private static void EvaluateModel(MLContext mlContext,
168171
Console.WriteLine("Predicting and Evaluation took: " +
169172
(elapsed2Ms / 1000).ToString() + " seconds");
170173
}
171-
172-
public static int LoadDataIntoBuffer(string path, ref VBuffer<Byte> imgData)
173-
{
174-
int count = -1;
175-
// bufferSize == 1 used to avoid unnecessary buffer in FileStream
176-
using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1))
177-
{
178-
long fileLength = fs.Length;
179-
if (fileLength > int.MaxValue)
180-
throw new IOException($"File {path} too big to open.");
181-
else if (fileLength == 0)
182-
{
183-
byte[] _imageBuffer;
184-
185-
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content.
186-
// Thus we need to assume 0 doesn't mean empty.
187-
_imageBuffer = File.ReadAllBytes(path);
188-
count = _imageBuffer.Length;
189-
Console.WriteLine("File length is zero");
190-
}
191-
192-
count = (int)fileLength;
193-
var editor = VBufferEditor.Create(ref imgData, count);
194-
fs.Read(editor.Values);
195-
imgData = editor.Commit();
196-
197-
return count;
198-
}
199-
}
200-
174+
201175
public static IEnumerable<ImageData> LoadImagesFromDirectory(string folder,
202176
bool useFolderNameasLabel = true)
203177
{
204178
var files = Directory.GetFiles(folder, "*",
205179
searchOption: SearchOption.AllDirectories);
206-
VBuffer<Byte> imgData = new VBuffer<byte>();
207180
foreach (var file in files)
208181
{
209-
if (Path.GetExtension(file) != ".JPEG" && Path.GetExtension(file) != ".jpg")
182+
if (Path.GetExtension(file) != ".JPEG" && Path.GetExtension(file) != ".jpg")
210183
continue;
211184

212185
var label = Path.GetFileName(file);
@@ -223,15 +196,11 @@ public static IEnumerable<ImageData> LoadImagesFromDirectory(string folder,
223196
}
224197
}
225198
}
226-
227-
// Get the buffer of bytes
228-
int imgSize = LoadDataIntoBuffer(file, ref imgData);
229-
199+
230200
yield return new ImageData()
231201
{
232202
ImagePath = file,
233-
Label = label,
234-
ImageVBuf = imgData
203+
Label = label
235204
};
236205

237206
}
@@ -328,8 +297,6 @@ public class ImageData
328297

329298
[LoadColumn(1)]
330299
public string Label;
331-
332-
public VBuffer<byte> ImageVBuf;
333300
}
334301

335302
public class ImagePrediction
@@ -341,5 +308,4 @@ public class ImagePrediction
341308
public UInt32 PredictedLabel;
342309
}
343310
}
344-
}
345-
311+
}

docs/samples/Microsoft.ML.Samples/Program.cs

+1-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ public static class Program
1010

1111
internal static void RunAll()
1212
{
13-
1413
int samples = 0;
1514
foreach (var type in Assembly.GetExecutingAssembly().GetTypes())
1615
{
@@ -24,8 +23,7 @@ internal static void RunAll()
2423
}
2524
}
2625

27-
Console.WriteLine("Number of samples that ran without any exception: " + samples);
28-
26+
Console.WriteLine("Number of samples that ran without any exception: " + samples);
2927
}
3028
}
3129
}

src/Microsoft.ML.Core/Data/IEstimator.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ internal Column(string name, VectorKind vecKind, DataViewType itemType, bool isK
6464
Contracts.CheckNonEmpty(name, nameof(name));
6565
Contracts.CheckValueOrNull(annotations);
6666
Contracts.CheckParam(!(itemType is KeyDataViewType), nameof(itemType), "Item type cannot be a key");
67-
Contracts.CheckParam(!(itemType is VectorDataViewType), nameof(itemType), "Item type cannot be a vector");
67+
//Contracts.CheckParam(!(itemType is VectorDataViewType), nameof(itemType), "Item type cannot be a vector");
6868
Contracts.CheckParam(!isKey || KeyDataViewType.IsValidDataType(itemType.RawType), nameof(itemType), "The item type must be valid for a key");
6969

7070
Name = name;

src/Microsoft.ML.Dnn/ImageClassificationTransform.cs

+9-9
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ private void CheckTrainingParameters(ImageClassificationEstimator.Options option
189189
return (jpegData, resizedImage);
190190
}
191191

192-
private static Tensor Encode(VBuffer<byte> buffer)
192+
private static Tensor EncodeByteAsString(VBuffer<byte> buffer)
193193
{
194194
int length = buffer.Length;
195195
var size = c_api.TF_StringEncodedSize((UIntPtr)length);
@@ -220,9 +220,9 @@ public ImageProcessor(ImageClassificationTransformer transformer)
220220
_imagePreprocessingRunner.AddOutputs(transformer._resizedImageTensorName);
221221
}
222222

223-
public Tensor ProcessImage(VBuffer<byte> imgBuf)
223+
public Tensor ProcessImage(VBuffer<byte> imageBuffer)
224224
{
225-
var imageTensor = Encode(imgBuf);
225+
var imageTensor = EncodeByteAsString(imageBuffer);
226226
var processedTensor = _imagePreprocessingRunner.AddInput(imageTensor, 0).Run()[0];
227227
imageTensor.Dispose();
228228
return processedTensor;
@@ -240,26 +240,26 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName
240240
labelColumnName, typeof(uint).ToString(),
241241
labelColumn.Type.RawType.ToString());
242242

243-
var imageBufColumn = input.Schema[imageColumnName];
243+
var imageColumn = input.Schema[imageColumnName];
244244
Runner runner = new Runner(_session);
245245
runner.AddOutputs(outputTensorName);
246246

247247
using (TextWriter writer = File.CreateText(cacheFilePath))
248-
using (var cursor = input.GetRowCursor(input.Schema.Where(c => c.Index == labelColumn.Index || c.Index == imageBufColumn.Index)))
248+
using (var cursor = input.GetRowCursor(input.Schema.Where(c => c.Index == labelColumn.Index || c.Index == imageColumn.Index)))
249249
{
250250
var labelGetter = cursor.GetGetter<uint>(labelColumn);
251-
var imageBufGetter = cursor.GetGetter<VBuffer<byte>>(imageBufColumn);
251+
var imageGetter = cursor.GetGetter<VBuffer<byte>>(imageColumn);
252252
UInt32 label = UInt32.MaxValue;
253-
VBuffer<byte> imageBuf = default;
253+
VBuffer<byte> image = default;
254254
runner.AddInput(inputTensorName);
255255
ImageClassificationMetrics metrics = new ImageClassificationMetrics();
256256
metrics.Bottleneck = new BottleneckMetrics();
257257
metrics.Bottleneck.DatasetUsed = dataset;
258258
while (cursor.MoveNext())
259259
{
260260
labelGetter(ref label);
261-
imageBufGetter(ref imageBuf);
262-
var imageTensor = imageProcessor.ProcessImage(imageBuf);
261+
imageGetter(ref image);
262+
var imageTensor = imageProcessor.ProcessImage(image);
263263
runner.AddInput(imageTensor, 0);
264264
var featurizedImage = runner.Run()[0]; // Reuse memory?
265265
writer.WriteLine(label - 1 + "," + string.Join(",", featurizedImage.ToArray<float>()));

src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs

+47
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,53 @@ internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog,
9797
env.CheckValue(columns, nameof(columns));
9898
return new ImageLoadingEstimator(env, imageFolder, InputOutputColumnPair.ConvertToValueTuples(columns));
9999
}
100+
/// <summary>
101+
/// Create a <see cref="ImageLoadingEstimator"/>, which loads the data from the column specified in <paramref name="inputColumnName"/>
102+
/// as an image to a new column: <paramref name="outputColumnName"/>.
103+
/// </summary>
104+
/// <param name="catalog">The transform's catalog.</param>
105+
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
106+
/// This column's data type is the one specified by <paramref name="type"/> (for example, <see cref="VectorDataViewType"/>).</param>
107+
/// <param name="inputColumnName">Name of the column with paths to the images to load.
108+
/// This estimator operates over text data.</param>
109+
/// <param name="imageFolder">Folder where to look for images.</param>
110+
/// <param name="type">Image type - VectorDataViewType or ImageDataViewType. Defaults to ImageDataViewType if not specified or null.</param>
111+
/// <example>
112+
/// <format type="text/markdown">
113+
/// <![CDATA[
114+
/// [!code-csharp[LoadImages](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs)]
115+
/// ]]></format>
116+
/// </example>
117+
public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string outputColumnName, string imageFolder, DataViewType type, string inputColumnName = null)
118+
=> new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, type, new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
119+
120+
/// <summary>
121+
/// Loads the images from the <see cref="ImageLoadingTransformer.ImageFolder" /> into memory.
122+
/// </summary>
123+
/// <remarks>
124+
/// The images get loaded in memory as a <see cref="VectorDataViewType" /> of bytes.
125+
/// Loading is the first step of almost every pipeline that does image processing, and further analysis on images.
126+
/// The images to load need to be in the formats supported by <see cref = "VectorDataViewType" /> of bytes.
127+
/// For end-to-end image processing pipelines, and scenarios in your applications, see the
128+
/// <a href="https://github.com/dotnet/machinelearning-samples/tree/master/samples/csharp/getting-started"> examples in the machinelearning-samples github repository.</a>
129+
/// </remarks>
130+
/// <param name="catalog">The transform's catalog.</param>
131+
/// <param name="imageFolder">Folder where to look for images.</param>
132+
/// <param name="type">Image type - VectorDataViewType or ImageDataViewType. Defaults to ImageDataViewType if not specified or null.</param>
133+
/// <param name="columns">Specifies the names of the input columns for the transformation, and their respective output column names.</param>
134+
/// <example>
135+
/// <format type="text/markdown">
136+
/// <![CDATA[
137+
/// [!code-csharp[LoadImagesAsBytes](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ImageAnalytics/LoadImages.cs)]
138+
/// ]]></format>
139+
/// </example>
140+
[BestFriend]
141+
internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, DataViewType type, params InputOutputColumnPair[] columns)
142+
{
143+
var env = CatalogUtils.GetEnvironment(catalog);
144+
env.CheckValue(columns, nameof(columns));
145+
return new ImageLoadingEstimator(env, imageFolder, type, InputOutputColumnPair.ConvertToValueTuples(columns));
146+
}
100147

101148
/// <summary>
102149
/// Create a <see cref="ImagePixelExtractingEstimator"/>, which extracts pixels values from the data specified in column: <paramref name="inputColumnName"/>

0 commit comments

Comments
 (0)