dotnet · michaelgsharp · Apr 26, 2023 · Feb 13, 2023 · Mar 22, 2023 · Mar 22, 2023
diff --git a/src/Microsoft.ML.AutoML/ColumnInference/ColumnGroupingInference.cs b/src/Microsoft.ML.AutoML/ColumnInference/ColumnGroupingInference.cs
@@ -8,6 +8,8 @@
 using Microsoft.ML.Data;
 using static Microsoft.ML.Data.TextLoader;
 
+using Range = Microsoft.ML.Data.TextLoader.Range;
+
 namespace Microsoft.ML.AutoML
 {
     /// <summary>

diff --git a/src/Microsoft.ML.ImageAnalytics/MLImage.cs b/src/Microsoft.ML.ImageAnalytics/MLImage.cs
@@ -2,13 +2,15 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using Microsoft.ML.Transforms.Image;
 using SkiaSharp;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using static Microsoft.ML.Transforms.Image.ImagePixelExtractingEstimator;
 
 namespace Microsoft.ML.Data
 {
@@ -126,6 +128,47 @@ private set
             }
         }
 
+        public ReadOnlySpan<byte> PixelsNoAlpha
+        {
+            get
+            {
+                ThrowInvalidOperationExceptionIfDisposed();
+
+                GetOrder(ColorsOrder.ARGB, ColorBits.Rgb, out int a, out int r, out int b, out int g);
+
+                // 3 is because we only want RGB not alpha channels
+                byte[] pixels = new byte[Height * Width * 3];
+
+                (int alphaIndex, int redIndex, int greenIndex, int blueIndex) = PixelFormat switch
+                {
+                    MLPixelFormat.Bgra32 => (3, 2, 1, 0),
+                    MLPixelFormat.Rgba32 => (3, 0, 1, 2),
+                    _ => throw new InvalidOperationException($"Image pixel format is not supported")
+                };
+
+                int cpix = Height * Width;
+                int ix = 0;
+                int pixelByteCount = alphaIndex > 0 ? 4 : 3;
+                int idstMin = 0;
+
+                for (int y = 0; y < Height; ++y)
+                {
+                    int idst = idstMin + y * Width;
+                    for (int x = 0; x < Width; x++, idst++)
+                    {
+                        if (a != -1) pixels[idst + cpix * a] = (byte)(alphaIndex > 0 ? Pixels[ix + alphaIndex] : 255);
+                        if (r != -1) pixels[idst + cpix * r] = Pixels[ix + redIndex];
+                        if (g != -1) pixels[idst + cpix * g] = Pixels[ix + greenIndex];
+                        if (b != -1) pixels[idst + cpix * b] = Pixels[ix + blueIndex];
+
+                        ix += pixelByteCount;
+                    }
+                }
+
+                return new ReadOnlySpan<byte>(pixels);
+            }
+        }
+
         /// <summary>
         /// Gets the image pixel data.
         /// </summary>

diff --git a/src/Microsoft.ML.TorchSharp/AutoFormerV2/Anchors.cs b/src/Microsoft.ML.TorchSharp/AutoFormerV2/Anchors.cs
@@ -0,0 +1,153 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using TorchSharp;
+using static TorchSharp.torch;
+using static TorchSharp.torch.nn;
+
+namespace Microsoft.ML.TorchSharp.AutoFormerV2
+{
+    /// <summary>
+    /// Anchor boxes are a set of predefined bounding boxes of a certain height and width, whose location and size can be adjusted by the regression head of model.
+    /// </summary>
+    public class Anchors : Module<Tensor, Tensor>
+    {
+        [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "MSML_PrivateFieldName:private field names not in _camelCase format", Justification = "Need to match TorchSharp.")]
+        private readonly int[] pyramidLevels;
+
+        [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "MSML_PrivateFieldName:private field names not in _camelCase format", Justification = "Need to match TorchSharp.")]
+        private readonly int[] strides;
+
+        [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "MSML_PrivateFieldName:private field names not in _camelCase format", Justification = "Need to match TorchSharp.")]
+        private readonly int[] sizes;
+
+        [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "MSML_PrivateFieldName:private field names not in _camelCase format", Justification = "Need to match TorchSharp.")]
+        private readonly double[] ratios;
+
+        [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "MSML_PrivateFieldName:private field names not in _camelCase format", Justification = "Need to match TorchSharp.")]
+        private readonly double[] scales;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Anchors"/> class.
+        /// </summary>
+        /// <param name="pyramidLevels">Pyramid levels.</param>
+        /// <param name="strides">Strides between adjacent bboxes.</param>
+        /// <param name="sizes">Different sizes for bboxes.</param>
+        /// <param name="ratios">Different ratios for height/width.</param>
+        /// <param name="scales">Scale size of bboxes.</param>
+        public Anchors(int[] pyramidLevels = null, int[] strides = null, int[] sizes = null, double[] ratios = null, double[] scales = null)
+            : base(nameof(Anchors))
+        {
+            this.pyramidLevels = pyramidLevels != null ? pyramidLevels : new int[] { 3, 4, 5, 6, 7 };
+            this.strides = strides != null ? strides : this.pyramidLevels.Select(x => (int)Math.Pow(2, x)).ToArray();
+            this.sizes = sizes != null ? sizes : this.pyramidLevels.Select(x => (int)Math.Pow(2, x + 2)).ToArray();
+            this.ratios = ratios != null ? ratios : new double[] { 0.5, 1, 2 };
+            this.scales = scales != null ? scales : new double[] { Math.Pow(2, 0), Math.Pow(2, 1.0 / 3.0), Math.Pow(2, 2.0 / 3.0) };
+        }
+
+        /// <summary>
+        /// Generate anchors for an image.
+        /// </summary>
+        /// <param name="image">Image in Tensor format.</param>
+        /// <returns>All anchors.</returns>
+        [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "MSML_GeneralName:This name should be PascalCased", Justification = "Need to match TorchSharp.")]
+        public override Tensor forward(Tensor image)
+        {
+            using (var scope = torch.NewDisposeScope())
+            {
+                var imageShape = torch.tensor(image.shape.AsSpan().Slice(2).ToArray());
+
+                // compute anchors over all pyramid levels
+                var allAnchors = torch.zeros(new long[] { 0, 4 }, dtype: torch.float32);
+
+                for (int idx = 0; idx < this.pyramidLevels.Length; ++idx)
+                {
+                    var x = this.pyramidLevels[idx];
+                    var shape = ((imageShape + Math.Pow(2, x) - 1) / Math.Pow(2, x)).to_type(torch.int32);
+                    var anchors = GenerateAnchors(
+                        baseSize: this.sizes[idx],
+                        ratios: this.ratios,
+                        scales: this.scales);
+                    var shiftedAnchors = Shift(shape, this.strides[idx], anchors);
+                    allAnchors = torch.cat(new List<Tensor>() { allAnchors, shiftedAnchors }, dim: 0);
+                }
+
+                var output = allAnchors.unsqueeze(dim: 0);
+                output = output.to(image.device);
+
+                return output.MoveToOuterDisposeScope();
+            }
+        }
+
+        /// <summary>
+        /// Generate a set of anchors given size, ratios and scales.
+        /// </summary>
+        /// <param name="baseSize">Base size for width and height.</param>
+        /// <param name="ratios">Ratios for height/width.</param>
+        /// <param name="scales">Scales to resize base size.</param>
+        /// <returns>A set of anchors.</returns>
+        private static Tensor GenerateAnchors(int baseSize = 16, double[] ratios = null, double[] scales = null)
+        {
+            using (var anchorsScope = torch.NewDisposeScope())
+            {
+                ratios ??= new double[] { 0.5, 1, 2 };
+                scales ??= new double[] { Math.Pow(2, 0), Math.Pow(2, 1.0 / 3.0), Math.Pow(2, 2.0 / 3.0) };
+
+                var numAnchors = ratios.Length * scales.Length;
+
+                // initialize output anchors
+                var anchors = torch.zeros(new long[] { numAnchors, 4 }, dtype: torch.float32);
+
+                // scale base_size
+                anchors[.., 2..] = baseSize * torch.tile(scales, new long[] { 2, ratios.Length }).transpose(1, 0);
+
+                // compute areas of anchors
+                var areas = torch.mul(anchors[.., 2], anchors[.., 3]);
+
+                // correct for ratios
+                anchors[.., 2] = torch.sqrt(areas / torch.repeat_interleave(ratios, new long[] { scales.Length }));
+                anchors[.., 3] = torch.mul(anchors[.., 2], torch.repeat_interleave(ratios, new long[] { scales.Length }));
+
+                // transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
+                anchors[.., torch.TensorIndex.Tensor(torch.tensor(new long[] { 0, 2 }, dtype: torch.int64))] -= torch.tile(anchors[.., 2] * 0.5, new long[] { 2, 1 }).T;
+                anchors[.., torch.TensorIndex.Tensor(torch.tensor(new long[] { 1, 3 }, dtype: torch.int64))] -= torch.tile(anchors[.., 3] * 0.5, new long[] { 2, 1 }).T;
+
+                return anchors.MoveToOuterDisposeScope();
+            }
+        }
+
+        /// <summary>
+        /// Duplicate and distribute anchors to different positions give border of positions and stride between positions.
+        /// </summary>
+        /// <param name="shape">Border to distribute anchors.</param>
+        /// <param name="stride">Stride between adjacent anchors.</param>
+        /// <param name="anchors">Anchors to distribute.</param>
+        /// <returns>The shifted anchors.</returns>
+        private static Tensor Shift(Tensor shape, int stride, Tensor anchors)
+        {
+            using (var anchorsScope = torch.NewDisposeScope())
+            {
+                Tensor shiftX = (torch.arange(start: 0, stop: (int)shape[1]) + 0.5) * stride;
+                Tensor shiftY = (torch.arange(start: 0, stop: (int)shape[0]) + 0.5) * stride;
+
+                var shiftXExpand = torch.repeat_interleave(shiftX.reshape(new long[] { shiftX.shape[0], 1 }), shiftY.shape[0], dim: 1);
+                shiftXExpand = shiftXExpand.transpose(0, 1).reshape(-1);
+                var shiftYExpand = torch.repeat_interleave(shiftY, shiftX.shape[0]);
+
+                List<Tensor> tensors = new List<Tensor> { shiftXExpand, shiftYExpand, shiftXExpand, shiftYExpand };
+                var shifts = torch.vstack(tensors).transpose(0, 1);
+
+                var a = anchors.shape[0];
+                var k = shifts.shape[0];
+                var allAnchors = anchors.reshape(new long[] { 1, a, 4 }) + shifts.reshape(new long[] { 1, k, 4 }).transpose(0, 1);
+                allAnchors = allAnchors.reshape(new long[] { k * a, 4 });
+
+                return allAnchors.MoveToOuterDisposeScope();
+            }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.TorchSharp/AutoFormerV2/Attention.cs b/src/Microsoft.ML.TorchSharp/AutoFormerV2/Attention.cs
@@ -0,0 +1,135 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using TorchSharp;
+using TorchSharp.Modules;
+using static TorchSharp.torch;
+using static TorchSharp.torch.nn;
+
+namespace Microsoft.ML.TorchSharp.AutoFormerV2
+{
+    /// <summary>
+    /// The Attention layer.
+    /// </summary>
+    public class Attention : Module<Tensor, Tensor, Tensor>
+    {
+#pragma warning disable MSML_PrivateFieldName // Need to match TorchSharp model names.
+        private readonly int numHeads;
+        private readonly double scale;
+        private readonly int keyChannels;
+        private readonly int nHkD;
+        private readonly int d;
+        private readonly int dh;
+        private readonly double attnRatio;
+
+        private readonly LayerNorm norm;
+        private readonly Linear qkv;
+        private readonly Linear proj;
+        private readonly Parameter attention_biases;
+        private readonly TensorIndex attention_bias_idxs;
+        private readonly Softmax softmax;
+#pragma warning restore MSML_PrivateFieldName
+
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Attention"/> class.
+        /// </summary>
+        /// <param name="inChannels">The input channels.</param>
+        /// <param name="keyChannels">The key channels.</param>
+        /// <param name="numHeads">The number of blocks.</param>
+        /// <param name="attnRatio">The ratio of attention.</param>
+        /// <param name="windowResolution">The resolution of window.</param>
+        public Attention(int inChannels, int keyChannels, int numHeads = 8, int attnRatio = 4, List<int> windowResolution = null)
+            : base(nameof(Attention))
+        {
+            windowResolution ??= new List<int>() { 14, 14 };
+            this.numHeads = numHeads;
+            this.scale = System.Math.Pow(keyChannels, -0.5);
+            this.keyChannels = keyChannels;
+            this.nHkD = numHeads * keyChannels;
+            this.d = attnRatio * keyChannels;
+            this.dh = this.d * numHeads;
+            this.attnRatio = attnRatio;
+            int h = this.dh + (this.nHkD * 2);
+
+            this.norm = nn.LayerNorm(new long[] { inChannels });
+            this.qkv = nn.Linear(inChannels, h);
+            this.proj = nn.Linear(this.dh, inChannels);
+
+            var points = new List<List<int>>();
+            for (int i = 0; i < windowResolution[0]; i++)
+            {
+                for (int j = 0; j < windowResolution[1]; j++)
+                {
+                    points.Add(new List<int>() { i, j });
+                }
+            }
+
+            int n = points.Count;
+            var attentionOffsets = new Dictionary<Tuple<int, int>, int>();
+            var idxs = new List<int>();
+            var idxsTensor = torch.zeros(new long[] { n, n }, dtype: torch.int64);
+            for (int i = 0; i < n; i++)
+            {
+                for (int j = 0; j < n; j++)
+                {
+                    var offset = new Tuple<int, int>(Math.Abs(points[i][0] - points[j][0]), Math.Abs(points[i][1] - points[j][1]));
+                    if (!attentionOffsets.ContainsKey(offset))
+                    {
+                        attentionOffsets.Add(offset, attentionOffsets.Count);
+                    }
+
+                    idxs.Add(attentionOffsets[offset]);
+                    idxsTensor[i][j] = attentionOffsets[offset];
+                }
+            }
+
+            this.attention_biases = nn.Parameter(torch.zeros(numHeads, attentionOffsets.Count));
+            this.attention_bias_idxs = TensorIndex.Tensor(idxsTensor);
+            this.softmax = nn.Softmax(dim: -1);
+        }
+
+        /// <inheritdoc/>
+        [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", "MSML_GeneralName:This name should be PascalCased", Justification = "Need to match TorchSharp.")]
+        public override Tensor forward(Tensor x, Tensor mask)
+        {
+            using (var scope = torch.NewDisposeScope())
+            {
+                long b = x.shape[0];
+                long n = x.shape[1];
+                long c = x.shape[2];
+                x = this.norm.forward(x);
+                var qkv = this.qkv.forward(x);
+                qkv = qkv.view(b, n, this.numHeads, -1);
+                var tmp = qkv.split(new long[] { this.keyChannels, this.keyChannels, this.d }, dim: 3);
+                var q = tmp[0];
+                var k = tmp[1];
+                var v = tmp[2];
+                q = q.permute(0, 2, 1, 3);
+                k = k.permute(0, 2, 1, 3);
+                v = v.permute(0, 2, 1, 3);
+
+                var attn = (torch.matmul(q, k.transpose(-2, -1)) * this.scale) + this.attention_biases[.., this.attention_bias_idxs];
+                if (!(mask is null))
+                {
+                    long nW = mask.shape[0];
+                    attn = attn.view(-1, nW, this.numHeads, n, n) + mask.unsqueeze(1).unsqueeze(0);
+                    attn = attn.view(-1, this.numHeads, n, n);
+                    attn = this.softmax.forward(attn);
+                }
+                else
+                {
+                    attn = this.softmax.forward(attn);
+                }
+
+                x = torch.matmul(attn, v).transpose(1, 2).reshape(b, n, this.dh);
+                x = this.proj.forward(x);
+
+                return x.MoveToOuterDisposeScope();
+            }
+        }
+    }
+}