Skip to content

Modernize JPEG Color Converters #2917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 6, 2025
30 changes: 1 addition & 29 deletions src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -616,35 +616,7 @@ public static Vector256<float> MultiplyAdd(
return Fma.MultiplyAdd(vm1, vm0, va);
}

return Avx.Add(Avx.Multiply(vm0, vm1), va);
}

/// <summary>
/// Performs a multiplication and an addition of the <see cref="Vector128{Single}"/>.
/// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
/// </summary>
/// <remarks>ret = (vm0 * vm1) + va</remarks>
/// <param name="va">The vector to add to the intermediate result.</param>
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
/// <returns>The <see cref="Vector256{T}"/>.</returns>
[MethodImpl(InliningOptions.AlwaysInline)]
public static Vector128<float> MultiplyAdd(
Vector128<float> va,
Vector128<float> vm0,
Vector128<float> vm1)
{
if (Fma.IsSupported)
{
return Fma.MultiplyAdd(vm1, vm0, va);
}

if (AdvSimd.IsSupported)
{
return AdvSimd.Add(AdvSimd.Multiply(vm0, vm1), va);
}

return Sse.Add(Sse.Multiply(vm0, vm1), va);
return va + (vm0 * vm1);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually intend on removing this copy of the method in future PRs. Doing so in this one would have touched too many unrelated files.

}

/// <summary>
Expand Down
37 changes: 23 additions & 14 deletions src/ImageSharp/Common/Helpers/SimdUtils.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// Copyright (c) Six Labors.
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

namespace SixLabors.ImageSharp;
Expand Down Expand Up @@ -36,30 +36,39 @@ internal static Vector4 PseudoRound(this Vector4 v)

/// <summary>
/// Rounds all values in 'v' to the nearest integer following <see cref="MidpointRounding.ToEven"/> semantics.
/// Source:
/// <see>
/// <cref>https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110</cref>
/// </see>
/// </summary>
/// <param name="v">The vector</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector<float> FastRound(this Vector<float> v)
{
if (Avx2.IsSupported)
// .NET9+ has a built-in method for this Vector.Round
if (Avx2.IsSupported && Vector<float>.Count == Vector256<float>.Count)
{
Copy link
Member Author

@JimBobSquarePants JimBobSquarePants May 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know whether this is ever false, but I wanted the check to be a little stricter given the future potential for 512-bit Vector<T> support.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It can be false even when Avx2.IsSupported as someone can set DOTNET_PreferredVectorBitWidth=128, so having an explicit check is goodness.

Notably you have Sse41.RoundToNearestInteger that can be used for 128-bit, Avx512F.RoundScale that can be used for 512-bit, and AdvSimd.RoundToNearest that can be used for Arm64.

.NET 9+ defines Vector.Round which can be used to handle the xplat consideration without having to manually code it all (and same method exists on Vector128/256/512)

ref Vector256<float> v256 = ref Unsafe.As<Vector<float>, Vector256<float>>(ref v);
Vector256<float> vRound = Avx.RoundToNearestInteger(v256);
return Unsafe.As<Vector256<float>, Vector<float>>(ref vRound);
}
else

if (Sse41.IsSupported && Vector<float>.Count == Vector128<float>.Count)
{
ref Vector128<float> v128 = ref Unsafe.As<Vector<float>, Vector128<float>>(ref v);
Vector128<float> vRound = Sse41.RoundToNearestInteger(v128);
return Unsafe.As<Vector128<float>, Vector<float>>(ref vRound);
}

if (AdvSimd.IsSupported && Vector<float>.Count == Vector128<float>.Count)
{
var magic0 = new Vector<int>(int.MinValue); // 0x80000000
var sgn0 = Vector.AsVectorSingle(magic0);
var and0 = Vector.BitwiseAnd(sgn0, v);
var or0 = Vector.BitwiseOr(and0, new Vector<float>(8388608.0f));
var add0 = Vector.Add(v, or0);
return Vector.Subtract(add0, or0);
ref Vector128<float> v128 = ref Unsafe.As<Vector<float>, Vector128<float>>(ref v);
Vector128<float> vRound = AdvSimd.RoundToNearest(v128);
return Unsafe.As<Vector128<float>, Vector<float>>(ref vRound);
}

// https://github.com/g-truc/glm/blob/master/glm/simd/common.h#L110
Vector<float> sign = v & new Vector<float>(-0F);
Vector<float> val_2p23_f32 = sign | new Vector<float>(8388608F);

val_2p23_f32 = (v + val_2p23_f32) - val_2p23_f32;
return val_2p23_f32 | sign;
}

[Conditional("DEBUG")]
Expand Down
56 changes: 54 additions & 2 deletions src/ImageSharp/Common/Helpers/Vector128Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -193,13 +193,65 @@ public static Vector128<int> ConvertToInt32RoundToEven(Vector128<float> vector)
return AdvSimd.ConvertToInt32RoundToEven(vector);
}

Vector128<float> sign = vector & Vector128.Create(-0.0f);
Vector128<float> val_2p23_f32 = sign | Vector128.Create(8388608.0f);
Vector128<float> sign = vector & Vector128.Create(-0F);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No general helper exists for .NET 9+ here...

It's something we could probably pattern recognize as ConvertToInt32(Round(vector)) though. If it's a pattern you'd like to see us optimize, logging a suggestion on dotnet/runtime would be beneficial. Alternatively some ConvertToInt32(vector, MidpointRounding) API could be provided as well and an API suggestion could be opened.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll have a look at creating an issue. Thanks.

Vector128<float> val_2p23_f32 = sign | Vector128.Create(8388608F);

val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32;
return Vector128.ConvertToInt32(val_2p23_f32 | sign);
}

/// <summary>
/// Rounds all values in <paramref name="vector"/> to the nearest integer
/// following <see cref="MidpointRounding.ToEven"/> semantics.
/// </summary>
/// <remarks>
/// Uses <see cref="Sse41.RoundToNearestInteger(Vector128{float})"/> or
/// <see cref="AdvSimd.RoundToNearest(Vector128{float})"/> when supported. Otherwise a portable
/// fallback adds and then subtracts a sign-matched 2^23, which forces the floating point unit
/// to round at integer granularity. That trick is only valid for magnitudes below 2^23, so
/// larger elements (which are already integral), NaN and infinity are returned unchanged.
/// </remarks>
/// <param name="vector">The vector</param>
/// <returns>The <see cref="Vector128{Single}"/> containing the rounded values.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> RoundToNearestInteger(Vector128<float> vector)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToNearestInteger(vector);
    }

    if (AdvSimd.IsSupported)
    {
        return AdvSimd.RoundToNearest(vector);
    }

    // 2^23: from this magnitude upwards every float is already a whole number.
    Vector128<float> limit = Vector128.Create(8388608F);
    Vector128<float> sign = vector & Vector128.Create(-0F);
    Vector128<float> val_2p23_f32 = sign | limit;

    // Re-apply the sign so that values rounding to zero keep their original sign (-0.3 -> -0).
    Vector128<float> rounded = ((vector + val_2p23_f32) - val_2p23_f32) | sign;

    // Adding 2^23 to a value at or above 2^23 can itself round (e.g. 8388609 -> 8388608),
    // so only take the rounded lane when the magnitude is below the limit.
    // NaN compares false and therefore passes through untouched.
    Vector128<float> useRounded = Vector128.LessThan(Vector128.Abs(vector), limit);
    return Vector128.ConditionalSelect(useRounded, rounded, vector);
}

/// <summary>
/// Performs a multiplication and an addition of the <see cref="Vector128{Single}"/>.
/// </summary>
/// <remarks>
/// ret = (vm0 * vm1) + va.
/// The FMA and AdvSimd paths use the fused multiply-add intrinsics; the portable
/// fallback performs a separate multiply followed by an add.
/// </remarks>
/// <param name="va">The vector to add to the intermediate result.</param>
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
/// <returns>The <see cref="Vector128{T}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> MultiplyAdd(
    Vector128<float> va,
    Vector128<float> vm0,
    Vector128<float> vm1)
{
    if (Fma.IsSupported)
    {
        // Fma.MultiplyAdd(a, b, c) computes (a * b) + c, so the addend goes last.
        return Fma.MultiplyAdd(vm0, vm1, va);
    }

    if (AdvSimd.IsSupported)
    {
        // AdvSimd takes the addend first: addend + (left * right).
        return AdvSimd.FusedMultiplyAdd(va, vm0, vm1);
    }

    return va + (vm0 * vm1);
}

/// <summary>
/// Packs signed 16-bit integers to unsigned 8-bit integers and saturates.
/// </summary>
Expand Down
46 changes: 44 additions & 2 deletions src/ImageSharp/Common/Helpers/Vector256Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,55 @@ public static Vector256<int> ConvertToInt32RoundToEven(Vector256<float> vector)
return Vector256.Create(lower, upper);
}

Vector256<float> sign = vector & Vector256.Create(-0.0f);
Vector256<float> val_2p23_f32 = sign | Vector256.Create(8388608.0f);
Vector256<float> sign = vector & Vector256.Create(-0F);
Vector256<float> val_2p23_f32 = sign | Vector256.Create(8388608F);

val_2p23_f32 = (vector + val_2p23_f32) - val_2p23_f32;
return Vector256.ConvertToInt32(val_2p23_f32 | sign);
}

/// <summary>
/// Rounds all values in <paramref name="vector"/> to the nearest integer
/// following <see cref="MidpointRounding.ToEven"/> semantics.
/// </summary>
/// <remarks>
/// Uses <see cref="Avx.RoundToNearestInteger(Vector256{float})"/> when supported. Otherwise a
/// portable fallback adds and then subtracts a sign-matched 2^23, which forces the floating
/// point unit to round at integer granularity. That trick is only valid for magnitudes below
/// 2^23, so larger elements (which are already integral), NaN and infinity are returned unchanged.
/// </remarks>
/// <param name="vector">The vector</param>
/// <returns>The <see cref="Vector256{Single}"/> containing the rounded values.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> RoundToNearestInteger(Vector256<float> vector)
{
    if (Avx.IsSupported)
    {
        return Avx.RoundToNearestInteger(vector);
    }

    // 2^23: from this magnitude upwards every float is already a whole number.
    Vector256<float> limit = Vector256.Create(8388608F);
    Vector256<float> sign = vector & Vector256.Create(-0F);
    Vector256<float> val_2p23_f32 = sign | limit;

    // Re-apply the sign so that values rounding to zero keep their original sign (-0.3 -> -0).
    Vector256<float> rounded = ((vector + val_2p23_f32) - val_2p23_f32) | sign;

    // Adding 2^23 to a value at or above 2^23 can itself round (e.g. 8388609 -> 8388608),
    // so only take the rounded lane when the magnitude is below the limit.
    // NaN compares false and therefore passes through untouched.
    Vector256<float> useRounded = Vector256.LessThan(Vector256.Abs(vector), limit);
    return Vector256.ConditionalSelect(useRounded, rounded, vector);
}

/// <summary>
/// Multiplies two <see cref="Vector256{Single}"/> values element-wise and adds a third.
/// </summary>
/// <remarks>ret = (vm0 * vm1) + va</remarks>
/// <param name="va">The addend applied to the product.</param>
/// <param name="vm0">The left multiplicand.</param>
/// <param name="vm1">The right multiplicand.</param>
/// <returns>The <see cref="Vector256{T}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> MultiplyAdd(
    Vector256<float> va,
    Vector256<float> vm0,
    Vector256<float> vm1)
{
    if (!Fma.IsSupported)
    {
        // Portable path: plain multiply followed by an add.
        Vector256<float> product = vm0 * vm1;
        return va + product;
    }

    // Fma.MultiplyAdd(a, b, c) computes (a * b) + c.
    return Fma.MultiplyAdd(vm0, vm1, va);
}

[DoesNotReturn]
private static void ThrowUnreachableException() => throw new UnreachableException();
}
45 changes: 45 additions & 0 deletions src/ImageSharp/Common/Helpers/Vector512Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,51 @@ public static Vector512<int> ConvertToInt32RoundToEven(Vector512<float> vector)
return Vector512.ConvertToInt32(val_2p23_f32 | sign);
}

/// <summary>
/// Rounds all values in <paramref name="vector"/> to the nearest integer
/// following <see cref="MidpointRounding.ToEven"/> semantics.
/// </summary>
/// <remarks>
/// Uses the AVX-512F round-scale instruction when supported. Otherwise a portable fallback
/// adds and then subtracts a sign-matched 2^23, which forces the floating point unit to round
/// at integer granularity. That trick is only valid for magnitudes below 2^23, so larger
/// elements (which are already integral), NaN and infinity are returned unchanged.
/// </remarks>
/// <param name="vector">The vector</param>
/// <returns>The <see cref="Vector512{Single}"/> containing the rounded values.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector512<float> RoundToNearestInteger(Vector512<float> vector)
{
    if (Avx512F.IsSupported)
    {
        // imm8 = 0b1000:
        // imm8[7:4] = 0b0000 -> preserve 0 fractional bits (round to whole numbers)
        // imm8[3:0] = 0b1000 -> _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC (round to nearest even, suppress exceptions)
        return Avx512F.RoundScale(vector, 0b0000_1000);
    }

    // 2^23: from this magnitude upwards every float is already a whole number.
    Vector512<float> limit = Vector512.Create(8388608F);
    Vector512<float> sign = vector & Vector512.Create(-0F);
    Vector512<float> val_2p23_f32 = sign | limit;

    // Re-apply the sign so that values rounding to zero keep their original sign (-0.3 -> -0).
    Vector512<float> rounded = ((vector + val_2p23_f32) - val_2p23_f32) | sign;

    // Adding 2^23 to a value at or above 2^23 can itself round (e.g. 8388609 -> 8388608),
    // so only take the rounded lane when the magnitude is below the limit.
    // NaN compares false and therefore passes through untouched.
    Vector512<float> useRounded = Vector512.LessThan(Vector512.Abs(vector), limit);
    return Vector512.ConditionalSelect(useRounded, rounded, vector);
}

/// <summary>
/// Multiplies two <see cref="Vector512{Single}"/> values element-wise and adds a third.
/// </summary>
/// <remarks>ret = (vm0 * vm1) + va</remarks>
/// <param name="va">The addend applied to the product.</param>
/// <param name="vm0">The left multiplicand.</param>
/// <param name="vm1">The right multiplicand.</param>
/// <returns>The <see cref="Vector512{T}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector512<float> MultiplyAdd(
    Vector512<float> va,
    Vector512<float> vm0,
    Vector512<float> vm1)
{
    if (!Avx512F.IsSupported)
    {
        // Portable path: plain multiply followed by an add.
        Vector512<float> product = vm0 * vm1;
        return va + product;
    }

    // Avx512F.FusedMultiplyAdd(a, b, c) computes (a * b) + c.
    return Avx512F.FusedMultiplyAdd(vm0, vm1, va);
}

[DoesNotReturn]
private static void ThrowUnreachableException() => throw new UnreachableException();
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ public CmykScalar(int precision)
}

/// <inheritdoc/>
public override void ConvertToRgbInplace(in ComponentValues values) =>
ConvertToRgbInplace(values, this.MaximumValue);
public override void ConvertToRgbInPlace(in ComponentValues values) =>
ConvertToRgbInPlace(values, this.MaximumValue);

/// <inheritdoc/>
public override void ConvertFromRgb(in ComponentValues values, Span<float> r, Span<float> g, Span<float> b)
=> ConvertFromRgb(values, this.MaximumValue, r, g, b);
public override void ConvertFromRgb(in ComponentValues values, Span<float> rLane, Span<float> gLane, Span<float> bLane)
=> ConvertFromRgb(values, this.MaximumValue, rLane, gLane, bLane);

public static void ConvertToRgbInplace(in ComponentValues values, float maxValue)
public static void ConvertToRgbInPlace(in ComponentValues values, float maxValue)
{
Span<float> c0 = values.Component0;
Span<float> c1 = values.Component1;
Expand All @@ -42,7 +42,7 @@ public static void ConvertToRgbInplace(in ComponentValues values, float maxValue
}
}

public static void ConvertFromRgb(in ComponentValues values, float maxValue, Span<float> r, Span<float> g, Span<float> b)
public static void ConvertFromRgb(in ComponentValues values, float maxValue, Span<float> rLane, Span<float> gLane, Span<float> bLane)
{
Span<float> c = values.Component0;
Span<float> m = values.Component1;
Expand All @@ -51,9 +51,9 @@ public static void ConvertFromRgb(in ComponentValues values, float maxValue, Spa

for (int i = 0; i < c.Length; i++)
{
float ctmp = 255f - r[i];
float mtmp = 255f - g[i];
float ytmp = 255f - b[i];
float ctmp = 255f - rLane[i];
float mtmp = 255f - gLane[i];
float ytmp = 255f - bLane[i];
float ktmp = MathF.Min(MathF.Min(ctmp, mtmp), ytmp);

if (ktmp >= 255f)
Expand Down
Loading
Loading