Skip to content

Implement LFU sketch using arm64 intrinsics (redux) #648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Dec 1, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
merge
  • Loading branch information
bitfaster committed Nov 26, 2024
commit 3a5a72cd6f35c1add6d0663268ad3b8720b5cae6
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<LangVersion>latest</LangVersion>
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
<!-- https://stackoverflow.com/a/59916801/131345 -->
Expand Down Expand Up @@ -41,4 +42,4 @@
<DefineConstants>MacOS</DefineConstants>
</PropertyGroup>

</Project>
</Project>
20 changes: 16 additions & 4 deletions BitFaster.Caching.Benchmarks/Lfu/SketchFrequency.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ public class SketchFrequency

private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
private CmSketchNoPin<int, DetectIsa> blockAvxNoPin;
private CmSketchCore<int, DetectIsa> blockAvx;
private CmSketchCore<int, DetectIsa> blockVector;
private CmSketchCore512<int, DetectIsa> blockVector512;

[Params(32_768, 524_288, 8_388_608, 134_217_728)]
public int Size { get; set; }
Expand All @@ -36,7 +37,8 @@ public void Setup()

blockStd = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
blockAvxNoPin = new CmSketchNoPin<int, DetectIsa>(Size, EqualityComparer<int>.Default);
blockAvx = new CmSketchCore<int, DetectIsa>(Size, EqualityComparer<int>.Default);
blockVector = new CmSketchCore<int, DetectIsa>(Size, EqualityComparer<int>.Default);
blockVector512 = new CmSketchCore512<int, DetectIsa>(Size, EqualityComparer<int>.Default);
}

[Benchmark(Baseline = true, OperationsPerInvoke = iterations)]
Expand Down Expand Up @@ -74,7 +76,7 @@ public int FrequencyBlockAvxNotPinned()
{
int count = 0;
for (int i = 0; i < iterations; i++)
count += blockAvxNoPin.EstimateFrequency(i) > blockAvx.EstimateFrequency(i + 1) ? 1 : 0;
count += blockAvxNoPin.EstimateFrequency(i) > blockVector.EstimateFrequency(i + 1) ? 1 : 0;

return count;
}
Expand All @@ -84,7 +86,17 @@ public int FrequencyBlockAvxPinned()
{
int count = 0;
for (int i = 0; i < iterations; i++)
count += blockAvx.EstimateFrequency(i) > blockAvx.EstimateFrequency(i + 1) ? 1 : 0;
count += blockVector.EstimateFrequency(i) > blockVector.EstimateFrequency(i + 1) ? 1 : 0;

return count;
}

/// <summary>
/// Benchmarks <c>EstimateFrequency</c> on the <c>blockVector512</c> sketch.
/// Each loop iteration looks up the keys <c>i</c> and <c>i + 1</c> and counts
/// how often the first estimate is greater than the second.
/// </summary>
/// <returns>The number of iterations in which the estimate for <c>i</c> exceeded the estimate for <c>i + 1</c>.</returns>
[Benchmark(OperationsPerInvoke = iterations)]
public int FrequencyBlockAvxPinned512()
{
    int count = 0;
    for (int i = 0; i < iterations; i++)
    {
        // Both lookups go through blockVector512 so the measurement reflects only
        // that sketch and is not diluted by calls into blockVector.
        count += blockVector512.EstimateFrequency(i) > blockVector512.EstimateFrequency(i + 1) ? 1 : 0;
    }

    return count;
}
Expand Down
14 changes: 13 additions & 1 deletion BitFaster.Caching.Benchmarks/Lfu/SketchIncrement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace BitFaster.Caching.Benchmarks.Lfu
[SimpleJob(RuntimeMoniker.Net90)]
[MemoryDiagnoser(displayGenColumns: false)]
[HideColumns("Job", "Median", "RatioSD", "Alloc Ratio")]
[ColumnChart(Title = "Sketch Increment ({JOB})")]
[ColumnChart(Title = "Sketch Increment ({JOB})", Colors = "#cd5c5c,#fa8072,#ffa07a")]
public class SketchIncrement
{
const int iterations = 1_048_576;
Expand All @@ -23,6 +23,8 @@ public class SketchIncrement
private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
private CmSketchNoPin<int, DetectIsa> blockAvxNoPin;
private CmSketchCore<int, DetectIsa> blockAvx;
private CmSketchCore512<int, DetectIsa> blockVector512;


[Params(32_768, 524_288, 8_388_608, 134_217_728)]
public int Size { get; set; }
Expand All @@ -36,6 +38,7 @@ public void Setup()
blockStd = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
blockAvxNoPin = new CmSketchNoPin<int, DetectIsa>(Size, EqualityComparer<int>.Default);
blockAvx = new CmSketchCore<int, DetectIsa>(Size, EqualityComparer<int>.Default);
blockVector512 = new CmSketchCore512<int, DetectIsa>(Size, EqualityComparer<int>.Default);
}

[Benchmark(Baseline = true, OperationsPerInvoke = iterations)]
Expand Down Expand Up @@ -82,5 +85,14 @@ public void IncBlockAvxPinned()
blockAvx.Increment(i);
}
}

/// <summary>
/// Benchmarks <c>Increment</c> on the <c>blockVector512</c> sketch by incrementing
/// every key from 0 up to (but not including) <c>iterations</c>, in ascending order.
/// </summary>
[Benchmark(OperationsPerInvoke = iterations)]
public void IncBlockAvxPinned512()
{
    int key = 0;
    while (key < iterations)
    {
        blockVector512.Increment(key);
        key++;
    }
}
}
}
4 changes: 2 additions & 2 deletions BitFaster.Caching/BitFaster.Caching.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
<IsTrimmable>true</IsTrimmable>
<Nullable>enable</Nullable>
<!--Package Validation-->
<EnablePackageValidation>false</EnablePackageValidation>
<PackageValidationBaselineVersion>2.5.0</PackageValidationBaselineVersion>
<EnablePackageValidation>true</EnablePackageValidation>
<PackageValidationBaselineVersion>2.5.1</PackageValidationBaselineVersion>
</PropertyGroup>

<PropertyGroup Condition="'$(GITHUB_ACTIONS)' == 'true'">
Expand Down
20 changes: 12 additions & 8 deletions BitFaster.Caching/Lfu/CmSketchCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;


#if !NETSTANDARD2_0
Expand Down Expand Up @@ -37,13 +38,14 @@ public unsafe class CmSketchCore<T, I>
private const long OneMask = 0x1111111111111111L;

private long[] table;
#if NET6_0_OR_GREATER
private long* tableAddr;
#endif
private int sampleSize;
private int blockMask;
private int size;

#if NET6_0_OR_GREATER
private long* tableAddr;
#endif

private readonly IEqualityComparer<T> comparer;

/// <summary>
Expand Down Expand Up @@ -142,7 +144,7 @@ private void EnsureCapacity(long maximumSize)

#if NET6_0_OR_GREATER
I isa = default;
if (isa.IsAvx2Supported)
if (isa.IsAvx2Supported || isa.IsArm64Supported)
{
// over alloc by 8 to give 64 bytes padding, tableAddr is then aligned to 64 bytes
const int pad = 8;
Expand Down Expand Up @@ -378,7 +380,7 @@ private unsafe void IncrementAvx(T value)
#endif

#if NET6_0_OR_GREATER
[MethodImpl(MethodImplOptions.AggressiveOptimization | MethodImplOptions.AggressiveInlining)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe void IncrementArm(T value)
{
int blockHash = Spread(comparer.GetHashCode(value));
Expand All @@ -389,7 +391,8 @@ private unsafe void IncrementArm(T value)
Vector128<int> index = AdvSimd.And(AdvSimd.ShiftRightLogical(h, 1), Vector128.Create(0xf));
Vector128<int> blockOffset = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), AdvSimd.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));

fixed (long* tablePtr = table)
long* tablePtr = tableAddr;
//fixed (long* tablePtr = table)
{
int t0 = AdvSimd.Extract(blockOffset, 0);
int t1 = AdvSimd.Extract(blockOffset, 1);
Expand Down Expand Up @@ -429,7 +432,7 @@ private unsafe void IncrementArm(T value)
}
}

[MethodImpl(MethodImplOptions.AggressiveOptimization | MethodImplOptions.AggressiveInlining)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe int EstimateFrequencyArm(T value)
{
int blockHash = Spread(comparer.GetHashCode(value));
Expand All @@ -440,7 +443,8 @@ private unsafe int EstimateFrequencyArm(T value)
Vector128<int> index = AdvSimd.And(AdvSimd.ShiftRightLogical(h, 1), Vector128.Create(0xf));
Vector128<int> blockOffset = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), AdvSimd.And(h, Vector128.Create(1))), Vector128.Create(0, 2, 4, 6));

fixed (long* tablePtr = table)
long* tablePtr = tableAddr;
//fixed (long* tablePtr = table)
{
Vector128<long> tableVectorA = Vector128.Create(AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 0)), AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 1)));
Vector128<long> tableVectorB = Vector128.Create(AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 2)), AdvSimd.LoadVector64(tablePtr + AdvSimd.Extract(blockOffset, 3)));
Expand Down
Loading
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.