|
8 | 8 | #if !NETSTANDARD2_0
|
9 | 9 | using System.Runtime.Intrinsics;
|
10 | 10 | using System.Runtime.Intrinsics.X86;
|
| 11 | + |
11 | 12 | #endif
|
12 | 13 |
|
13 | 14 | #if NET6_0_OR_GREATER
|
@@ -169,41 +170,61 @@ private void EnsureCapacity(long maximumSize)
|
169 | 170 |
|
// Rendered diff of EstimateFrequencyStd: rows marked '-' are the removed loop-based version,
// rows marked '+' are the unrolled replacement that computes the same four values.
// The method derives blockHash from comparer.GetHashCode(value) via Spread, derives counterHash
// from it via Rehash, reads one 4-bit counter from each of four table slots, and returns the
// smallest of the four counters.
// NOTE(review): once the stackalloc row is removed, nothing visible in this method appears to
// need the `unsafe` modifier - confirm and consider dropping it.
170 | 171 | private unsafe int EstimateFrequencyStd(T value)
|
171 | 172 | {
|
172 |
| - var count = stackalloc int[4]; |
173 | 173 | int blockHash = Spread(comparer.GetHashCode(value));
|
174 | 174 | int counterHash = Rehash(blockHash);
|
// block is a multiple of 8, so every slot read below lies in table[block .. block + 7].
175 | 175 | int block = (blockHash & blockMask) << 3;
|
176 | 176 |
|
177 |
// Removed version: for i = 0..3, counterHash is shifted right (as uint) by 8 * i bits;
// bit 0 of the result picks the slot offset and bits 1-4 pick the counter index.
| - for (int i = 0; i < 4; i++) |
178 |
| - { |
179 |
| - int h = (int)((uint)counterHash >> (i << 3)); |
180 |
| - int index = (h >> 1) & 15; |
181 |
| - int offset = h & 1; |
182 |
| - count[i] = (int)(((ulong)table[block + offset + (i << 1)] >> (index << 2)) & 0xfL); |
183 |
| - } |
184 |
| - return Math.Min(Math.Min(count[0], count[1]), Math.Min(count[2], count[3])); |
| 177 | + // Loop unrolling improves throughput |
// hN is counterHash shifted right by 8 * N bits with `>>>` (unsigned shift), which matches
// the removed `(int)((uint)counterHash >> (i << 3))` expression for i = N.
| 178 | + int h0 = counterHash; |
| 179 | + int h1 = counterHash >>> 8; |
| 180 | + int h2 = counterHash >>> 16; |
| 181 | + int h3 = counterHash >>> 24; |
| 182 | + |
// Bits 1-4 of each hN give a value in 0..15: the position of the 4-bit counter to read.
| 183 | + int index0 = (h0 >>> 1) & 15; |
| 184 | + int index1 = (h1 >>> 1) & 15; |
| 185 | + int index2 = (h2 >>> 1) & 15; |
| 186 | + int index3 = (h3 >>> 1) & 15; |
| 187 | + |
// Bit 0 of each hN picks the first or second slot of the N-th pair within the block
// (offsets 0-1, 2-3, 4-5, 6-7), replacing `block + offset + (i << 1)` from the loop.
| 188 | + int slot0 = block + (h0 & 1); |
| 189 | + int slot1 = block + (h1 & 1) + 2; |
| 190 | + int slot2 = block + (h2 & 1) + 4; |
| 191 | + int slot3 = block + (h3 & 1) + 6; |
| 192 | + |
// indexN << 2 converts the counter position to a bit offset; & 0xfL isolates the 4-bit value.
| 193 | + int count0 = (int)((table[slot0] >>> (index0 << 2)) & 0xfL); |
| 194 | + int count1 = (int)((table[slot1] >>> (index1 << 2)) & 0xfL); |
| 195 | + int count2 = (int)((table[slot2] >>> (index2 << 2)) & 0xfL); |
| 196 | + int count3 = (int)((table[slot3] >>> (index3 << 2)) & 0xfL); |
| 197 | + |
// The result is the minimum of the four counters read above.
| 198 | + return Math.Min(Math.Min(count0, count1), Math.Min(count2, count3)); |
185 | 199 | }
|
186 | 200 |
|
187 | 201 | private unsafe void IncrementStd(T value)
|
188 | 202 | {
|
189 |
| - var index = stackalloc int[8]; |
190 | 203 | int blockHash = Spread(comparer.GetHashCode(value));
|
191 | 204 | int counterHash = Rehash(blockHash);
|
192 | 205 | int block = (blockHash & blockMask) << 3;
|
193 | 206 |
|
194 |
| - for (int i = 0; i < 4; i++) |
195 |
| - { |
196 |
| - int h = (int)((uint)counterHash >> (i << 3)); |
197 |
| - index[i] = (h >> 1) & 15; |
198 |
| - int offset = h & 1; |
199 |
| - index[i + 4] = block + offset + (i << 1); |
200 |
| - } |
| 207 | + // Loop unrolling improves throughput |
| 208 | + int h0 = counterHash; |
| 209 | + int h1 = counterHash >>> 8; |
| 210 | + int h2 = counterHash >>> 16; |
| 211 | + int h3 = counterHash >>> 24; |
| 212 | + |
| 213 | + int index0 = (h0 >>> 1) & 15; |
| 214 | + int index1 = (h1 >>> 1) & 15; |
| 215 | + int index2 = (h2 >>> 1) & 15; |
| 216 | + int index3 = (h3 >>> 1) & 15; |
| 217 | + |
| 218 | + int slot0 = block + (h0 & 1); |
| 219 | + int slot1 = block + (h1 & 1) + 2; |
| 220 | + int slot2 = block + (h2 & 1) + 4; |
| 221 | + int slot3 = block + (h3 & 1) + 6; |
201 | 222 |
|
202 | 223 | bool added =
|
203 |
| - IncrementAt(index[4], index[0]) |
204 |
| - | IncrementAt(index[5], index[1]) |
205 |
| - | IncrementAt(index[6], index[2]) |
206 |
| - | IncrementAt(index[7], index[3]); |
| 224 | + IncrementAt(slot0, index0) |
| 225 | + | IncrementAt(slot1, index1) |
| 226 | + | IncrementAt(slot2, index2) |
| 227 | + | IncrementAt(slot3, index3); |
207 | 228 |
|
208 | 229 | if (added && (++size == sampleSize))
|
209 | 230 | {
|
|
0 commit comments