Skip to content

Commit b4b7913

Browse files
committed
AsciiString optimized hashCode
Motivation: The AsciiString.hashCode() method can be optimized. This method is frequently used while building the DefaultHeaders data structure. Modification: - Add a PlatformDependent hashCode algorithm which utilizes UNSAFE if available. Result: AsciiString hashCode is faster.
1 parent 0d71744 commit b4b7913

File tree

7 files changed

+564
-113
lines changed

7 files changed

+564
-113
lines changed

common/src/main/java/io/netty/util/AsciiString.java

Lines changed: 16 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@
3030
import java.util.regex.Pattern;
3131
import java.util.regex.PatternSyntaxException;
3232

33-
import static io.netty.util.internal.ObjectUtil.checkNotNull;
3433
import static io.netty.util.internal.MathUtil.isOutOfBounds;
34+
import static io.netty.util.internal.ObjectUtil.checkNotNull;
3535

3636
/**
3737
* A string which has been encoded into a character encoding whose character always takes a single byte, similarly to
3838
* ASCII. It internally keeps its content in a byte array unlike {@link String}, which uses a character array, for
3939
* reduced memory footprint and faster data transfer from/to byte-based data structures such as a byte array and
40-
* {@link ByteBuffer}. It is often used in conjunction with {@link TextHeaders}.
40+
* {@link ByteBuffer}. It is often used in conjunction with {@link Headers} that require a {@link CharSequence}.
4141
* <p>
4242
* This class was designed to provide an immutable array of bytes, and caches some internal state based upon the value
4343
* of this array. However underlying access to this byte array is provided via not copying the array on construction or
@@ -47,7 +47,6 @@
4747
public final class AsciiString implements CharSequence, Comparable<CharSequence> {
4848
public static final AsciiString EMPTY_STRING = new AsciiString("");
4949
private static final char MAX_CHAR_VALUE = 255;
50-
private static final int HASH_CODE_PRIME = 31;
5150

5251
/**
5352
* If this value is modified outside the constructor then call {@link #arrayChanged()}.
@@ -1085,29 +1084,19 @@ public AsciiString[] split(char delim) {
10851084
return res.toArray(new AsciiString[res.size()]);
10861085
}
10871086

1087+
/**
1088+
* {@inheritDoc}
1089+
* <p>
1090+
* Provides a case-insensitive hash code for Ascii like byte strings.
1091+
*/
10881092
@Override
10891093
public int hashCode() {
1090-
int h = hash;
1091-
if (h == 0) {
1092-
final int end = arrayOffset() + length();
1093-
for (int i = arrayOffset(); i < end; ++i) {
1094-
// masking with 0x1F reduces the number of overall bits that impact the hash code but makes the hash
1095-
// code the same regardless of character case (upper case or lower case hash is the same).
1096-
h = h * HASH_CODE_PRIME + (value[i] & 0x1F);
1097-
}
1098-
1099-
hash = h;
1094+
if (hash == 0) {
1095+
hash = PlatformDependent.hashCodeAscii(value, offset, length);
11001096
}
11011097
return hash;
11021098
}
11031099

1104-
/**
1105-
* Generate a hash code that will be consistent regardless of ASCII character casing.
1106-
*/
1107-
public int hashCodeCaseInsensitive() {
1108-
return hashCode();
1109-
}
1110-
11111100
@Override
11121101
public boolean equals(Object obj) {
11131102
if (obj == null || obj.getClass() != AsciiString.class) {
@@ -1118,9 +1107,9 @@ public boolean equals(Object obj) {
11181107
}
11191108

11201109
AsciiString other = (AsciiString) obj;
1121-
return hashCode() == other.hashCode() &&
1122-
PlatformDependent.equals(array(), arrayOffset(), arrayOffset() + length(),
1123-
other.array(), other.arrayOffset(), other.arrayOffset() + other.length());
1110+
return length() == other.length() &&
1111+
hashCode() == other.hashCode() &&
1112+
PlatformDependent.equals(array(), arrayOffset(), other.array(), other.arrayOffset(), length());
11241113
}
11251114

11261115
/**
@@ -1336,14 +1325,15 @@ public double parseDouble(int start, int end) {
13361325
new HashingStrategy<CharSequence>() {
13371326
@Override
13381327
public int hashCode(CharSequence o) {
1339-
return AsciiString.caseInsensitiveHashCode(o);
1328+
return AsciiString.hashCode(o);
13401329
}
13411330

13421331
@Override
13431332
public boolean equals(CharSequence a, CharSequence b) {
13441333
return AsciiString.contentEqualsIgnoreCase(a, b);
13451334
}
13461335
};
1336+
13471337
public static final HashingStrategy<CharSequence> CASE_SENSITIVE_HASHER =
13481338
new HashingStrategy<CharSequence>() {
13491339
@Override
@@ -1368,27 +1358,7 @@ public static AsciiString of(CharSequence string) {
13681358
/**
13691359
* Returns the case-insensitive hash code of the specified string. Note that this method uses the same hashing
13701360
* algorithm as {@link #hashCode()} so that you can put both {@link AsciiString}s and arbitrary
1371-
* {@link CharSequence}s into the same {@link TextHeaders}.
1372-
*/
1373-
public static int caseInsensitiveHashCode(CharSequence value) {
1374-
if (value == null) {
1375-
return 0;
1376-
}
1377-
if (value.getClass() == AsciiString.class) {
1378-
return ((AsciiString) value).hashCodeCaseInsensitive();
1379-
}
1380-
1381-
int hash = 0;
1382-
for (int i = 0; i < value.length(); ++i) {
1383-
hash = hash * HASH_CODE_PRIME + (value.charAt(i) & 0x1F);
1384-
}
1385-
return hash;
1386-
}
1387-
1388-
/**
1389-
* A case-sensitive version of {@link caseInsensitiveHashCode(CharSequence)}.
1390-
* @param value
1391-
* @return
1361+
* {@link CharSequence}s into the same headers.
13921362
*/
13931363
public static int hashCode(CharSequence value) {
13941364
if (value == null) {
@@ -1398,11 +1368,7 @@ public static int hashCode(CharSequence value) {
13981368
return ((AsciiString) value).hashCode();
13991369
}
14001370

1401-
int hash = 0;
1402-
for (int i = 0; i < value.length(); ++i) {
1403-
hash = hash * HASH_CODE_PRIME + (value.charAt(i) & 0x1F);
1404-
}
1405-
return hash;
1371+
return PlatformDependent.hashCodeAscii(value);
14061372
}
14071373

14081374
/**

common/src/main/java/io/netty/util/internal/PlatformDependent.java

Lines changed: 200 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@
4949
import java.util.regex.Matcher;
5050
import java.util.regex.Pattern;
5151

52+
import static io.netty.util.internal.PlatformDependent0.HASH_CODE_ASCII_SEED;
53+
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiCompute;
54+
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitize;
55+
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitizeAsByte;
56+
5257
/**
5358
* Utility that detects various properties specific to the current runtime
5459
* environment, such as Java version and the availability of the
@@ -361,6 +366,88 @@ public static long getLong(byte[] data, int index) {
361366
return PlatformDependent0.getLong(data, index);
362367
}
363368

369+
private static long getLongSafe(byte[] bytes, int offset) {
370+
if (BIG_ENDIAN_NATIVE_ORDER) {
371+
return (long) bytes[offset] << 56 |
372+
((long) bytes[offset + 1] & 0xff) << 48 |
373+
((long) bytes[offset + 2] & 0xff) << 40 |
374+
((long) bytes[offset + 3] & 0xff) << 32 |
375+
((long) bytes[offset + 4] & 0xff) << 24 |
376+
((long) bytes[offset + 5] & 0xff) << 16 |
377+
((long) bytes[offset + 6] & 0xff) << 8 |
378+
(long) bytes[offset + 7] & 0xff;
379+
}
380+
return (long) bytes[offset] & 0xff |
381+
((long) bytes[offset + 1] & 0xff) << 8 |
382+
((long) bytes[offset + 2] & 0xff) << 16 |
383+
((long) bytes[offset + 3] & 0xff) << 24 |
384+
((long) bytes[offset + 4] & 0xff) << 32 |
385+
((long) bytes[offset + 5] & 0xff) << 40 |
386+
((long) bytes[offset + 6] & 0xff) << 48 |
387+
((long) bytes[offset + 7] & 0xff) << 56;
388+
}
389+
390+
private static long getLongFromBytesSafe(CharSequence bytes, int offset) {
391+
if (BIG_ENDIAN_NATIVE_ORDER) {
392+
return (long) bytes.charAt(offset) << 56 |
393+
((long) bytes.charAt(offset + 1) & 0xff) << 48 |
394+
((long) bytes.charAt(offset + 2) & 0xff) << 40 |
395+
((long) bytes.charAt(offset + 3) & 0xff) << 32 |
396+
((long) bytes.charAt(offset + 4) & 0xff) << 24 |
397+
((long) bytes.charAt(offset + 5) & 0xff) << 16 |
398+
((long) bytes.charAt(offset + 6) & 0xff) << 8 |
399+
(long) bytes.charAt(offset + 7) & 0xff;
400+
}
401+
return (long) bytes.charAt(offset) & 0xff |
402+
((long) bytes.charAt(offset + 1) & 0xff) << 8 |
403+
((long) bytes.charAt(offset + 2) & 0xff) << 16 |
404+
((long) bytes.charAt(offset + 3) & 0xff) << 24 |
405+
((long) bytes.charAt(offset + 4) & 0xff) << 32 |
406+
((long) bytes.charAt(offset + 5) & 0xff) << 40 |
407+
((long) bytes.charAt(offset + 6) & 0xff) << 48 |
408+
((long) bytes.charAt(offset + 7) & 0xff) << 56;
409+
}
410+
411+
private static int getIntSafe(byte[] bytes, int offset) {
412+
if (BIG_ENDIAN_NATIVE_ORDER) {
413+
return bytes[offset] << 24 |
414+
(bytes[offset + 1] & 0xff) << 16 |
415+
(bytes[offset + 2] & 0xff) << 8 |
416+
bytes[offset + 3] & 0xff;
417+
}
418+
return bytes[offset] & 0xff |
419+
(bytes[offset + 1] & 0xff) << 8 |
420+
(bytes[offset + 2] & 0xff) << 16 |
421+
bytes[offset + 3] << 24;
422+
}
423+
424+
private static int getIntFromBytesSafe(CharSequence bytes, int offset) {
425+
if (BIG_ENDIAN_NATIVE_ORDER) {
426+
return bytes.charAt(offset) << 24 |
427+
(bytes.charAt(offset + 1) & 0xff) << 16 |
428+
(bytes.charAt(offset + 2) & 0xff) << 8 |
429+
bytes.charAt(offset + 3) & 0xff;
430+
}
431+
return bytes.charAt(offset) & 0xff |
432+
(bytes.charAt(offset + 1) & 0xff) << 8 |
433+
(bytes.charAt(offset + 2) & 0xff) << 16 |
434+
bytes.charAt(offset + 3) << 24;
435+
}
436+
437+
private static short getShortSafe(byte[] bytes, int offset) {
438+
if (BIG_ENDIAN_NATIVE_ORDER) {
439+
return (short) (bytes[offset] << 8 | (bytes[offset + 1] & 0xff));
440+
}
441+
return (short) (bytes[offset] & 0xff | (bytes[offset + 1] << 8));
442+
}
443+
444+
private static short getShortFromBytesSafe(CharSequence bytes, int offset) {
445+
if (BIG_ENDIAN_NATIVE_ORDER) {
446+
return (short) (bytes.charAt(offset) << 8 | (bytes.charAt(offset + 1) & 0xff));
447+
}
448+
return (short) (bytes.charAt(offset) & 0xff | (bytes.charAt(offset + 1) << 8));
449+
}
450+
364451
public static void putOrderedObject(Object object, long address, Object value) {
365452
PlatformDependent0.putOrderedObject(object, address, value);
366453
}
@@ -415,16 +502,51 @@ public static void copyMemory(long srcAddr, byte[] dst, int dstIndex, long lengt
415502
*
416503
* @param bytes1 the first byte array.
417504
* @param startPos1 the position (inclusive) to start comparing in {@code bytes1}.
418-
* @param endPos1 the position (exclusive) to stop comparing in {@code bytes1}.
419505
* @param bytes2 the second byte array.
420506
* @param startPos2 the position (inclusive) to start comparing in {@code bytes2}.
421-
* @param endPos2 the position (exclusive) to stop comparing in {@code bytes2}.
507+
* @param length the amount of bytes to compare. This is assumed to be validated as not going out of bounds
508+
* by the caller.
509+
*/
510+
public static boolean equals(byte[] bytes1, int startPos1, byte[] bytes2, int startPos2, int length) {
511+
if (!hasUnsafe() || !PlatformDependent0.unalignedAccess()) {
512+
return equalsSafe(bytes1, startPos1, bytes2, startPos2, length);
513+
}
514+
return PlatformDependent0.equals(bytes1, startPos1, bytes2, startPos2, length);
515+
}
516+
517+
/**
518+
* Calculate a hash code of a byte array assuming ASCII character encoding.
519+
* The resulting hash code will be case insensitive.
520+
* @param bytes The array which contains the data to hash.
521+
* @param startPos What index to start generating a hash code in {@code bytes}
522+
* @param length The amount of bytes that should be accounted for in the computation.
523+
* @return The hash code of {@code bytes} assuming ASCII character encoding.
524+
* The resulting hash code will be case insensitive.
422525
*/
423-
public static boolean equals(byte[] bytes1, int startPos1, int endPos1, byte[] bytes2, int startPos2, int endPos2) {
526+
public static int hashCodeAscii(byte[] bytes, int startPos, int length) {
424527
if (!hasUnsafe() || !PlatformDependent0.unalignedAccess()) {
425-
return safeEquals(bytes1, startPos1, endPos1, bytes2, startPos2, endPos2);
528+
return hashCodeAsciiSafe(bytes, startPos, length);
426529
}
427-
return PlatformDependent0.equals(bytes1, startPos1, endPos1, bytes2, startPos2, endPos2);
530+
return PlatformDependent0.hashCodeAscii(bytes, startPos, length);
531+
}
532+
533+
/**
534+
* Calculate a hash code of a byte array assuming ASCII character encoding.
535+
* The resulting hash code will be case insensitive.
536+
* <p>
537+
* This method assumes that {@code bytes} is equivalent to a {@code byte[]} but just using {@link CharSequence}
538+
* for storage. The upper most byte of each {@code char} from {@code bytes} is ignored.
539+
* @param bytes The array which contains the data to hash (assumed to be equivalent to a {@code byte[]}).
540+
* @return The hash code of {@code bytes} assuming ASCII character encoding.
541+
* The resulting hash code will be case insensitive.
542+
*/
543+
public static int hashCodeAscii(CharSequence bytes) {
544+
char[] array;
545+
if (!hasUnsafe() || !PlatformDependent0.unalignedAccess() ||
546+
(array = PlatformDependent0.array(bytes)) == null) {
547+
return hashCodeAsciiSafe(bytes);
548+
}
549+
return PlatformDependent0.hashCodeAscii(array);
428550
}
429551

430552
/**
@@ -939,14 +1061,8 @@ private static int addressSize0() {
9391061
return PlatformDependent0.addressSize();
9401062
}
9411063

942-
private static boolean safeEquals(byte[] bytes1, int startPos1, int endPos1,
943-
byte[] bytes2, int startPos2, int endPos2) {
944-
final int len1 = endPos1 - startPos1;
945-
final int len2 = endPos2 - startPos2;
946-
if (len1 != len2) {
947-
return false;
948-
}
949-
final int end = startPos1 + len1;
1064+
private static boolean equalsSafe(byte[] bytes1, int startPos1, byte[] bytes2, int startPos2, int length) {
1065+
final int end = startPos1 + length;
9501066
for (int i = startPos1, j = startPos2; i < end; ++i, ++j) {
9511067
if (bytes1[i] != bytes2[j]) {
9521068
return false;
@@ -955,7 +1071,78 @@ private static boolean safeEquals(byte[] bytes1, int startPos1, int endPos1,
9551071
return true;
9561072
}
9571073

1074+
/**
1075+
* Package private for testing purposes only!
1076+
*/
1077+
static int hashCodeAsciiSafe(byte[] bytes, int startPos, int length) {
1078+
int hash = HASH_CODE_ASCII_SEED;
1079+
final int remainingBytes = length & 7;
1080+
final int end = startPos + remainingBytes;
1081+
for (int i = startPos - 8 + length; i >= end; i -= 8) {
1082+
hash = hashCodeAsciiCompute(getLongSafe(bytes, i), hash);
1083+
}
1084+
switch(remainingBytes) {
1085+
case 7:
1086+
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 3)), 13))
1087+
* 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
1088+
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
1089+
case 6:
1090+
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 2)), 13))
1091+
* 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
1092+
case 5:
1093+
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 1)), 13))
1094+
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
1095+
case 4:
1096+
return hash * 31 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos));
1097+
case 3:
1098+
return (hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
1099+
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
1100+
case 2:
1101+
return hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
1102+
case 1:
1103+
return hash * 31 + hashCodeAsciiSanitize(bytes[startPos]);
1104+
default:
1105+
return hash;
1106+
}
1107+
}
1108+
1109+
/**
1110+
* Package private for testing purposes only!
1111+
*/
1112+
static int hashCodeAsciiSafe(CharSequence bytes) {
1113+
int hash = HASH_CODE_ASCII_SEED;
1114+
final int remainingBytes = bytes.length() & 7;
1115+
for (int i = bytes.length() - 8; i >= remainingBytes; i -= 8) {
1116+
hash = hashCodeAsciiCompute(getLongFromBytesSafe(bytes, i), hash);
1117+
}
1118+
switch(remainingBytes) {
1119+
case 7:
1120+
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 3)), 13))
1121+
* 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1)))
1122+
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
1123+
case 6:
1124+
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 2)), 13))
1125+
* 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0));
1126+
case 5:
1127+
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 1)), 13))
1128+
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
1129+
case 4:
1130+
return hash * 31 + hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 0));
1131+
case 3:
1132+
return (hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1)))
1133+
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
1134+
case 2:
1135+
return hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0));
1136+
case 1:
1137+
return hash * 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
1138+
default:
1139+
return hash;
1140+
}
1141+
}
1142+
9581143
private static final class AtomicLongCounter extends AtomicLong implements LongCounter {
1144+
private static final long serialVersionUID = 4074772784610639305L;
1145+
9591146
@Override
9601147
public void add(long delta) {
9611148
addAndGet(delta);

0 commit comments

Comments
 (0)