Skip to content

MurmurHash Onnx Export #5013

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
May 5, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
e9c522d
Add core hashing functions
KsenijaS Nov 15, 2019
e909636
Don't convert to string in HashText
KsenijaS Nov 18, 2019
c14b085
add versioning for vector types
KsenijaS Nov 18, 2019
f9dd77c
Use span instead of pointers
KsenijaS Nov 20, 2019
f0d513c
Add perf test
KsenijaS Nov 21, 2019
d1a4737
add perf for num types
KsenijaS Nov 22, 2019
edf6d2b
update hashing algorithm
KsenijaS Dec 3, 2019
c0167ec
initial cleanup
KsenijaS Dec 4, 2019
257ed6d
modify hashing algo for strings
KsenijaS Dec 5, 2019
d7e6c19
remove MurmurRoundV2
KsenijaS Dec 6, 2019
aff61f5
Add hashing benchmark
KsenijaS Dec 11, 2019
566dd50
remove comments
KsenijaS Dec 18, 2019
9f91326
modify murmur hash for strings
KsenijaS Dec 18, 2019
be1f77e
Onnx conversion for key types
KsenijaS Dec 19, 2019
25fe50b
Remove tests
KsenijaS Dec 19, 2019
c1aa8d1
hashing estimator export
Lynx1820 Apr 10, 2020
e04e4f8
renaming var
Lynx1820 Apr 10, 2020
46036be
baseline updates
Lynx1820 Apr 16, 2020
17c7893
baseline changes
Lynx1820 Apr 17, 2020
501f49e
file changes
Lynx1820 Apr 17, 2020
dbc3e33
versioning added
Lynx1820 Apr 20, 2020
46e7ead
benchmark testing added
Lynx1820 Apr 22, 2020
63d31dd
resolving comments and adding onnx key test
Lynx1820 Apr 22, 2020
6e5dac7
versioning fix
Lynx1820 Apr 22, 2020
ae4ec77
addressing comments
Lynx1820 Apr 23, 2020
c649393
renamed file and added boolean test
Lynx1820 Apr 24, 2020
306d0ca
updated documentation
Lynx1820 Apr 24, 2020
57d2f35
addressing comments
Lynx1820 Apr 27, 2020
bc3b068
baseline changes
Lynx1820 Apr 27, 2020
ac31058
addressing comments
Lynx1820 Apr 27, 2020
a0712db
small comment fix
Lynx1820 Apr 27, 2020
072a9db
addressing comments
Lynx1820 Apr 28, 2020
0fe7de7
addressing comments
Lynx1820 May 1, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add core hashing functions
  • Loading branch information
KsenijaS authored and Lynx1820 committed Apr 22, 2020
commit e9c522d2a6432750ed0f4da9469d5af8d7cc6a31
173 changes: 173 additions & 0 deletions src/Microsoft.ML.Core/Utilities/Hashing.cs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,167 @@ public static uint MurmurRound(uint hash, uint chunk)
return hash;
}

/// <summary>
/// Folds <paramref name="len"/> bytes starting at <paramref name="key"/> into
/// <paramref name="hash"/>: every complete 4-byte word goes through the full
/// multiply/rotate/multiply round, and the 1-3 leftover bytes (if any) are packed
/// into one partial word that is mixed in with a shortened round.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="key">Pointer to the first byte to consume.</param>
/// <param name="len">Number of bytes readable at <paramref name="key"/>.</param>
/// <returns>The updated hash state; the length is not mixed in by this method.</returns>
private static unsafe uint MurmurRoundV2(uint hash, byte* key, int len)
{
    int wordCount = len / 4;
    uint* words = (uint*)key;

    // Body: consume the complete 32-bit words in order.
    for (int w = 0; w < wordCount; w++)
    {
        uint k = words[w];
        k *= 0xCC9E2D51;
        k = Rotate(k, 15);
        k *= 0x1B873593;

        hash ^= k;
        hash = Rotate(hash, 13);
        hash *= 5;
        hash += 0xE6546B64;
    }

    // Tail: 1 to 3 trailing bytes that do not fill a whole word.
    int remaining = len & 3;
    if (remaining != 0)
    {
        byte* tail = key + wordCount * 4;

        // Pack the leftover bytes with the first byte in the low 8 bits.
        uint k1 = tail[0];
        if (remaining >= 2)
        {
            k1 |= (uint)tail[1] << 8;
        }
        if (remaining == 3)
        {
            k1 |= (uint)tail[2] << 16;
        }

        k1 *= 0xCC9E2D51;
        k1 = Rotate(k1, 15);
        k1 *= 0x1B873593;
        hash ^= k1;
    }

    return hash;
}

/// <summary>
/// Feeds the 4 bytes that make up <paramref name="chunk"/> in memory through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The single-precision value whose raw bytes are hashed.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundFloat(uint hash, float chunk)
{
    unsafe
    {
        // The parameter is a local copy, so its address can be taken directly.
        return MurmurRoundV2(hash, (byte*)&chunk, 4);
    }
}

/// <summary>
/// Feeds the 8 bytes that make up <paramref name="chunk"/> in memory through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The double-precision value whose raw bytes are hashed.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundDouble(uint hash, double chunk)
{
    unsafe
    {
        // A double is 8 bytes wide. Passing 4 here (as before) hashed only half of
        // the value, so doubles differing only in the other half collided.
        return MurmurRoundV2(hash, (byte*)&chunk, sizeof(double));
    }
}

/// <summary>
/// Encodes <paramref name="chunk"/> as UTF-8 and feeds every encoded byte through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The text to hash; must not be null.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="chunk"/> is null.</exception>
public static uint MurmurRoundText(uint hash, string chunk)
{
    // Encode straight to UTF-8; the intermediate UTF-16 byte array was redundant.
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(chunk);

    unsafe
    {
        fixed (byte* key = utf8Bytes)
        {
            // The length must be the UTF-8 byte count. chunk.Length counts UTF-16
            // code units, which is smaller whenever the text contains non-ASCII
            // characters, so the end of such strings was silently dropped.
            return MurmurRoundV2(hash, key, utf8Bytes.Length);
        }
    }
}

/// <summary>
/// Feeds the single byte <paramref name="chunk"/> through <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The byte to hash.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundU1(uint hash, byte chunk)
{
    unsafe
    {
        return MurmurRoundV2(hash, &chunk, 1);
    }
}

/// <summary>
/// Feeds the 2 bytes that make up <paramref name="chunk"/> in memory through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The 16-bit unsigned value whose raw bytes are hashed.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundU2(uint hash, ushort chunk)
{
    unsafe
    {
        return MurmurRoundV2(hash, (byte*)&chunk, 2);
    }
}

/// <summary>
/// Feeds the 4 bytes that make up <paramref name="chunk"/> in memory through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The 32-bit unsigned value whose raw bytes are hashed.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundU4(uint hash, uint chunk)
{
    unsafe
    {
        return MurmurRoundV2(hash, (byte*)&chunk, 4);
    }
}

/// <summary>
/// Feeds the 8 bytes that make up <paramref name="chunk"/> in memory through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The 64-bit unsigned value whose raw bytes are hashed.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundU8(uint hash, ulong chunk)
{
    unsafe
    {
        return MurmurRoundV2(hash, (byte*)&chunk, 8);
    }
}

/// <summary>
/// Feeds the single byte that makes up <paramref name="chunk"/> through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The 8-bit signed value whose raw byte is hashed.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundI1(uint hash, sbyte chunk)
{
    unsafe
    {
        return MurmurRoundV2(hash, (byte*)&chunk, 1);
    }
}

/// <summary>
/// Feeds the 2 bytes that make up <paramref name="chunk"/> in memory through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The 16-bit signed value whose raw bytes are hashed.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundI2(uint hash, short chunk)
{
    unsafe
    {
        // A short is 2 bytes wide. The previous length of 1 hashed only the first
        // byte, so values differing only in the second byte collided.
        return MurmurRoundV2(hash, (byte*)&chunk, sizeof(short));
    }
}

/// <summary>
/// Feeds the 4 bytes that make up <paramref name="chunk"/> in memory through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The 32-bit signed value whose raw bytes are hashed.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundI4(uint hash, int chunk)
{
    unsafe
    {
        // An int is 4 bytes wide. The previous length of 1 hashed only the first
        // byte, so values differing only in the other three bytes collided.
        return MurmurRoundV2(hash, (byte*)&chunk, sizeof(int));
    }
}

/// <summary>
/// Feeds the 8 bytes that make up <paramref name="chunk"/> in memory through
/// <c>MurmurRoundV2</c>.
/// </summary>
/// <param name="hash">The running hash state to update.</param>
/// <param name="chunk">The 64-bit signed value whose raw bytes are hashed.</param>
/// <returns>The hash state returned by <c>MurmurRoundV2</c>.</returns>
public static uint MurmurRoundI8(uint hash, long chunk)
{
    unsafe
    {
        // A long is 8 bytes wide. The previous length of 1 hashed only the first
        // byte, so values differing only in the other seven bytes collided.
        return MurmurRoundV2(hash, (byte*)&chunk, sizeof(long));
    }
}

/// <summary>
/// Implements the murmur hash 3 algorithm, using a mock UTF-8 encoding.
/// The UTF-8 conversion ignores the possibilities of unicode planes other than the 0th.
Expand Down Expand Up @@ -284,6 +445,18 @@ public static uint MixHash(uint hash)
return hash;
}

/// <summary>
/// Final mixing step: XORs <paramref name="len"/> into <paramref name="hash"/> and
/// then applies three xor-shift stages (16, 13, 16 bits) separated by two
/// constant multiplications.
/// </summary>
/// <param name="hash">The hash state to finish.</param>
/// <param name="len">The length value to mix in before the xor-shift/multiply stages.</param>
/// <returns>The mixed hash value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint MixHashV2(uint hash, int len)
{
    uint mixed = hash ^ (uint)len;
    mixed = (mixed ^ (mixed >> 16)) * 0x85ebca6b;
    mixed = (mixed ^ (mixed >> 13)) * 0xc2b2ae35;
    return mixed ^ (mixed >> 16);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint Rotate(uint x, int r)
{
Expand Down
Loading