Skip to content

Commit cdd95de

Browse files
markbenvenuto authored and hanumantmk committed
SERVER-21828 Murmurhash for bigendian
Make murmurhash output the same on big endian architectures as on little endian.
1 parent 9405fa9 commit cdd95de

File tree

4 files changed

+37
-14
lines changed

4 files changed

+37
-14
lines changed

src/mongo/base/string_data.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
#include <ostream>
3131
#include <third_party/murmurhash3/MurmurHash3.h>
3232

33+
#include "mongo/base/data_type_endian.h"
34+
#include "mongo/base/data_view.h"
35+
3336
namespace mongo {
3437

3538
namespace {
@@ -39,16 +42,16 @@ size_t murmur3(StringData str);
3942

4043
template <>
4144
size_t murmur3<4>(StringData str) {
42-
uint32_t hash;
45+
char hash[4];
4346
MurmurHash3_x86_32(str.rawData(), str.size(), 0, &hash);
44-
return hash;
47+
return ConstDataView(hash).read<LittleEndian<std::uint32_t>>();
4548
}
4649

4750
template <>
4851
size_t murmur3<8>(StringData str) {
49-
uint64_t hash[2];
52+
char hash[16];
5053
MurmurHash3_x64_128(str.rawData(), str.size(), 0, hash);
51-
return static_cast<size_t>(hash[0]);
54+
return static_cast<size_t>(ConstDataView(hash).read<LittleEndian<std::uint64_t>>());
5255
}
5356

5457
} // namespace

src/mongo/db/fts/fts_index_format_test.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,10 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) {
209209
string longWordCat = longPrefix + "cat";
210210
// "aaa...aaasat"
211211
string longWordSat = longPrefix + "sat";
212-
string text = mongoutils::str::stream() << longWordCat << " " << longWordSat;
212+
// "aaa...aaamongodbfts"
213+
string longWordMongoDBFts = longPrefix + "mongodbfts";
214+
string text = mongoutils::str::stream() << longWordCat << " " << longWordSat << " "
215+
<< longWordMongoDBFts;
213216
FTSIndexFormat::getKeys(spec, BSON("data" << text), &keys);
214217

215218
// Hard-coded expected computed keys for future-proofing.
@@ -218,6 +221,8 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) {
218221
expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab8e78455d827ebb87cbe87f392bf45f6");
219222
// sat
220223
expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaf2d6f58bb3b81b97e611ae7ccac6dea7");
224+
// mongodbfts
225+
expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaae1d6b34f5d9c92acecd8cce32f747b27");
221226

222227
assertEqualsIndexKeys(expectedKeys, keys);
223228
}

src/third_party/murmurhash3/MurmurHash3.cpp

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99

1010
#include "MurmurHash3.h"
1111

12+
#include "mongo/base/data_type_endian.h"
13+
#include "mongo/base/data_view.h"
14+
#include "mongo/platform/endian.h"
15+
1216
//-----------------------------------------------------------------------------
1317
// Platform-specific functions and macros
1418

@@ -51,15 +55,22 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r )
5155
//-----------------------------------------------------------------------------
5256
// Block read - if your platform needs to do endian-swapping or can only
5357
// handle aligned reads, do the conversion here
58+
//
59+
// NOTE, MongoDB code: JC -
60+
// ConstDataView handles the byte swapping and avoids unaligned reads. Note
61+
// that we need reversed versions because we actually want little endian
62+
// encoded blocks out of getblock, and our input data is in the native format.
5463

5564
FORCE_INLINE inline uint32_t getblock ( const uint32_t * p, int i )
5665
{
57-
return p[i];
66+
return mongo::ConstDataView(reinterpret_cast<const char*>(p))
67+
.read<mongo::ReverseLittleEndian<uint32_t>>(i * sizeof(uint32_t));
5868
}
5969

6070
FORCE_INLINE inline uint64_t getblock ( const uint64_t * p, int i )
6171
{
62-
return p[i];
72+
return mongo::ConstDataView(reinterpret_cast<const char*>(p))
73+
.read<mongo::ReverseLittleEndian<uint64_t>>(i * sizeof(uint64_t));
6374
}
6475

6576
//-----------------------------------------------------------------------------
@@ -142,7 +153,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
142153

143154
h1 = fmix(h1);
144155

145-
*(uint32_t*)out = h1;
156+
*(uint32_t*)out = mongo::endian::nativeToLittle(h1);
146157
}
147158

148159
//-----------------------------------------------------------------------------
@@ -244,10 +255,10 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
244255
h1 += h2; h1 += h3; h1 += h4;
245256
h2 += h1; h3 += h1; h4 += h1;
246257

247-
((uint32_t*)out)[0] = h1;
248-
((uint32_t*)out)[1] = h2;
249-
((uint32_t*)out)[2] = h3;
250-
((uint32_t*)out)[3] = h4;
258+
((uint32_t*)out)[0] = mongo::endian::nativeToLittle(h1);
259+
((uint32_t*)out)[1] = mongo::endian::nativeToLittle(h2);
260+
((uint32_t*)out)[2] = mongo::endian::nativeToLittle(h3);
261+
((uint32_t*)out)[3] = mongo::endian::nativeToLittle(h4);
251262
}
252263

253264
//-----------------------------------------------------------------------------
@@ -327,8 +338,8 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
327338
h1 += h2;
328339
h2 += h1;
329340

330-
((uint64_t*)out)[0] = h1;
331-
((uint64_t*)out)[1] = h2;
341+
((uint64_t*)out)[0] = mongo::endian::nativeToLittle(h1);
342+
((uint64_t*)out)[1] = mongo::endian::nativeToLittle(h2);
332343
}
333344

334345
//-----------------------------------------------------------------------------
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
11
Import("env")
2+
3+
env.InjectThirdPartyIncludePaths(libraries=['boost'])
4+
5+
env.Append(CPPPATH=['#src', '$BUILD_DIR'])
26
env.Library("murmurhash3", ["MurmurHash3.cpp"])

0 commit comments

Comments
 (0)