BPHash
General object hashing library for C++
test_reference.cpp
Go to the documentation of this file.
1 /*! \file
2  * \brief Testing of core hashing algorithms
3  */
4 
5 /* Copyright (c) 2016 Benjamin Pritchard <ben@bennyp.org>
6  * This file is part of the BPHash project, which is released
7  * under the BSD 3-clause license. See the LICENSE file for details
8  */
9 
10 /* This file tests the core MurmurHash algorithms against
11  * the reference implementations in smhasher */
12 
13 #include <iostream>
14 #include <random>
15 #include <chrono>
16 #include <sstream>
17 
18 #include "bphash/Hasher.hpp"
23 
24 #include "MurmurHash3_reference.h" // in this directory
25 
// Size of the test set, in bytes. Make it an odd number.
// The expansion is parenthesized so the macro behaves as a single value
// inside larger expressions (e.g. TEST_SIZE * 2 or TEST_SIZE % 2).
#define TEST_SIZE (1024*1024 + 7)
28 
29 using namespace bphash;
30 
31 
/*! \brief Overwrite every element of a buffer with a pseudo-random byte
 *
 * The generator is seeded from the current system clock reading, so the
 * contents differ between runs. The size of the buffer is not changed.
 *
 * \param [inout] buffer The buffer whose elements are overwritten
 */
static void random_fill(std::vector<uint8_t> & buffer)
{
    using engine_type = std::default_random_engine;

    // The clock count is wider than the engine's seed type; narrow explicitly.
    const auto ticks = std::chrono::system_clock::now().time_since_epoch().count();
    engine_type generator(static_cast<engine_type::result_type>(ticks));

    // std::uniform_int_distribution is not specified for character-sized
    // types such as uint8_t, so draw from a wider type and narrow the result.
    std::uniform_int_distribution<unsigned int> dist(0u, 255u);

    for(auto & byte : buffer)
        byte = static_cast<uint8_t>(dist(generator));
}
41 
42 
43 static void test_offset(detail::HashImpl & hasher,
44  const std::vector<uint8_t> & testdata,
45  size_t offset, size_t blocksize,
46  const HashValue & reference,
47  int hashsize, int bitness)
48 {
49  std::cout << "Testing " << hashsize << "-bit x" << bitness << " hash,"
50  << " offset " << offset
51  << " blocksize " << blocksize << " ... ";
52 
53  hasher.reset();
54 
55  // do the first part
56  hasher.update(testdata.data(), offset);
57 
58  // now do the rest by blocks
59  if(blocksize == 0)
60  blocksize = testdata.size() - offset;
61 
62  size_t done = offset;
63  size_t todo = 0;
64 
65  do {
66  todo = blocksize;
67  if( (done + todo) > testdata.size() )
68  todo = testdata.size() - done;
69 
70  hasher.update(testdata.data() + done, todo);
71  done += todo;
72 
73  } while(todo);
74 
75  HashValue calc = hasher.finalize();
76 
77  if(calc != reference)
78  {
79  std::cout << "FAILED\n";
80 
81  std::stringstream ss;
82  ss << "Mismatch: " << hashsize << "-bit hash,"
83  << " offset " << offset
84  << " blocksize " << blocksize;
85 
86  throw std::runtime_error(ss.str());
87  }
88  else
89  std::cout << "OK\n";
90 }
91 
92 
93 
94 int main(void)
95 {
96  std::vector<uint8_t> testdata(TEST_SIZE);
97  random_fill(testdata);
98 
99  HashValue ref_32_x32(4);
100  HashValue ref_32_x64(4);
101  HashValue ref_64_x64(8);
102  HashValue ref_128_x64(16);
103 
104  const void * testdata_ptr = testdata.data();
105  const size_t testdata_size = testdata.size();
106  const int testdata_size_int = static_cast<int>(testdata_size);
107 
108  // calculate the reference values
109  MurmurHash3_x86_32(testdata_ptr, testdata_size_int, 0, ref_32_x32.data());
110  MurmurHash3_x64_128(testdata_ptr, testdata_size_int, 0, ref_128_x64.data());
111  ref_32_x64 = truncate_hash(ref_128_x64, 4);
112  ref_64_x64 = truncate_hash(ref_128_x64, 8);
113 
117  detail::MurmurHash3_128_x64 mh128_x64;
118 
119  mh32_x32.update(testdata_ptr, testdata_size);
120  mh32_x64.update(testdata_ptr, testdata_size);
121  mh64_x64.update(testdata_ptr, testdata_size);
122  mh128_x64.update(testdata_ptr, testdata_size);
123 
124  auto bph_32_x32 = mh32_x32.finalize();
125  auto bph_32_x64 = mh32_x64.finalize();
126  auto bph_64_x64 = mh64_x64.finalize();
127  auto bph_128_x64 = mh128_x64.finalize();
128 
129 
130  std::string ref_32_x32_str = hash_to_string(ref_32_x32);
131  std::string ref_32_x64_str = hash_to_string(ref_32_x64);
132  std::string ref_64_x64_str = hash_to_string(ref_64_x64);
133  std::string ref_128_x64_str = hash_to_string(ref_128_x64);
134 
135  std::string bph_32_x32_str = hash_to_string(bph_32_x32);
136  std::string bph_32_x64_str = hash_to_string(bph_32_x64);
137  std::string bph_64_x64_str = hash_to_string(bph_64_x64);
138  std::string bph_128_x64_str = hash_to_string(bph_128_x64);
139 
140  try {
141 
142  std::cout << "\n";
143  std::cout << "32-bit hash, x32\n";
144  std::cout << "Reference: " << ref_32_x32_str << "\n";
145  std::cout << " BPHash: " << bph_32_x32_str << "\n";
146  std::cout << "\n";
147  if(ref_32_x32 != bph_32_x32)
148  throw std::runtime_error("Mismatch on 32-bit x64 hash");
149 
150  std::cout << "32-bit hash, x64\n";
151  std::cout << "Reference: " << ref_32_x64_str << "\n";
152  std::cout << " BPHash: " << bph_32_x64_str << "\n";
153  std::cout << "\n";
154  if(ref_32_x64 != bph_32_x64)
155  throw std::runtime_error("Mismatch on 32-bit x64 hash");
156 
157  std::cout << "64-bit hash, x64\n";
158  std::cout << "Reference: " << ref_64_x64_str << "\n";
159  std::cout << " BPHash: " << bph_64_x64_str << "\n";
160  std::cout << "\n";
161  if(ref_64_x64 != bph_64_x64)
162  throw std::runtime_error("Mismatch on 64-bit x64 hash");
163 
164  std::cout << "128-bit hash, x64\n";
165  std::cout << "Reference: " << ref_128_x64_str << "\n";
166  std::cout << " BPHash: " << bph_128_x64_str << "\n";
167  std::cout << "\n";
168  if(ref_128_x64 != bph_128_x64)
169  throw std::runtime_error("Mismatch on 128-bit x64 hash");
170 
171 
172  // try bphash with different offsets (to test progressive hashing
173  for(size_t i = 0; i <= 18; i++)
174  for(size_t j = 0; j <= 1150; j += 23) // purposely odd numbers
175  {
176  test_offset(mh32_x32, testdata, i, j, ref_32_x32, 32, 32);
177  test_offset(mh32_x64, testdata, i, j, ref_32_x64, 32, 64);
178  test_offset(mh64_x64, testdata, i, j, ref_64_x64, 64, 64);
179  test_offset(mh128_x64, testdata, i, j, ref_128_x64, 128, 64);
180  }
181 
182  std::cout << "\n";
183 
184  }
185  catch(const std::exception & ex)
186  {
187  std::cout << "!!! Failed test: " << ex.what() << "\n\n";
188  return 1;
189  }
190 
191  return 0;
192 }
193 
MurmurHash reference implementation (header)
static void test_offset(detail::HashImpl &hasher, const std::vector< uint8_t > &testdata, size_t offset, size_t blocksize, const HashValue &reference, int hashsize, int bitness)
Implementation of MurmurHash3 32-bit x64 hash.
virtual void update(void const *data, size_t nbytes)
Add some data to the hash.
Implementation of MurmurHash3 64-bit x64 hash.
HashValue truncate_hash(const HashValue &hash, size_t nbytes)
Truncate the hash to a given number of bytes.
Definition: Hash.cpp:32
std::string hash_to_string(const HashValue &hash)
Return a string representation of a hash.
Definition: Hash.cpp:17
A class that hashes objects (header)
MurmurHash3 32-bit x32 hash (header)
virtual HashValue finalize(void)
Finish hashing and report the hash.
virtual HashValue finalize(void)=0
Finish hashing and report the hash.
void MurmurHash3_x86_32(const void *key, int len, uint32_t seed, void *out)
std::vector< uint8_t > HashValue
Stores the value of a hash.
Definition: Hash.hpp:24
MurmurHash3 128-bit x64 hash (header)
virtual HashValue finalize(void)
Finish hashing and report the hash.
MurmurHash3 32-bit x64 hash (header)
virtual void update(void const *data, size_t nbytes)=0
Add some data to the hash.
#define TEST_SIZE
virtual void reset(void)=0
Zero out the hash.
virtual HashValue finalize(void)
Finish hashing and report the hash.
Implementation of MurmurHash3 32-bit x32 hash.
Implementation of MurmurHash3 128-bit x64 hash.
Base class for a hash implementation.
Definition: HashImpl.hpp:22
void MurmurHash3_x64_128(const void *key, const int len, const uint32_t seed, void *out)
virtual void update(void const *data, size_t nbytes)
Add some data to the hash.
MurmurHash3 64-bit x64 hash (header)
virtual HashValue finalize(void)
Finish hashing and report the hash.
static void random_fill(std::vector< uint8_t > &buffer)
int main(void)