1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.digest;
19
20 import org.apache.commons.codec.binary.StringUtils;
21
22 /**
23 * Implements the MurmurHash3 32-bit and 128-bit hash functions.
24 *
25 * <p>
26 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
27 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
28 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
29 * </p>
30 *
31 * <p>
32 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
33 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
34 * </p>
35 *
36 * <p>
37 * This is public domain code with no copyrights. From home page of
38 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
39 * </p>
40 *
41 * <blockquote>
42 * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
43 * code."
44 * </blockquote>
45 *
46 * <p>
47 * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>.
48 * That adaption contains a {@code hash64} method that is not part of the original
49 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
50 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
51 * </p>
52 *
53 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
54 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++
55 * code</a>
56 * @see <a href=
57 * "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
58 * Apache Hive Murmer3</a>
59 * @since 1.13
60 */
61 public final class MurmurHash3 {
62
63 /**
64 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
65 * hash computed.
66 *
67 * <p>
68 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
69 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
70 * </p>
71 *
72 * <p>
73 * This implementation contains a sign-extension bug in the finalization step of
74 * any bytes left over from dividing the length by 4. This manifests if any of these
75 * bytes are negative.
76 * </p>
77 *
78 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
79 */
80 @Deprecated
81 public static class IncrementalHash32 extends IncrementalHash32x86 {
82
83 /**
84 * Constructs a new instance.
85 */
86 public IncrementalHash32() {
87 // empty
88 }
89
90 /**
91 * {@inheritDoc}
92 *
93 * <p>
94 * This implementation contains a sign-extension bug in the finalization step of
95 * any bytes left over from dividing the length by 4. This manifests if any of these
96 * bytes are negative.
97 * <p>
98 *
99 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
100 */
101 @Override
102 @Deprecated
103 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
104 int result = hash;
105 // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
106 int k1 = 0;
107 switch (unprocessedLength) {
108 case 3:
109 k1 ^= unprocessed[2] << 16;
110 // falls-through
111 case 2:
112 k1 ^= unprocessed[1] << 8;
113 // falls-through
114 case 1:
115 k1 ^= unprocessed[0];
116 // mix functions
117 k1 *= C1_32;
118 k1 = Integer.rotateLeft(k1, R1_32);
119 k1 *= C2_32;
120 result ^= k1;
121 }
122 // finalization
123 result ^= totalLen;
124 return fmix32(result);
125 }
126 }
127
128 /**
129 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
130 * hash computed.
131 *
132 * <p>
133 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
134 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
135 * </p>
136 *
137 * @since 1.14
138 */
139 public static class IncrementalHash32x86 {
140
141 /** The size of byte blocks that are processed together. */
142 private static final int BLOCK_SIZE = 4;
143
144 /**
145 * Combines the bytes using an Or operation ({@code | } in a little-endian representation
146 * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
147 * significant.
148 *
149 * @param b1 The first byte.
150 * @param b2 The second byte.
151 * @param b3 The third byte.
152 * @param b4 The fourth byte.
153 * @return The 32-bit integer.
154 */
155 private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
156 return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
157 }
158
159 /** Up to 3 unprocessed bytes from input data. */
160 private final byte[] unprocessed = new byte[3];
161
162 /** The number of unprocessed bytes in the tail data. */
163 private int unprocessedLength;
164
165 /** The total number of input bytes added since the start. */
166 private int totalLen;
167
168 /**
169 * The current running hash.
170 * This must be finalized to generate the 32-bit hash value.
171 */
172 private int hash;
173
174 /**
175 * Constructs a new instance.
176 */
177 public IncrementalHash32x86() {
178 // empty
179 }
180
181 /**
182 * Adds the byte array to the current incremental hash.
183 *
184 * @param data The input byte array.
185 * @param offset The offset of data.
186 * @param length The length of array.
187 */
188 public final void add(final byte[] data, final int offset, final int length) {
189 if (length <= 0) {
190 // Nothing to add
191 return;
192 }
193 totalLen += length;
194 // Process the bytes in blocks of 4.
195 // New bytes must be added to any current unprocessed bytes,
196 // then processed in blocks of 4 and the remaining bytes saved:
197 //
198 // |--|---------------------------|--|
199 // unprocessed
200 // main block
201 // remaining
202
203 // Check if the unprocessed bytes and new bytes can fill a block of 4.
204 // Make this overflow safe in the event that length is Integer.MAX_VALUE.
205 // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
206 if (unprocessedLength + length - BLOCK_SIZE < 0) {
207 // Not enough so add to the unprocessed bytes
208 System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
209 unprocessedLength += length;
210 return;
211 }
212 // Combine unprocessed bytes with new bytes.
213 final int newOffset;
214 final int newLength;
215 if (unprocessedLength > 0) {
216 int k = -1;
217 switch (unprocessedLength) {
218 case 1:
219 k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
220 break;
221 case 2:
222 k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
223 break;
224 case 3:
225 k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
226 break;
227 default:
228 throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
229 }
230 hash = mix32(k, hash);
231 // Update the offset and length
232 final int consumed = BLOCK_SIZE - unprocessedLength;
233 newOffset = offset + consumed;
234 newLength = length - consumed;
235 } else {
236 newOffset = offset;
237 newLength = length;
238 }
239 // Main processing of blocks of 4 bytes
240 final int nblocks = newLength >> 2;
241
242 for (int i = 0; i < nblocks; i++) {
243 final int index = newOffset + (i << 2);
244 final int k = MurmurHash.getLittleEndianInt(data, index);
245 hash = mix32(k, hash);
246 }
247 // Save left-over unprocessed bytes
248 final int consumed = nblocks << 2;
249 unprocessedLength = newLength - consumed;
250 if (unprocessedLength != 0) {
251 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
252 }
253 }
254
255 /**
256 * Generates the 32-bit hash value. Repeat calls to this method with no additional data
257 * will generate the same hash value.
258 *
259 * @return The 32-bit hash.
260 */
261 public final int end() {
262 // Allow calling end() again after adding no data to return the same result.
263 return finalise(hash, unprocessedLength, unprocessed, totalLen);
264 }
265
266 /**
267 * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
268 * and performing final mixing.
269 *
270 * @param hash The running hash.
271 * @param unprocessedLength The number of unprocessed bytes in the tail data.
272 * @param unprocessed Up to 3 unprocessed bytes from input data.
273 * @param totalLen The total number of input bytes added since the start.
274 * @return The 32-bit hash.
275 */
276 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
277 int result = hash;
278 int k1 = 0;
279 switch (unprocessedLength) {
280 case 3:
281 k1 ^= (unprocessed[2] & 0xff) << 16;
282 // falls-through
283 case 2:
284 k1 ^= (unprocessed[1] & 0xff) << 8;
285 // falls-through
286 case 1:
287 k1 ^= unprocessed[0] & 0xff;
288 // mix functions
289 k1 *= C1_32;
290 k1 = Integer.rotateLeft(k1, R1_32);
291 k1 *= C2_32;
292 result ^= k1;
293 }
294 // finalization
295 result ^= totalLen;
296 return fmix32(result);
297 }
298
299 /**
300 * Starts a new incremental hash.
301 *
302 * @param seed The initial seed value.
303 */
304 public final void start(final int seed) {
305 // Reset
306 unprocessedLength = totalLen = 0;
307 hash = seed;
308 }
309 }
310
311 /**
312 * A random number to use for a hash code.
313 *
314 * @deprecated This is not used internally and will be removed in a future release.
315 */
316 @Deprecated
317 public static final long NULL_HASHCODE = 2862933555777941757L;
318
319 /**
320 * A default seed to use for the murmur hash algorithm.
321 * Has the value {@code 104729}.
322 */
323 public static final int DEFAULT_SEED = 104729;
324 // Constants for 32-bit variant
325 private static final int C1_32 = 0xcc9e2d51;
326 private static final int C2_32 = 0x1b873593;
327 private static final int R1_32 = 15;
328 private static final int R2_32 = 13;
329
330 private static final int M_32 = 5;
331 private static final int N_32 = 0xe6546b64;
332 // Constants for 128-bit variant
333 private static final long C1 = 0x87c37b91114253d5L;
334 private static final long C2 = 0x4cf5ad432745937fL;
335 private static final int R1 = 31;
336 private static final int R2 = 27;
337 private static final int R3 = 33;
338 private static final int M = 5;
339
340 private static final int N1 = 0x52dce729;
341
342 private static final int N2 = 0x38495ab5;
343
344 /**
345 * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
346 *
347 * @param hash The current hash.
348 * @return The final hash.
349 */
350 private static int fmix32(int hash) {
351 hash ^= hash >>> 16;
352 hash *= 0x85ebca6b;
353 hash ^= hash >>> 13;
354 hash *= 0xc2b2ae35;
355 hash ^= hash >>> 16;
356 return hash;
357 }
358
359 /**
360 * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
361 *
362 * @param hash The current hash.
363 * @return The final hash.
364 */
365 private static long fmix64(long hash) {
366 hash ^= hash >>> 33;
367 hash *= 0xff51afd7ed558ccdL;
368 hash ^= hash >>> 33;
369 hash *= 0xc4ceb9fe1a85ec53L;
370 hash ^= hash >>> 33;
371 return hash;
372 }
373
374 /**
375 * Generates 128-bit hash from the byte array with a default seed.
376 * This is a helper method that will produce the same result as:
377 *
378 * <pre>
379 * int offset = 0;
380 * int seed = 104729;
381 * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
382 * </pre>
383 *
384 * <p>
385 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
386 * this result as the default seed is positive.
387 * </p>
388 *
389 * @param data The input byte array.
390 * @return The 128-bit hash (2 longs).
391 * @see #hash128(byte[], int, int, int)
392 */
393 public static long[] hash128(final byte[] data) {
394 return hash128(data, 0, data.length, DEFAULT_SEED);
395 }
396
397 /**
398 * Generates 128-bit hash from the byte array with the given offset, length and seed.
399 *
400 * <p>
401 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
402 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
403 * </p>
404 *
405 * <p>
406 * This implementation contains a sign-extension bug in the seed initialization.
407 * This manifests if the seed is negative.
408 * </p>
409 *
410 * @param data The input byte array.
411 * @param offset The first element of array.
412 * @param length The length of array.
413 * @param seed The initial seed value.
414 * @return The 128-bit hash (2 longs).
415 * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
416 */
417 @Deprecated
418 public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
419 // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
420 // to maintain behavioral compatibility with the original version.
421 // The implicit conversion to a long will extend a negative sign
422 // bit through the upper 32-bits of the long seed. These should be zero.
423 return hash128x64Internal(data, offset, length, seed);
424 }
425
426 /**
427 * Generates 128-bit hash from a string with a default seed.
428 *
429 * <p>
430 * Before 1.14 the string was converted using default encoding. Since 1.14 the string is converted to bytes using UTF-8 encoding.
431 * </p>
432 * <p>
433 * This is a helper method that will produce the same result as:
434 * </p>
435 *
436 * <pre>
437 * int offset = 0;
438 * int seed = 104729;
439 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
440 * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
441 * </pre>
442 *
443 * <p>
444 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect this result as the default seed is positive.
445 * </p>
446 *
447 * @param data The input String.
448 * @return The 128-bit hash (2 longs).
449 * @see #hash128(byte[], int, int, int)
450 * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from {@link String#getBytes(java.nio.charset.Charset)}.
451 */
452 @Deprecated
453 public static long[] hash128(final String data) {
454 final byte[] bytes = StringUtils.getBytesUtf8(data);
455 return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
456 }
457
458 /**
459 * Generates 128-bit hash from the byte array with a seed of zero.
460 * This is a helper method that will produce the same result as:
461 *
462 * <pre>
463 * int offset = 0;
464 * int seed = 0;
465 * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
466 * </pre>
467 *
468 * @param data The input byte array.
469 * @return The 128-bit hash (2 longs).
470 * @see #hash128x64(byte[], int, int, int)
471 * @since 1.14
472 */
473 public static long[] hash128x64(final byte[] data) {
474 return hash128x64(data, 0, data.length, 0);
475 }
476
477 /**
478 * Generates 128-bit hash from the byte array with the given offset, length and seed.
479 *
480 * <p>
481 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
482 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
483 * </p>
484 *
485 * @param data The input byte array.
486 * @param offset The first element of array.
487 * @param length The length of array.
488 * @param seed The initial seed value.
489 * @return The 128-bit hash (2 longs).
490 * @since 1.14
491 */
492 public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
493 // Use an unsigned 32-bit integer as the seed
494 return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
495 }
496
497 /**
498 * Generates 128-bit hash from the byte array with the given offset, length and seed.
499 *
500 * <p>
501 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
502 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
503 * </p>
504 *
505 * @param data The input byte array.
506 * @param offset The first element of array.
507 * @param length The length of array.
508 * @param seed The initial seed value.
509 * @return The 128-bit hash (2 longs).
510 */
511 private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
512 long h1 = seed;
513 long h2 = seed;
514 final int nblocks = length >> 4;
515 // body
516 for (int i = 0; i < nblocks; i++) {
517 final int index = offset + (i << 4);
518 long k1 = MurmurHash.getLittleEndianLong(data, index);
519 long k2 = MurmurHash.getLittleEndianLong(data, index + 8);
520 // mix functions for k1
521 k1 *= C1;
522 k1 = Long.rotateLeft(k1, R1);
523 k1 *= C2;
524 h1 ^= k1;
525 h1 = Long.rotateLeft(h1, R2);
526 h1 += h2;
527 h1 = h1 * M + N1;
528 // mix functions for k2
529 k2 *= C2;
530 k2 = Long.rotateLeft(k2, R3);
531 k2 *= C1;
532 h2 ^= k2;
533 h2 = Long.rotateLeft(h2, R1);
534 h2 += h1;
535 h2 = h2 * M + N2;
536 }
537 // tail
538 long k1 = 0;
539 long k2 = 0;
540 final int index = offset + (nblocks << 4);
541 switch (offset + length - index) {
542 case 15:
543 k2 ^= ((long) data[index + 14] & 0xff) << 48;
544 // falls-through
545 case 14:
546 k2 ^= ((long) data[index + 13] & 0xff) << 40;
547 // falls-through
548 case 13:
549 k2 ^= ((long) data[index + 12] & 0xff) << 32;
550 // falls-through
551 case 12:
552 k2 ^= ((long) data[index + 11] & 0xff) << 24;
553 // falls-through
554 case 11:
555 k2 ^= ((long) data[index + 10] & 0xff) << 16;
556 // falls-through
557 case 10:
558 k2 ^= ((long) data[index + 9] & 0xff) << 8;
559 // falls-through
560 case 9:
561 k2 ^= data[index + 8] & 0xff;
562 k2 *= C2;
563 k2 = Long.rotateLeft(k2, R3);
564 k2 *= C1;
565 h2 ^= k2;
566 // falls-through
567 case 8:
568 k1 ^= ((long) data[index + 7] & 0xff) << 56;
569 // falls-through
570 case 7:
571 k1 ^= ((long) data[index + 6] & 0xff) << 48;
572 // falls-through
573 case 6:
574 k1 ^= ((long) data[index + 5] & 0xff) << 40;
575 // falls-through
576 case 5:
577 k1 ^= ((long) data[index + 4] & 0xff) << 32;
578 // falls-through
579 case 4:
580 k1 ^= ((long) data[index + 3] & 0xff) << 24;
581 // falls-through
582 case 3:
583 k1 ^= ((long) data[index + 2] & 0xff) << 16;
584 // falls-through
585 case 2:
586 k1 ^= ((long) data[index + 1] & 0xff) << 8;
587 // falls-through
588 case 1:
589 k1 ^= data[index] & 0xff;
590 k1 *= C1;
591 k1 = Long.rotateLeft(k1, R1);
592 k1 *= C2;
593 h1 ^= k1;
594 }
595 // finalization
596 h1 ^= length;
597 h2 ^= length;
598
599 h1 += h2;
600 h2 += h1;
601
602 h1 = fmix64(h1);
603 h2 = fmix64(h2);
604
605 h1 += h2;
606 h2 += h1;
607 return new long[] { h1, h2 };
608 }
609
610 /**
611 * Generates 32-bit hash from the byte array with a default seed.
612 * This is a helper method that will produce the same result as:
613 *
614 * <pre>
615 * int offset = 0;
616 * int seed = 104729;
617 * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
618 * </pre>
619 *
620 * <p>
621 * This implementation contains a sign-extension bug in the finalization step of
622 * any bytes left over from dividing the length by 4. This manifests if any of these
623 * bytes are negative.
624 * </p>
625 *
626 * @param data The input byte array.
627 * @return The 32-bit hash.
628 * @see #hash32(byte[], int, int, int)
629 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
630 */
631 @Deprecated
632 public static int hash32(final byte[] data) {
633 return hash32(data, 0, data.length, DEFAULT_SEED);
634 }
635
636 /**
637 * Generates 32-bit hash from the byte array with the given length and a default seed.
638 * This is a helper method that will produce the same result as:
639 *
640 * <pre>
641 * int offset = 0;
642 * int seed = 104729;
643 * int hash = MurmurHash3.hash32(data, offset, length, seed);
644 * </pre>
645 *
646 * <p>
647 * This implementation contains a sign-extension bug in the finalization step of
648 * any bytes left over from dividing the length by 4. This manifests if any of these
649 * bytes are negative.
650 * </p>
651 *
652 * @param data The input byte array.
653 * @param length The length of array.
654 * @return The 32-bit hash.
655 * @see #hash32(byte[], int, int, int)
656 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
657 */
658 @Deprecated
659 public static int hash32(final byte[] data, final int length) {
660 return hash32(data, length, DEFAULT_SEED);
661 }
662
663 /**
664 * Generates 32-bit hash from the byte array with the given length and seed. This is a
665 * helper method that will produce the same result as:
666 *
667 * <pre>
668 * int offset = 0;
669 * int hash = MurmurHash3.hash32(data, offset, length, seed);
670 * </pre>
671 *
672 * <p>
673 * This implementation contains a sign-extension bug in the finalization step of
674 * any bytes left over from dividing the length by 4. This manifests if any of these
675 * bytes are negative.
676 * </p>
677 *
678 * @param data The input byte array.
679 * @param length The length of array.
680 * @param seed The initial seed value.
681 * @return The 32-bit hash.
682 * @see #hash32(byte[], int, int, int)
683 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
684 */
685 @Deprecated
686 public static int hash32(final byte[] data, final int length, final int seed) {
687 return hash32(data, 0, length, seed);
688 }
689
690 /**
691 * Generates 32-bit hash from the byte array with the given offset, length and seed.
692 *
693 * <p>
694 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
695 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
696 * </p>
697 *
698 * <p>
699 * This implementation contains a sign-extension bug in the finalization step of
700 * any bytes left over from dividing the length by 4. This manifests if any of these
701 * bytes are negative.
702 * </p>
703 *
704 * @param data The input byte array.
705 * @param offset The offset of data.
706 * @param length The length of array.
707 * @param seed The initial seed value.
708 * @return The 32-bit hash.
709 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
710 */
711 @Deprecated
712 public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
713 int hash = seed;
714 final int nblocks = length >> 2;
715 // body
716 for (int i = 0; i < nblocks; i++) {
717 final int index = offset + (i << 2);
718 final int k = MurmurHash.getLittleEndianInt(data, index);
719 hash = mix32(k, hash);
720 }
721 // tail
722 // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
723 final int index = offset + (nblocks << 2);
724 int k1 = 0;
725 switch (offset + length - index) {
726 case 3:
727 k1 ^= data[index + 2] << 16;
728 // falls-through
729 case 2:
730 k1 ^= data[index + 1] << 8;
731 // falls-through
732 case 1:
733 k1 ^= data[index];
734 // mix functions
735 k1 *= C1_32;
736 k1 = Integer.rotateLeft(k1, R1_32);
737 k1 *= C2_32;
738 hash ^= k1;
739 }
740 hash ^= length;
741 return fmix32(hash);
742 }
743
744 /**
745 * Generates 32-bit hash from a long with a default seed value.
746 * This is a helper method that will produce the same result as:
747 *
748 * <pre>
749 * int offset = 0;
750 * int seed = 104729;
751 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
752 * .putLong(data)
753 * .array(), offset, 8, seed);
754 * </pre>
755 *
756 * @param data The long to hash.
757 * @return The 32-bit hash.
758 * @see #hash32x86(byte[], int, int, int)
759 */
760 public static int hash32(final long data) {
761 return hash32(data, DEFAULT_SEED);
762 }
763
764 /**
765 * Generates 32-bit hash from a long with the given seed.
766 * This is a helper method that will produce the same result as:
767 *
768 * <pre>
769 * int offset = 0;
770 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
771 * .putLong(data)
772 * .array(), offset, 8, seed);
773 * </pre>
774 *
775 * @param data The long to hash.
776 * @param seed The initial seed value.
777 * @return The 32-bit hash.
778 * @see #hash32x86(byte[], int, int, int)
779 */
780 public static int hash32(final long data, final int seed) {
781 int hash = seed;
782 final long r0 = Long.reverseBytes(data);
783
784 hash = mix32((int) r0, hash);
785 hash = mix32((int) (r0 >>> 32), hash);
786
787 hash ^= Long.BYTES;
788 return fmix32(hash);
789 }
790
791 /**
792 * Generates 32-bit hash from two longs with a default seed value.
793 * This is a helper method that will produce the same result as:
794 *
795 * <pre>
796 * int offset = 0;
797 * int seed = 104729;
798 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
799 * .putLong(data1)
800 * .putLong(data2)
801 * .array(), offset, 16, seed);
802 * </pre>
803 *
804 * @param data1 The first long to hash.
805 * @param data2 The second long to hash.
806 * @return The 32-bit hash.
807 * @see #hash32x86(byte[], int, int, int)
808 */
809 public static int hash32(final long data1, final long data2) {
810 return hash32(data1, data2, DEFAULT_SEED);
811 }
812
813 /**
814 * Generates 32-bit hash from two longs with the given seed.
815 * This is a helper method that will produce the same result as:
816 *
817 * <pre>
818 * int offset = 0;
819 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
820 * .putLong(data1)
821 * .putLong(data2)
822 * .array(), offset, 16, seed);
823 * </pre>
824 *
825 * @param data1 The first long to hash.
826 * @param data2 The second long to hash.
827 * @param seed The initial seed value.
828 * @return The 32-bit hash.
829 * @see #hash32x86(byte[], int, int, int)
830 */
831 public static int hash32(final long data1, final long data2, final int seed) {
832 int hash = seed;
833 final long r0 = Long.reverseBytes(data1);
834 final long r1 = Long.reverseBytes(data2);
835
836 hash = mix32((int) r0, hash);
837 hash = mix32((int) (r0 >>> 32), hash);
838 hash = mix32((int) r1, hash);
839 hash = mix32((int) (r1 >>> 32), hash);
840
841 hash ^= Long.BYTES * 2;
842 return fmix32(hash);
843 }
844
845 /**
846 * Generates 32-bit hash from a string with a default seed.
847 * <p>
848 * Before 1.14 the string was converted using default encoding.
849 * Since 1.14 the string is converted to bytes using UTF-8 encoding.
850 * </p>
851 * This is a helper method that will produce the same result as:
852 *
853 * <pre>
854 * int offset = 0;
855 * int seed = 104729;
856 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
857 * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
858 * </pre>
859 *
860 * <p>
861 * This implementation contains a sign-extension bug in the finalization step of
862 * any bytes left over from dividing the length by 4. This manifests if any of these
863 * bytes are negative.
864 * </p>
865 *
866 * @param data The input string.
867 * @return The 32-bit hash.
868 * @see #hash32(byte[], int, int, int)
869 * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
870 * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
871 */
872 @Deprecated
873 public static int hash32(final String data) {
874 final byte[] bytes = StringUtils.getBytesUtf8(data);
875 return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
876 }
877
878 /**
879 * Generates 32-bit hash from the byte array with a seed of zero.
880 * This is a helper method that will produce the same result as:
881 *
882 * <pre>
883 * int offset = 0;
884 * int seed = 0;
885 * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
886 * </pre>
887 *
888 * @param data The input byte array.
889 * @return The 32-bit hash.
890 * @see #hash32x86(byte[], int, int, int)
891 * @since 1.14
892 */
893 public static int hash32x86(final byte[] data) {
894 return hash32x86(data, 0, data.length, 0);
895 }
896
897 /**
898 * Generates 32-bit hash from the byte array with the given offset, length and seed.
899 *
900 * <p>
901 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
902 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
903 * </p>
904 *
905 * @param data The input byte array.
906 * @param offset The offset of data.
907 * @param length The length of array.
908 * @param seed The initial seed value.
909 * @return The 32-bit hash.
910 * @since 1.14
911 */
912 public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
913 int hash = seed;
914 final int nblocks = length >> 2;
915 // body
916 for (int i = 0; i < nblocks; i++) {
917 final int index = offset + (i << 2);
918 final int k = MurmurHash.getLittleEndianInt(data, index);
919 hash = mix32(k, hash);
920 }
921 // tail
922 final int index = offset + (nblocks << 2);
923 int k1 = 0;
924 switch (offset + length - index) {
925 case 3:
926 k1 ^= (data[index + 2] & 0xff) << 16;
927 // falls-through
928 case 2:
929 // falls-through
930 k1 ^= (data[index + 1] & 0xff) << 8;
931 // falls-through
932 case 1:
933 k1 ^= data[index] & 0xff;
934 // mix functions
935 k1 *= C1_32;
936 k1 = Integer.rotateLeft(k1, R1_32);
937 k1 *= C2_32;
938 hash ^= k1;
939 }
940 hash ^= length;
941 return fmix32(hash);
942 }
943
944 /**
945 * Generates 64-bit hash from a byte array with a default seed.
946 *
947 * <p>
948 * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
949 * </p>
950 *
951 * <p>
952 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
953 * with the same byte data. This method will be removed in a future release.
954 * </p>
955 *
956 * <p>
957 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
958 * </p>
959 *
960 * <p>
961 * This is a helper method that will produce the same result as:
962 * </p>
963 *
964 * <pre>
965 *
966 * int offset = 0;
967 * int seed = 104729;
968 * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
969 * </pre>
970 *
971 * @param data The input byte array.
972 * @return The 64-bit hash.
973 * @see #hash64(byte[], int, int, int)
974 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])}.
975 */
976 @Deprecated
977 public static long hash64(final byte[] data) {
978 return hash64(data, 0, data.length, DEFAULT_SEED);
979 }
980
981 /**
982 * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
983 *
984 * <p>
985 * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
986 * </p>
987 *
988 * <p>
989 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
990 * with the same byte data. This method will be removed in a future release.
991 * </p>
992 *
993 * <p>
994 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
995 * </p>
996 *
997 * <p>
998 * This is a helper method that will produce the same result as:
999 * </p>
1000 *
1001 * <pre>
1002 *
1003 * int seed = 104729;
1004 * long hash = MurmurHash3.hash64(data, offset, length, seed);
1005 * </pre>
1006 *
1007 * @param data The input byte array.
1008 * @param offset The offset of data.
1009 * @param length The length of array.
1010 * @return The 64-bit hash.
1011 * @see #hash64(byte[], int, int, int)
1012 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1013 */
1014 @Deprecated
1015 public static long hash64(final byte[] data, final int offset, final int length) {
1016 return hash64(data, offset, length, DEFAULT_SEED);
1017 }
1018
1019 /**
1020 * Generates 64-bit hash from a byte array with the given offset, length and seed.
1021 *
1022 * <p>
1023 * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1024 * </p>
1025 *
1026 * <p>
1027 * This is a Murmur3-like 64-bit variant. This method will be removed in a future release.
1028 * </p>
1029 *
1030 * <p>
1031 * This implementation contains a sign-extension bug in the seed initialization. This manifests if the seed is negative.
1032 * </p>
1033 *
1034 * <p>
1035 * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks of data processed in the MurmurHash3
1036 * {@code MurmurHash3_x64_128} method. However the hash is not mixed with a hash chunk from the next 8 bytes of data. The method will not return the same
1037 * value as the first or second 64-bits of the function {@link #hash128(byte[], int, int, int)}.
1038 * </p>
1039 *
1040 * <p>
1041 * Use of this method is not advised. Use the first long returned from {@link #hash128x64(byte[], int, int, int)}.
1042 * </p>
1043 *
1044 * @param data The input byte array.
1045 * @param offset The offset of data.
1046 * @param length The length of array.
1047 * @param seed The initial seed value.
1048 * @return The 64-bit hash.
1049 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1050 */
1051 @Deprecated
1052 public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1053 // Note: This fails to apply masking using 0xffffffffL to the seed.
1054 long hash = seed;
1055 final int nblocks = length >> 3;
1056 // body
1057 for (int i = 0; i < nblocks; i++) {
1058 final int index = offset + (i << 3);
1059 long k = MurmurHash.getLittleEndianLong(data, index);
1060 // mix functions
1061 k *= C1;
1062 k = Long.rotateLeft(k, R1);
1063 k *= C2;
1064 hash ^= k;
1065 hash = Long.rotateLeft(hash, R2) * M + N1;
1066 }
1067 // tail
1068 long k1 = 0;
1069 final int index = offset + (nblocks << 3);
1070 switch (offset + length - index) {
1071 case 7:
1072 k1 ^= ((long) data[index + 6] & 0xff) << 48;
1073 // falls-through
1074 case 6:
1075 k1 ^= ((long) data[index + 5] & 0xff) << 40;
1076 // falls-through
1077 case 5:
1078 k1 ^= ((long) data[index + 4] & 0xff) << 32;
1079 // falls-through
1080 case 4:
1081 k1 ^= ((long) data[index + 3] & 0xff) << 24;
1082 // falls-through
1083 case 3:
1084 k1 ^= ((long) data[index + 2] & 0xff) << 16;
1085 // falls-through
1086 case 2:
1087 k1 ^= ((long) data[index + 1] & 0xff) << 8;
1088 // falls-through
1089 case 1:
1090 k1 ^= (long) data[index] & 0xff;
1091 k1 *= C1;
1092 k1 = Long.rotateLeft(k1, R1);
1093 k1 *= C2;
1094 hash ^= k1;
1095 }
1096 // finalization
1097 hash ^= length;
1098 return fmix64(hash);
1099 }
1100
1101 /**
1102 * Generates 64-bit hash from an int with a default seed.
1103 *
1104 * <p>
1105 * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1106 * </p>
1107 *
1108 * <p>
1109 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1110 * with the same byte data from the {@code int}. This method will be removed in a future release.
1111 * </p>
1112 *
1113 * <p>
1114 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1115 * </p>
1116 *
1117 * <p>
1118 * This is a helper method that will produce the same result as:
1119 * </p>
1120 *
1121 * <pre>
1122 *
1123 * int offset = 0;
1124 * int seed = 104729;
1125 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4).putInt(data).array(), offset, 4, seed);
1126 * </pre>
1127 *
1128 * @param data The int to hash.
1129 * @return The 64-bit hash.
1130 * @see #hash64(byte[], int, int, int)
1131 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1132 */
1133 @Deprecated
1134 public static long hash64(final int data) {
1135 long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1136 long hash = DEFAULT_SEED;
1137 k1 *= C1;
1138 k1 = Long.rotateLeft(k1, R1);
1139 k1 *= C2;
1140 hash ^= k1;
1141 // finalization
1142 hash ^= Integer.BYTES;
1143 return fmix64(hash);
1144 }
1145
1146 /**
1147 * Generates 64-bit hash from a long with a default seed.
1148 *
1149 * <p>
1150 * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1151 * </p>
1152 *
1153 * <p>
1154 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1155 * with the same byte data from the {@code long}. This method will be removed in a future release.
1156 * </p>
1157 *
1158 * <p>
1159 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1160 * </p>
1161 *
1162 * <p>
1163 * This is a helper method that will produce the same result as:
1164 * </p>
1165 *
1166 * <pre>
1167 *
1168 * int offset = 0;
1169 * int seed = 104729;
1170 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8).putLong(data).array(), offset, 8, seed);
1171 * </pre>
1172 *
1173 * @param data The long to hash.
1174 * @return The 64-bit hash.
1175 * @see #hash64(byte[], int, int, int)
1176 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1177 */
1178 @Deprecated
1179 public static long hash64(final long data) {
1180 long hash = DEFAULT_SEED;
1181 long k = Long.reverseBytes(data);
1182 // mix functions
1183 k *= C1;
1184 k = Long.rotateLeft(k, R1);
1185 k *= C2;
1186 hash ^= k;
1187 hash = Long.rotateLeft(hash, R2) * M + N1;
1188 // finalization
1189 hash ^= Long.BYTES;
1190 return fmix64(hash);
1191 }
1192
1193 /**
1194 * Generates 64-bit hash from a short with a default seed.
1195 *
1196 * <p>
1197 * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1198 * </p>
1199 *
1200 * <p>
1201 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1202 * with the same byte data from the {@code short}. This method will be removed in a future release.
1203 * </p>
1204 *
1205 * <p>
1206 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1207 * </p>
1208 *
1209 * <p>
1210 * This is a helper method that will produce the same result as:
1211 * </p>
1212 *
1213 * <pre>
1214 *
1215 * int offset = 0;
1216 * int seed = 104729;
1217 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2).putShort(data).array(), offset, 2, seed);
1218 * </pre>
1219 *
1220 * @param data The short to hash.
1221 * @return The 64-bit hash.
1222 * @see #hash64(byte[], int, int, int)
1223 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the
1224 * {@code short}.
1225 */
1226 @Deprecated
1227 public static long hash64(final short data) {
1228 long hash = DEFAULT_SEED;
1229 long k1 = 0;
1230 k1 ^= ((long) data & 0xff) << 8;
1231 k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1232 k1 *= C1;
1233 k1 = Long.rotateLeft(k1, R1);
1234 k1 *= C2;
1235 hash ^= k1;
1236 // finalization
1237 hash ^= Short.BYTES;
1238 return fmix64(hash);
1239 }
1240
1241 /**
1242 * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1243 *
1244 * @param k The data to add to the hash.
1245 * @param hash The current hash.
1246 * @return The new hash.
1247 */
1248 private static int mix32(int k, int hash) {
1249 k *= C1_32;
1250 k = Integer.rotateLeft(k, R1_32);
1251 k *= C2_32;
1252 hash ^= k;
1253 return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1254 }
1255
1256 /** No instance methods. */
1257 private MurmurHash3() {
1258 }
1259 }