View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.digest;
19  
20  import org.apache.commons.codec.binary.StringUtils;
21  
22  /**
23   * Implements the MurmurHash3 32-bit and 128-bit hash functions.
24   *
25   * <p>
26   * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
27   * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
28   * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
29   * </p>
30   *
31   * <p>
32   * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
33   * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
34   * </p>
35   *
36   * <p>
37   * This is public domain code with no copyrights. From home page of
38   * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
39   * </p>
40   *
41   * <blockquote>
42   * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
43   * code."
44   * </blockquote>
45   *
46   * <p>
47   * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>.
48   * That adaption contains a {@code hash64} method that is not part of the original
49   * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
50   * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
51   * </p>
52   *
53   * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
54   * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++
55   *      code</a>
56   * @see <a href=
57   *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
58   *      Apache Hive Murmer3</a>
59   * @since 1.13
60   */
61  public final class MurmurHash3 {
62  
63      /**
64       * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
65       * hash computed.
66       *
67       * <p>
68       * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
69       * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
70       * </p>
71       *
72       * <p>
73       * This implementation contains a sign-extension bug in the finalization step of
74       * any bytes left over from dividing the length by 4. This manifests if any of these
75       * bytes are negative.
76       * </p>
77       *
78       * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
79       */
80      @Deprecated
81      public static class IncrementalHash32 extends IncrementalHash32x86 {
82  
83          /**
84           * Constructs a new instance.
85           */
86          public IncrementalHash32() {
87              // empty
88          }
89  
90          /**
91           * {@inheritDoc}
92           *
93           * <p>
94           * This implementation contains a sign-extension bug in the finalization step of
95           * any bytes left over from dividing the length by 4. This manifests if any of these
96           * bytes are negative.
97           * <p>
98           *
99           * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
100          */
101         @Override
102         @Deprecated
103         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
104             int result = hash;
105             // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
106             int k1 = 0;
107             switch (unprocessedLength) {
108             case 3:
109                 k1 ^= unprocessed[2] << 16;
110                 // falls-through
111             case 2:
112                 k1 ^= unprocessed[1] << 8;
113                 // falls-through
114             case 1:
115                 k1 ^= unprocessed[0];
116                 // mix functions
117                 k1 *= C1_32;
118                 k1 = Integer.rotateLeft(k1, R1_32);
119                 k1 *= C2_32;
120                 result ^= k1;
121             }
122             // finalization
123             result ^= totalLen;
124             return fmix32(result);
125         }
126     }
127 
128     /**
129      * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
130      * hash computed.
131      *
132      * <p>
133      * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
134      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
135      * </p>
136      *
137      * @since 1.14
138      */
139     public static class IncrementalHash32x86 {
140 
141         /** The size of byte blocks that are processed together. */
142         private static final int BLOCK_SIZE = 4;
143 
144         /**
145          * Combines the bytes using an Or operation ({@code | } in a little-endian representation
146          * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
147          * significant.
148          *
149          * @param b1 The first byte.
150          * @param b2 The second byte.
151          * @param b3 The third byte.
152          * @param b4 The fourth byte.
153          * @return The 32-bit integer.
154          */
155         private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
156             return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
157         }
158 
159         /** Up to 3 unprocessed bytes from input data. */
160         private final byte[] unprocessed = new byte[3];
161 
162         /** The number of unprocessed bytes in the tail data. */
163         private int unprocessedLength;
164 
165         /** The total number of input bytes added since the start. */
166         private int totalLen;
167 
168         /**
169          * The current running hash.
170          * This must be finalized to generate the 32-bit hash value.
171          */
172         private int hash;
173 
174         /**
175          * Constructs a new instance.
176          */
177         public IncrementalHash32x86() {
178             // empty
179         }
180 
181         /**
182          * Adds the byte array to the current incremental hash.
183          *
184          * @param data The input byte array.
185          * @param offset The offset of data.
186          * @param length The length of array.
187          */
188         public final void add(final byte[] data, final int offset, final int length) {
189             if (length <= 0) {
190                 // Nothing to add
191                 return;
192             }
193             totalLen += length;
194             // Process the bytes in blocks of 4.
195             // New bytes must be added to any current unprocessed bytes,
196             // then processed in blocks of 4 and the remaining bytes saved:
197             //
198             //    |--|---------------------------|--|
199             // unprocessed
200             //                main block
201             //                                remaining
202 
203             // Check if the unprocessed bytes and new bytes can fill a block of 4.
204             // Make this overflow safe in the event that length is Integer.MAX_VALUE.
205             // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
206             if (unprocessedLength + length - BLOCK_SIZE < 0) {
207                 // Not enough so add to the unprocessed bytes
208                 System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
209                 unprocessedLength += length;
210                 return;
211             }
212             // Combine unprocessed bytes with new bytes.
213             final int newOffset;
214             final int newLength;
215             if (unprocessedLength > 0) {
216                 int k = -1;
217                 switch (unprocessedLength) {
218                 case 1:
219                     k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
220                     break;
221                 case 2:
222                     k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
223                     break;
224                 case 3:
225                     k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
226                     break;
227                 default:
228                     throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
229                 }
230                 hash = mix32(k, hash);
231                 // Update the offset and length
232                 final int consumed = BLOCK_SIZE - unprocessedLength;
233                 newOffset = offset + consumed;
234                 newLength = length - consumed;
235             } else {
236                 newOffset = offset;
237                 newLength = length;
238             }
239             // Main processing of blocks of 4 bytes
240             final int nblocks = newLength >> 2;
241 
242             for (int i = 0; i < nblocks; i++) {
243                 final int index = newOffset + (i << 2);
244                 final int k = MurmurHash.getLittleEndianInt(data, index);
245                 hash = mix32(k, hash);
246             }
247             // Save left-over unprocessed bytes
248             final int consumed = nblocks << 2;
249             unprocessedLength = newLength - consumed;
250             if (unprocessedLength != 0) {
251                 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
252             }
253         }
254 
255         /**
256          * Generates the 32-bit hash value. Repeat calls to this method with no additional data
257          * will generate the same hash value.
258          *
259          * @return The 32-bit hash.
260          */
261         public final int end() {
262             // Allow calling end() again after adding no data to return the same result.
263             return finalise(hash, unprocessedLength, unprocessed, totalLen);
264         }
265 
266         /**
267          * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
268          * and performing final mixing.
269          *
270          * @param hash The running hash.
271          * @param unprocessedLength The number of unprocessed bytes in the tail data.
272          * @param unprocessed Up to 3 unprocessed bytes from input data.
273          * @param totalLen The total number of input bytes added since the start.
274          * @return The 32-bit hash.
275          */
276         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
277             int result = hash;
278             int k1 = 0;
279             switch (unprocessedLength) {
280             case 3:
281                 k1 ^= (unprocessed[2] & 0xff) << 16;
282                 // falls-through
283             case 2:
284                 k1 ^= (unprocessed[1] & 0xff) << 8;
285                 // falls-through
286             case 1:
287                 k1 ^= unprocessed[0] & 0xff;
288                 // mix functions
289                 k1 *= C1_32;
290                 k1 = Integer.rotateLeft(k1, R1_32);
291                 k1 *= C2_32;
292                 result ^= k1;
293             }
294             // finalization
295             result ^= totalLen;
296             return fmix32(result);
297         }
298 
299         /**
300          * Starts a new incremental hash.
301          *
302          * @param seed The initial seed value.
303          */
304         public final void start(final int seed) {
305             // Reset
306             unprocessedLength = totalLen = 0;
307             hash = seed;
308         }
309     }
310 
311     /**
312      * A random number to use for a hash code.
313      *
314      * @deprecated This is not used internally and will be removed in a future release.
315      */
316     @Deprecated
317     public static final long NULL_HASHCODE = 2862933555777941757L;
318 
319     /**
320      * A default seed to use for the murmur hash algorithm.
321      * Has the value {@code 104729}.
322      */
323     public static final int DEFAULT_SEED = 104729;
324     // Constants for 32-bit variant
325     private static final int C1_32 = 0xcc9e2d51;
326     private static final int C2_32 = 0x1b873593;
327     private static final int R1_32 = 15;
328     private static final int R2_32 = 13;
329 
330     private static final int M_32 = 5;
331     private static final int N_32 = 0xe6546b64;
332     // Constants for 128-bit variant
333     private static final long C1 = 0x87c37b91114253d5L;
334     private static final long C2 = 0x4cf5ad432745937fL;
335     private static final int R1 = 31;
336     private static final int R2 = 27;
337     private static final int R3 = 33;
338     private static final int M = 5;
339 
340     private static final int N1 = 0x52dce729;
341 
342     private static final int N2 = 0x38495ab5;
343 
344     /**
345      * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
346      *
347      * @param hash The current hash.
348      * @return The final hash.
349      */
350     private static int fmix32(int hash) {
351         hash ^= hash >>> 16;
352         hash *= 0x85ebca6b;
353         hash ^= hash >>> 13;
354         hash *= 0xc2b2ae35;
355         hash ^= hash >>> 16;
356         return hash;
357     }
358 
359     /**
360      * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
361      *
362      * @param hash The current hash.
363      * @return The final hash.
364      */
365     private static long fmix64(long hash) {
366         hash ^= hash >>> 33;
367         hash *= 0xff51afd7ed558ccdL;
368         hash ^= hash >>> 33;
369         hash *= 0xc4ceb9fe1a85ec53L;
370         hash ^= hash >>> 33;
371         return hash;
372     }
373 
374     /**
375      * Generates 128-bit hash from the byte array with a default seed.
376      * This is a helper method that will produce the same result as:
377      *
378      * <pre>
379      * int offset = 0;
380      * int seed = 104729;
381      * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
382      * </pre>
383      *
384      * <p>
385      * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
386      * this result as the default seed is positive.
387      * </p>
388      *
389      * @param data The input byte array.
390      * @return The 128-bit hash (2 longs).
391      * @see #hash128(byte[], int, int, int)
392      */
393     public static long[] hash128(final byte[] data) {
394         return hash128(data, 0, data.length, DEFAULT_SEED);
395     }
396 
397     /**
398      * Generates 128-bit hash from the byte array with the given offset, length and seed.
399      *
400      * <p>
401      * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
402      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
403      * </p>
404      *
405      * <p>
406      * This implementation contains a sign-extension bug in the seed initialization.
407      * This manifests if the seed is negative.
408      * </p>
409      *
410      * @param data The input byte array.
411      * @param offset The first element of array.
412      * @param length The length of array.
413      * @param seed The initial seed value.
414      * @return The 128-bit hash (2 longs).
415      * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
416      */
417     @Deprecated
418     public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
419         // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
420         // to maintain behavioral compatibility with the original version.
421         // The implicit conversion to a long will extend a negative sign
422         // bit through the upper 32-bits of the long seed. These should be zero.
423         return hash128x64Internal(data, offset, length, seed);
424     }
425 
426     /**
427      * Generates 128-bit hash from a string with a default seed.
428      *
429      * <p>
430      * Before 1.14 the string was converted using default encoding. Since 1.14 the string is converted to bytes using UTF-8 encoding.
431      * </p>
432      * <p>
433      * This is a helper method that will produce the same result as:
434      * </p>
435      *
436      * <pre>
437      * int offset = 0;
438      * int seed = 104729;
439      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
440      * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
441      * </pre>
442      *
443      * <p>
444      * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect this result as the default seed is positive.
445      * </p>
446      *
447      * @param data The input String.
448      * @return The 128-bit hash (2 longs).
449      * @see #hash128(byte[], int, int, int)
450      * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from {@link String#getBytes(java.nio.charset.Charset)}.
451      */
452     @Deprecated
453     public static long[] hash128(final String data) {
454         final byte[] bytes = StringUtils.getBytesUtf8(data);
455         return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
456     }
457 
458     /**
459      * Generates 128-bit hash from the byte array with a seed of zero.
460      * This is a helper method that will produce the same result as:
461      *
462      * <pre>
463      * int offset = 0;
464      * int seed = 0;
465      * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
466      * </pre>
467      *
468      * @param data The input byte array.
469      * @return The 128-bit hash (2 longs).
470      * @see #hash128x64(byte[], int, int, int)
471      * @since 1.14
472      */
473     public static long[] hash128x64(final byte[] data) {
474         return hash128x64(data, 0, data.length, 0);
475     }
476 
477     /**
478      * Generates 128-bit hash from the byte array with the given offset, length and seed.
479      *
480      * <p>
481      * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
482      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
483      * </p>
484      *
485      * @param data The input byte array.
486      * @param offset The first element of array.
487      * @param length The length of array.
488      * @param seed The initial seed value.
489      * @return The 128-bit hash (2 longs).
490      * @since 1.14
491      */
492     public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
493         // Use an unsigned 32-bit integer as the seed
494         return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
495     }
496 
497     /**
498      * Generates 128-bit hash from the byte array with the given offset, length and seed.
499      *
500      * <p>
501      * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
502      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
503      * </p>
504      *
505      * @param data The input byte array.
506      * @param offset The first element of array.
507      * @param length The length of array.
508      * @param seed The initial seed value.
509      * @return The 128-bit hash (2 longs).
510      */
511     private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
512         long h1 = seed;
513         long h2 = seed;
514         final int nblocks = length >> 4;
515         // body
516         for (int i = 0; i < nblocks; i++) {
517             final int index = offset + (i << 4);
518             long k1 = MurmurHash.getLittleEndianLong(data, index);
519             long k2 = MurmurHash.getLittleEndianLong(data, index + 8);
520             // mix functions for k1
521             k1 *= C1;
522             k1 = Long.rotateLeft(k1, R1);
523             k1 *= C2;
524             h1 ^= k1;
525             h1 = Long.rotateLeft(h1, R2);
526             h1 += h2;
527             h1 = h1 * M + N1;
528             // mix functions for k2
529             k2 *= C2;
530             k2 = Long.rotateLeft(k2, R3);
531             k2 *= C1;
532             h2 ^= k2;
533             h2 = Long.rotateLeft(h2, R1);
534             h2 += h1;
535             h2 = h2 * M + N2;
536         }
537         // tail
538         long k1 = 0;
539         long k2 = 0;
540         final int index = offset + (nblocks << 4);
541         switch (offset + length - index) {
542         case 15:
543             k2 ^= ((long) data[index + 14] & 0xff) << 48;
544             // falls-through
545         case 14:
546             k2 ^= ((long) data[index + 13] & 0xff) << 40;
547             // falls-through
548         case 13:
549             k2 ^= ((long) data[index + 12] & 0xff) << 32;
550             // falls-through
551         case 12:
552             k2 ^= ((long) data[index + 11] & 0xff) << 24;
553             // falls-through
554         case 11:
555             k2 ^= ((long) data[index + 10] & 0xff) << 16;
556             // falls-through
557         case 10:
558             k2 ^= ((long) data[index + 9] & 0xff) << 8;
559             // falls-through
560         case 9:
561             k2 ^= data[index + 8] & 0xff;
562             k2 *= C2;
563             k2 = Long.rotateLeft(k2, R3);
564             k2 *= C1;
565             h2 ^= k2;
566             // falls-through
567         case 8:
568             k1 ^= ((long) data[index + 7] & 0xff) << 56;
569             // falls-through
570         case 7:
571             k1 ^= ((long) data[index + 6] & 0xff) << 48;
572             // falls-through
573         case 6:
574             k1 ^= ((long) data[index + 5] & 0xff) << 40;
575             // falls-through
576         case 5:
577             k1 ^= ((long) data[index + 4] & 0xff) << 32;
578             // falls-through
579         case 4:
580             k1 ^= ((long) data[index + 3] & 0xff) << 24;
581             // falls-through
582         case 3:
583             k1 ^= ((long) data[index + 2] & 0xff) << 16;
584             // falls-through
585         case 2:
586             k1 ^= ((long) data[index + 1] & 0xff) << 8;
587             // falls-through
588         case 1:
589             k1 ^= data[index] & 0xff;
590             k1 *= C1;
591             k1 = Long.rotateLeft(k1, R1);
592             k1 *= C2;
593             h1 ^= k1;
594         }
595         // finalization
596         h1 ^= length;
597         h2 ^= length;
598 
599         h1 += h2;
600         h2 += h1;
601 
602         h1 = fmix64(h1);
603         h2 = fmix64(h2);
604 
605         h1 += h2;
606         h2 += h1;
607         return new long[] { h1, h2 };
608     }
609 
610     /**
611      * Generates 32-bit hash from the byte array with a default seed.
612      * This is a helper method that will produce the same result as:
613      *
614      * <pre>
615      * int offset = 0;
616      * int seed = 104729;
617      * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
618      * </pre>
619      *
620      * <p>
621      * This implementation contains a sign-extension bug in the finalization step of
622      * any bytes left over from dividing the length by 4. This manifests if any of these
623      * bytes are negative.
624      * </p>
625      *
626      * @param data The input byte array.
627      * @return The 32-bit hash.
628      * @see #hash32(byte[], int, int, int)
629      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
630      */
631     @Deprecated
632     public static int hash32(final byte[] data) {
633         return hash32(data, 0, data.length, DEFAULT_SEED);
634     }
635 
636     /**
637      * Generates 32-bit hash from the byte array with the given length and a default seed.
638      * This is a helper method that will produce the same result as:
639      *
640      * <pre>
641      * int offset = 0;
642      * int seed = 104729;
643      * int hash = MurmurHash3.hash32(data, offset, length, seed);
644      * </pre>
645      *
646      * <p>
647      * This implementation contains a sign-extension bug in the finalization step of
648      * any bytes left over from dividing the length by 4. This manifests if any of these
649      * bytes are negative.
650      * </p>
651      *
652      * @param data The input byte array.
653      * @param length The length of array.
654      * @return The 32-bit hash.
655      * @see #hash32(byte[], int, int, int)
656      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
657      */
658     @Deprecated
659     public static int hash32(final byte[] data, final int length) {
660         return hash32(data, length, DEFAULT_SEED);
661     }
662 
663     /**
664      * Generates 32-bit hash from the byte array with the given length and seed. This is a
665      * helper method that will produce the same result as:
666      *
667      * <pre>
668      * int offset = 0;
669      * int hash = MurmurHash3.hash32(data, offset, length, seed);
670      * </pre>
671      *
672      * <p>
673      * This implementation contains a sign-extension bug in the finalization step of
674      * any bytes left over from dividing the length by 4. This manifests if any of these
675      * bytes are negative.
676      * </p>
677      *
678      * @param data The input byte array.
679      * @param length The length of array.
680      * @param seed The initial seed value.
681      * @return The 32-bit hash.
682      * @see #hash32(byte[], int, int, int)
683      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
684      */
685     @Deprecated
686     public static int hash32(final byte[] data, final int length, final int seed) {
687         return hash32(data, 0, length, seed);
688     }
689 
690     /**
691      * Generates 32-bit hash from the byte array with the given offset, length and seed.
692      *
693      * <p>
694      * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
695      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
696      * </p>
697      *
698      * <p>
699      * This implementation contains a sign-extension bug in the finalization step of
700      * any bytes left over from dividing the length by 4. This manifests if any of these
701      * bytes are negative.
702      * </p>
703      *
704      * @param data The input byte array.
705      * @param offset The offset of data.
706      * @param length The length of array.
707      * @param seed The initial seed value.
708      * @return The 32-bit hash.
709      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
710      */
711     @Deprecated
712     public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
713         int hash = seed;
714         final int nblocks = length >> 2;
715         // body
716         for (int i = 0; i < nblocks; i++) {
717             final int index = offset + (i << 2);
718             final int k = MurmurHash.getLittleEndianInt(data, index);
719             hash = mix32(k, hash);
720         }
721         // tail
722         // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
723         final int index = offset + (nblocks << 2);
724         int k1 = 0;
725         switch (offset + length - index) {
726         case 3:
727             k1 ^= data[index + 2] << 16;
728             // falls-through
729         case 2:
730             k1 ^= data[index + 1] << 8;
731             // falls-through
732         case 1:
733             k1 ^= data[index];
734             // mix functions
735             k1 *= C1_32;
736             k1 = Integer.rotateLeft(k1, R1_32);
737             k1 *= C2_32;
738             hash ^= k1;
739         }
740         hash ^= length;
741         return fmix32(hash);
742     }
743 
744     /**
745      * Generates 32-bit hash from a long with a default seed value.
746      * This is a helper method that will produce the same result as:
747      *
748      * <pre>
749      * int offset = 0;
750      * int seed = 104729;
751      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
752      *                                            .putLong(data)
753      *                                            .array(), offset, 8, seed);
754      * </pre>
755      *
756      * @param data The long to hash.
757      * @return The 32-bit hash.
758      * @see #hash32x86(byte[], int, int, int)
759      */
760     public static int hash32(final long data) {
761         return hash32(data, DEFAULT_SEED);
762     }
763 
764     /**
765      * Generates 32-bit hash from a long with the given seed.
766      * This is a helper method that will produce the same result as:
767      *
768      * <pre>
769      * int offset = 0;
770      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
771      *                                            .putLong(data)
772      *                                            .array(), offset, 8, seed);
773      * </pre>
774      *
775      * @param data The long to hash.
776      * @param seed The initial seed value.
777      * @return The 32-bit hash.
778      * @see #hash32x86(byte[], int, int, int)
779      */
780     public static int hash32(final long data, final int seed) {
781         int hash = seed;
782         final long r0 = Long.reverseBytes(data);
783 
784         hash = mix32((int) r0, hash);
785         hash = mix32((int) (r0 >>> 32), hash);
786 
787         hash ^= Long.BYTES;
788         return fmix32(hash);
789     }
790 
791     /**
792      * Generates 32-bit hash from two longs with a default seed value.
793      * This is a helper method that will produce the same result as:
794      *
795      * <pre>
796      * int offset = 0;
797      * int seed = 104729;
798      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
799      *                                            .putLong(data1)
800      *                                            .putLong(data2)
801      *                                            .array(), offset, 16, seed);
802      * </pre>
803      *
804      * @param data1 The first long to hash.
805      * @param data2 The second long to hash.
806      * @return The 32-bit hash.
807      * @see #hash32x86(byte[], int, int, int)
808      */
809     public static int hash32(final long data1, final long data2) {
810         return hash32(data1, data2, DEFAULT_SEED);
811     }
812 
813     /**
814      * Generates 32-bit hash from two longs with the given seed.
815      * This is a helper method that will produce the same result as:
816      *
817      * <pre>
818      * int offset = 0;
819      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
820      *                                            .putLong(data1)
821      *                                            .putLong(data2)
822      *                                            .array(), offset, 16, seed);
823      * </pre>
824      *
825      * @param data1 The first long to hash.
826      * @param data2 The second long to hash.
827      * @param seed The initial seed value.
828      * @return The 32-bit hash.
829      * @see #hash32x86(byte[], int, int, int)
830      */
831     public static int hash32(final long data1, final long data2, final int seed) {
832         int hash = seed;
833         final long r0 = Long.reverseBytes(data1);
834         final long r1 = Long.reverseBytes(data2);
835 
836         hash = mix32((int) r0, hash);
837         hash = mix32((int) (r0 >>> 32), hash);
838         hash = mix32((int) r1, hash);
839         hash = mix32((int) (r1 >>> 32), hash);
840 
841         hash ^= Long.BYTES * 2;
842         return fmix32(hash);
843     }
844 
845     /**
846      * Generates 32-bit hash from a string with a default seed.
847      * <p>
848      * Before 1.14 the string was converted using default encoding.
849      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
850      * </p>
851      * This is a helper method that will produce the same result as:
852      *
853      * <pre>
854      * int offset = 0;
855      * int seed = 104729;
856      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
857      * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
858      * </pre>
859      *
860      * <p>
861      * This implementation contains a sign-extension bug in the finalization step of
862      * any bytes left over from dividing the length by 4. This manifests if any of these
863      * bytes are negative.
864      * </p>
865      *
866      * @param data The input string.
867      * @return The 32-bit hash.
868      * @see #hash32(byte[], int, int, int)
869      * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
870      * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
871      */
872     @Deprecated
873     public static int hash32(final String data) {
874         final byte[] bytes = StringUtils.getBytesUtf8(data);
875         return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
876     }
877 
878     /**
879      * Generates 32-bit hash from the byte array with a seed of zero.
880      * This is a helper method that will produce the same result as:
881      *
882      * <pre>
883      * int offset = 0;
884      * int seed = 0;
885      * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
886      * </pre>
887      *
888      * @param data The input byte array.
889      * @return The 32-bit hash.
890      * @see #hash32x86(byte[], int, int, int)
891      * @since 1.14
892      */
893     public static int hash32x86(final byte[] data) {
894         return hash32x86(data, 0, data.length, 0);
895     }
896 
897     /**
898      * Generates 32-bit hash from the byte array with the given offset, length and seed.
899      *
900      * <p>
901      * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
902      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
903      * </p>
904      *
905      * @param data The input byte array.
906      * @param offset The offset of data.
907      * @param length The length of array.
908      * @param seed The initial seed value.
909      * @return The 32-bit hash.
910      * @since 1.14
911      */
912     public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
913         int hash = seed;
914         final int nblocks = length >> 2;
915         // body
916         for (int i = 0; i < nblocks; i++) {
917             final int index = offset + (i << 2);
918             final int k = MurmurHash.getLittleEndianInt(data, index);
919             hash = mix32(k, hash);
920         }
921         // tail
922         final int index = offset + (nblocks << 2);
923         int k1 = 0;
924         switch (offset + length - index) {
925         case 3:
926             k1 ^= (data[index + 2] & 0xff) << 16;
927             // falls-through
928         case 2:
929             // falls-through
930             k1 ^= (data[index + 1] & 0xff) << 8;
931             // falls-through
932         case 1:
933             k1 ^= data[index] & 0xff;
934             // mix functions
935             k1 *= C1_32;
936             k1 = Integer.rotateLeft(k1, R1_32);
937             k1 *= C2_32;
938             hash ^= k1;
939         }
940         hash ^= length;
941         return fmix32(hash);
942     }
943 
944     /**
945      * Generates 64-bit hash from a byte array with a default seed.
946      *
947      * <p>
948      * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
949      * </p>
950      *
951      * <p>
952      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
953      * with the same byte data. This method will be removed in a future release.
954      * </p>
955      *
956      * <p>
957      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
958      * </p>
959      *
960      * <p>
961      * This is a helper method that will produce the same result as:
962      * </p>
963      *
964      * <pre>
965      *
966      * int offset = 0;
967      * int seed = 104729;
968      * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
969      * </pre>
970      *
971      * @param data The input byte array.
972      * @return The 64-bit hash.
973      * @see #hash64(byte[], int, int, int)
974      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])}.
975      */
976     @Deprecated
977     public static long hash64(final byte[] data) {
978         return hash64(data, 0, data.length, DEFAULT_SEED);
979     }
980 
981     /**
982      * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
983      *
984      * <p>
985      * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
986      * </p>
987      *
988      * <p>
989      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
990      * with the same byte data. This method will be removed in a future release.
991      * </p>
992      *
993      * <p>
994      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
995      * </p>
996      *
997      * <p>
998      * This is a helper method that will produce the same result as:
999      * </p>
1000      *
1001      * <pre>
1002      *
1003      * int seed = 104729;
1004      * long hash = MurmurHash3.hash64(data, offset, length, seed);
1005      * </pre>
1006      *
1007      * @param data   The input byte array.
1008      * @param offset The offset of data.
1009      * @param length The length of array.
1010      * @return The 64-bit hash.
1011      * @see #hash64(byte[], int, int, int)
1012      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1013      */
1014     @Deprecated
1015     public static long hash64(final byte[] data, final int offset, final int length) {
1016         return hash64(data, offset, length, DEFAULT_SEED);
1017     }
1018 
1019     /**
1020      * Generates 64-bit hash from a byte array with the given offset, length and seed.
1021      *
1022      * <p>
1023      * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1024      * </p>
1025      *
1026      * <p>
1027      * This is a Murmur3-like 64-bit variant. This method will be removed in a future release.
1028      * </p>
1029      *
1030      * <p>
1031      * This implementation contains a sign-extension bug in the seed initialization. This manifests if the seed is negative.
1032      * </p>
1033      *
1034      * <p>
1035      * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks of data processed in the MurmurHash3
1036      * {@code MurmurHash3_x64_128} method. However the hash is not mixed with a hash chunk from the next 8 bytes of data. The method will not return the same
1037      * value as the first or second 64-bits of the function {@link #hash128(byte[], int, int, int)}.
1038      * </p>
1039      *
1040      * <p>
1041      * Use of this method is not advised. Use the first long returned from {@link #hash128x64(byte[], int, int, int)}.
1042      * </p>
1043      *
1044      * @param data   The input byte array.
1045      * @param offset The offset of data.
1046      * @param length The length of array.
1047      * @param seed   The initial seed value.
1048      * @return The 64-bit hash.
1049      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1050      */
1051     @Deprecated
1052     public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1053         // Note: This fails to apply masking using 0xffffffffL to the seed.
1054         long hash = seed;
1055         final int nblocks = length >> 3;
1056         // body
1057         for (int i = 0; i < nblocks; i++) {
1058             final int index = offset + (i << 3);
1059             long k = MurmurHash.getLittleEndianLong(data, index);
1060             // mix functions
1061             k *= C1;
1062             k = Long.rotateLeft(k, R1);
1063             k *= C2;
1064             hash ^= k;
1065             hash = Long.rotateLeft(hash, R2) * M + N1;
1066         }
1067         // tail
1068         long k1 = 0;
1069         final int index = offset + (nblocks << 3);
1070         switch (offset + length - index) {
1071         case 7:
1072             k1 ^= ((long) data[index + 6] & 0xff) << 48;
1073             // falls-through
1074         case 6:
1075             k1 ^= ((long) data[index + 5] & 0xff) << 40;
1076             // falls-through
1077         case 5:
1078             k1 ^= ((long) data[index + 4] & 0xff) << 32;
1079             // falls-through
1080         case 4:
1081             k1 ^= ((long) data[index + 3] & 0xff) << 24;
1082             // falls-through
1083         case 3:
1084             k1 ^= ((long) data[index + 2] & 0xff) << 16;
1085             // falls-through
1086         case 2:
1087             k1 ^= ((long) data[index + 1] & 0xff) << 8;
1088             // falls-through
1089         case 1:
1090             k1 ^= (long) data[index] & 0xff;
1091             k1 *= C1;
1092             k1 = Long.rotateLeft(k1, R1);
1093             k1 *= C2;
1094             hash ^= k1;
1095         }
1096         // finalization
1097         hash ^= length;
1098         return fmix64(hash);
1099     }
1100 
1101     /**
1102      * Generates 64-bit hash from an int with a default seed.
1103      *
1104      * <p>
1105      * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1106      * </p>
1107      *
1108      * <p>
1109      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1110      * with the same byte data from the {@code int}. This method will be removed in a future release.
1111      * </p>
1112      *
1113      * <p>
1114      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1115      * </p>
1116      *
1117      * <p>
1118      * This is a helper method that will produce the same result as:
1119      * </p>
1120      *
1121      * <pre>
1122      *
1123      * int offset = 0;
1124      * int seed = 104729;
1125      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4).putInt(data).array(), offset, 4, seed);
1126      * </pre>
1127      *
1128      * @param data The int to hash.
1129      * @return The 64-bit hash.
1130      * @see #hash64(byte[], int, int, int)
1131      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1132      */
1133     @Deprecated
1134     public static long hash64(final int data) {
1135         long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1136         long hash = DEFAULT_SEED;
1137         k1 *= C1;
1138         k1 = Long.rotateLeft(k1, R1);
1139         k1 *= C2;
1140         hash ^= k1;
1141         // finalization
1142         hash ^= Integer.BYTES;
1143         return fmix64(hash);
1144     }
1145 
1146     /**
1147      * Generates 64-bit hash from a long with a default seed.
1148      *
1149      * <p>
1150      * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1151      * </p>
1152      *
1153      * <p>
1154      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1155      * with the same byte data from the {@code long}. This method will be removed in a future release.
1156      * </p>
1157      *
1158      * <p>
1159      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1160      * </p>
1161      *
1162      * <p>
1163      * This is a helper method that will produce the same result as:
1164      * </p>
1165      *
1166      * <pre>
1167      *
1168      * int offset = 0;
1169      * int seed = 104729;
1170      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8).putLong(data).array(), offset, 8, seed);
1171      * </pre>
1172      *
1173      * @param data The long to hash.
1174      * @return The 64-bit hash.
1175      * @see #hash64(byte[], int, int, int)
1176      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1177      */
1178     @Deprecated
1179     public static long hash64(final long data) {
1180         long hash = DEFAULT_SEED;
1181         long k = Long.reverseBytes(data);
1182         // mix functions
1183         k *= C1;
1184         k = Long.rotateLeft(k, R1);
1185         k *= C2;
1186         hash ^= k;
1187         hash = Long.rotateLeft(hash, R2) * M + N1;
1188         // finalization
1189         hash ^= Long.BYTES;
1190         return fmix64(hash);
1191     }
1192 
1193     /**
1194      * Generates 64-bit hash from a short with a default seed.
1195      *
1196      * <p>
1197      * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1198      * </p>
1199      *
1200      * <p>
1201      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1202      * with the same byte data from the {@code short}. This method will be removed in a future release.
1203      * </p>
1204      *
1205      * <p>
1206      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1207      * </p>
1208      *
1209      * <p>
1210      * This is a helper method that will produce the same result as:
1211      * </p>
1212      *
1213      * <pre>
1214      *
1215      * int offset = 0;
1216      * int seed = 104729;
1217      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2).putShort(data).array(), offset, 2, seed);
1218      * </pre>
1219      *
1220      * @param data The short to hash.
1221      * @return The 64-bit hash.
1222      * @see #hash64(byte[], int, int, int)
1223      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the
1224      *             {@code short}.
1225      */
1226     @Deprecated
1227     public static long hash64(final short data) {
1228         long hash = DEFAULT_SEED;
1229         long k1 = 0;
1230         k1 ^= ((long) data & 0xff) << 8;
1231         k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1232         k1 *= C1;
1233         k1 = Long.rotateLeft(k1, R1);
1234         k1 *= C2;
1235         hash ^= k1;
1236         // finalization
1237         hash ^= Short.BYTES;
1238         return fmix64(hash);
1239     }
1240 
1241     /**
1242      * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1243      *
1244      * @param k The data to add to the hash.
1245      * @param hash The current hash.
1246      * @return The new hash.
1247      */
1248     private static int mix32(int k, int hash) {
1249         k *= C1_32;
1250         k = Integer.rotateLeft(k, R1_32);
1251         k *= C2_32;
1252         hash ^= k;
1253         return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1254     }
1255 
1256     /** No instance methods. */
1257     private MurmurHash3() {
1258     }
1259 }