001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.digest;
019
020import org.apache.commons.codec.binary.StringUtils;
021
022/**
023 * Implements the MurmurHash3 32-bit and 128-bit hash functions.
024 *
025 * <p>
026 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
027 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
028 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
029 * </p>
030 *
031 * <p>
032 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
033 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
034 * </p>
035 *
036 * <p>
037 * This is public domain code with no copyrights. From home page of
038 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
039 * </p>
040 *
041 * <blockquote>
042 * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
043 * code."
044 * </blockquote>
045 *
046 * <p>
047 * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>.
048 * That adaption contains a {@code hash64} method that is not part of the original
049 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
050 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
051 * </p>
052 *
053 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
054 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++
055 *      code</a>
056 * @see <a href=
057 *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
058 *      Apache Hive Murmer3</a>
059 * @since 1.13
060 */
061public final class MurmurHash3 {
062
063    /**
064     * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
065     * hash computed.
066     *
067     * <p>
068     * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
069     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
070     * </p>
071     *
072     * <p>
073     * This implementation contains a sign-extension bug in the finalization step of
074     * any bytes left over from dividing the length by 4. This manifests if any of these
075     * bytes are negative.
076     * </p>
077     *
078     * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
079     */
080    @Deprecated
081    public static class IncrementalHash32 extends IncrementalHash32x86 {
082
083        /**
084         * Constructs a new instance.
085         */
086        public IncrementalHash32() {
087            // empty
088        }
089
090        /**
091         * {@inheritDoc}
092         *
093         * <p>
094         * This implementation contains a sign-extension bug in the finalization step of
095         * any bytes left over from dividing the length by 4. This manifests if any of these
096         * bytes are negative.
097         * <p>
098         *
099         * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
100         */
101        @Override
102        @Deprecated
103        int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
104            int result = hash;
105            // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
106            int k1 = 0;
107            switch (unprocessedLength) {
108            case 3:
109                k1 ^= unprocessed[2] << 16;
110                // falls-through
111            case 2:
112                k1 ^= unprocessed[1] << 8;
113                // falls-through
114            case 1:
115                k1 ^= unprocessed[0];
116                // mix functions
117                k1 *= C1_32;
118                k1 = Integer.rotateLeft(k1, R1_32);
119                k1 *= C2_32;
120                result ^= k1;
121            }
122            // finalization
123            result ^= totalLen;
124            return fmix32(result);
125        }
126    }
127
128    /**
129     * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
130     * hash computed.
131     *
132     * <p>
133     * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
134     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
135     * </p>
136     *
137     * @since 1.14
138     */
139    public static class IncrementalHash32x86 {
140
141        /** The size of byte blocks that are processed together. */
142        private static final int BLOCK_SIZE = 4;
143
144        /**
145         * Combines the bytes using an Or operation ({@code | } in a little-endian representation
146         * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
147         * significant.
148         *
149         * @param b1 The first byte.
150         * @param b2 The second byte.
151         * @param b3 The third byte.
152         * @param b4 The fourth byte.
153         * @return The 32-bit integer.
154         */
155        private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
156            return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
157        }
158
159        /** Up to 3 unprocessed bytes from input data. */
160        private final byte[] unprocessed = new byte[3];
161
162        /** The number of unprocessed bytes in the tail data. */
163        private int unprocessedLength;
164
165        /** The total number of input bytes added since the start. */
166        private int totalLen;
167
168        /**
169         * The current running hash.
170         * This must be finalized to generate the 32-bit hash value.
171         */
172        private int hash;
173
174        /**
175         * Constructs a new instance.
176         */
177        public IncrementalHash32x86() {
178            // empty
179        }
180
181        /**
182         * Adds the byte array to the current incremental hash.
183         *
184         * @param data The input byte array.
185         * @param offset The offset of data.
186         * @param length The length of array.
187         */
188        public final void add(final byte[] data, final int offset, final int length) {
189            if (length <= 0) {
190                // Nothing to add
191                return;
192            }
193            totalLen += length;
194            // Process the bytes in blocks of 4.
195            // New bytes must be added to any current unprocessed bytes,
196            // then processed in blocks of 4 and the remaining bytes saved:
197            //
198            //    |--|---------------------------|--|
199            // unprocessed
200            //                main block
201            //                                remaining
202
203            // Check if the unprocessed bytes and new bytes can fill a block of 4.
204            // Make this overflow safe in the event that length is Integer.MAX_VALUE.
205            // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
206            if (unprocessedLength + length - BLOCK_SIZE < 0) {
207                // Not enough so add to the unprocessed bytes
208                System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
209                unprocessedLength += length;
210                return;
211            }
212            // Combine unprocessed bytes with new bytes.
213            final int newOffset;
214            final int newLength;
215            if (unprocessedLength > 0) {
216                int k = -1;
217                switch (unprocessedLength) {
218                case 1:
219                    k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
220                    break;
221                case 2:
222                    k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
223                    break;
224                case 3:
225                    k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
226                    break;
227                default:
228                    throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
229                }
230                hash = mix32(k, hash);
231                // Update the offset and length
232                final int consumed = BLOCK_SIZE - unprocessedLength;
233                newOffset = offset + consumed;
234                newLength = length - consumed;
235            } else {
236                newOffset = offset;
237                newLength = length;
238            }
239            // Main processing of blocks of 4 bytes
240            final int nblocks = newLength >> 2;
241
242            for (int i = 0; i < nblocks; i++) {
243                final int index = newOffset + (i << 2);
244                final int k = MurmurHash.getLittleEndianInt(data, index);
245                hash = mix32(k, hash);
246            }
247            // Save left-over unprocessed bytes
248            final int consumed = nblocks << 2;
249            unprocessedLength = newLength - consumed;
250            if (unprocessedLength != 0) {
251                System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
252            }
253        }
254
255        /**
256         * Generates the 32-bit hash value. Repeat calls to this method with no additional data
257         * will generate the same hash value.
258         *
259         * @return The 32-bit hash.
260         */
261        public final int end() {
262            // Allow calling end() again after adding no data to return the same result.
263            return finalise(hash, unprocessedLength, unprocessed, totalLen);
264        }
265
266        /**
267         * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
268         * and performing final mixing.
269         *
270         * @param hash The running hash.
271         * @param unprocessedLength The number of unprocessed bytes in the tail data.
272         * @param unprocessed Up to 3 unprocessed bytes from input data.
273         * @param totalLen The total number of input bytes added since the start.
274         * @return The 32-bit hash.
275         */
276        int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
277            int result = hash;
278            int k1 = 0;
279            switch (unprocessedLength) {
280            case 3:
281                k1 ^= (unprocessed[2] & 0xff) << 16;
282                // falls-through
283            case 2:
284                k1 ^= (unprocessed[1] & 0xff) << 8;
285                // falls-through
286            case 1:
287                k1 ^= unprocessed[0] & 0xff;
288                // mix functions
289                k1 *= C1_32;
290                k1 = Integer.rotateLeft(k1, R1_32);
291                k1 *= C2_32;
292                result ^= k1;
293            }
294            // finalization
295            result ^= totalLen;
296            return fmix32(result);
297        }
298
299        /**
300         * Starts a new incremental hash.
301         *
302         * @param seed The initial seed value.
303         */
304        public final void start(final int seed) {
305            // Reset
306            unprocessedLength = totalLen = 0;
307            hash = seed;
308        }
309    }
310
311    /**
312     * A random number to use for a hash code.
313     *
314     * @deprecated This is not used internally and will be removed in a future release.
315     */
316    @Deprecated
317    public static final long NULL_HASHCODE = 2862933555777941757L;
318
319    /**
320     * A default seed to use for the murmur hash algorithm.
321     * Has the value {@code 104729}.
322     */
323    public static final int DEFAULT_SEED = 104729;
324    // Constants for 32-bit variant
325    private static final int C1_32 = 0xcc9e2d51;
326    private static final int C2_32 = 0x1b873593;
327    private static final int R1_32 = 15;
328    private static final int R2_32 = 13;
329
330    private static final int M_32 = 5;
331    private static final int N_32 = 0xe6546b64;
332    // Constants for 128-bit variant
333    private static final long C1 = 0x87c37b91114253d5L;
334    private static final long C2 = 0x4cf5ad432745937fL;
335    private static final int R1 = 31;
336    private static final int R2 = 27;
337    private static final int R3 = 33;
338    private static final int M = 5;
339
340    private static final int N1 = 0x52dce729;
341
342    private static final int N2 = 0x38495ab5;
343
344    /**
345     * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
346     *
347     * @param hash The current hash.
348     * @return The final hash.
349     */
350    private static int fmix32(int hash) {
351        hash ^= hash >>> 16;
352        hash *= 0x85ebca6b;
353        hash ^= hash >>> 13;
354        hash *= 0xc2b2ae35;
355        hash ^= hash >>> 16;
356        return hash;
357    }
358
359    /**
360     * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
361     *
362     * @param hash The current hash.
363     * @return The final hash.
364     */
365    private static long fmix64(long hash) {
366        hash ^= hash >>> 33;
367        hash *= 0xff51afd7ed558ccdL;
368        hash ^= hash >>> 33;
369        hash *= 0xc4ceb9fe1a85ec53L;
370        hash ^= hash >>> 33;
371        return hash;
372    }
373
374    /**
375     * Generates 128-bit hash from the byte array with a default seed.
376     * This is a helper method that will produce the same result as:
377     *
378     * <pre>
379     * int offset = 0;
380     * int seed = 104729;
381     * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
382     * </pre>
383     *
384     * <p>
385     * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
386     * this result as the default seed is positive.
387     * </p>
388     *
389     * @param data The input byte array.
390     * @return The 128-bit hash (2 longs).
391     * @see #hash128(byte[], int, int, int)
392     */
393    public static long[] hash128(final byte[] data) {
394        return hash128(data, 0, data.length, DEFAULT_SEED);
395    }
396
397    /**
398     * Generates 128-bit hash from the byte array with the given offset, length and seed.
399     *
400     * <p>
401     * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
402     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
403     * </p>
404     *
405     * <p>
406     * This implementation contains a sign-extension bug in the seed initialization.
407     * This manifests if the seed is negative.
408     * </p>
409     *
410     * @param data The input byte array.
411     * @param offset The first element of array.
412     * @param length The length of array.
413     * @param seed The initial seed value.
414     * @return The 128-bit hash (2 longs).
415     * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
416     */
417    @Deprecated
418    public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
419        // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
420        // to maintain behavioral compatibility with the original version.
421        // The implicit conversion to a long will extend a negative sign
422        // bit through the upper 32-bits of the long seed. These should be zero.
423        return hash128x64Internal(data, offset, length, seed);
424    }
425
426    /**
427     * Generates 128-bit hash from a string with a default seed.
428     *
429     * <p>
430     * Before 1.14 the string was converted using default encoding. Since 1.14 the string is converted to bytes using UTF-8 encoding.
431     * </p>
432     * <p>
433     * This is a helper method that will produce the same result as:
434     * </p>
435     *
436     * <pre>
437     * int offset = 0;
438     * int seed = 104729;
439     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
440     * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
441     * </pre>
442     *
443     * <p>
444     * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect this result as the default seed is positive.
445     * </p>
446     *
447     * @param data The input String.
448     * @return The 128-bit hash (2 longs).
449     * @see #hash128(byte[], int, int, int)
450     * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from {@link String#getBytes(java.nio.charset.Charset)}.
451     */
452    @Deprecated
453    public static long[] hash128(final String data) {
454        final byte[] bytes = StringUtils.getBytesUtf8(data);
455        return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
456    }
457
458    /**
459     * Generates 128-bit hash from the byte array with a seed of zero.
460     * This is a helper method that will produce the same result as:
461     *
462     * <pre>
463     * int offset = 0;
464     * int seed = 0;
465     * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
466     * </pre>
467     *
468     * @param data The input byte array.
469     * @return The 128-bit hash (2 longs).
470     * @see #hash128x64(byte[], int, int, int)
471     * @since 1.14
472     */
473    public static long[] hash128x64(final byte[] data) {
474        return hash128x64(data, 0, data.length, 0);
475    }
476
477    /**
478     * Generates 128-bit hash from the byte array with the given offset, length and seed.
479     *
480     * <p>
481     * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
482     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
483     * </p>
484     *
485     * @param data The input byte array.
486     * @param offset The first element of array.
487     * @param length The length of array.
488     * @param seed The initial seed value.
489     * @return The 128-bit hash (2 longs).
490     * @since 1.14
491     */
492    public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
493        // Use an unsigned 32-bit integer as the seed
494        return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
495    }
496
497    /**
498     * Generates 128-bit hash from the byte array with the given offset, length and seed.
499     *
500     * <p>
501     * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
502     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
503     * </p>
504     *
505     * @param data The input byte array.
506     * @param offset The first element of array.
507     * @param length The length of array.
508     * @param seed The initial seed value.
509     * @return The 128-bit hash (2 longs).
510     */
511    private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
512        long h1 = seed;
513        long h2 = seed;
514        final int nblocks = length >> 4;
515        // body
516        for (int i = 0; i < nblocks; i++) {
517            final int index = offset + (i << 4);
518            long k1 = MurmurHash.getLittleEndianLong(data, index);
519            long k2 = MurmurHash.getLittleEndianLong(data, index + 8);
520            // mix functions for k1
521            k1 *= C1;
522            k1 = Long.rotateLeft(k1, R1);
523            k1 *= C2;
524            h1 ^= k1;
525            h1 = Long.rotateLeft(h1, R2);
526            h1 += h2;
527            h1 = h1 * M + N1;
528            // mix functions for k2
529            k2 *= C2;
530            k2 = Long.rotateLeft(k2, R3);
531            k2 *= C1;
532            h2 ^= k2;
533            h2 = Long.rotateLeft(h2, R1);
534            h2 += h1;
535            h2 = h2 * M + N2;
536        }
537        // tail
538        long k1 = 0;
539        long k2 = 0;
540        final int index = offset + (nblocks << 4);
541        switch (offset + length - index) {
542        case 15:
543            k2 ^= ((long) data[index + 14] & 0xff) << 48;
544            // falls-through
545        case 14:
546            k2 ^= ((long) data[index + 13] & 0xff) << 40;
547            // falls-through
548        case 13:
549            k2 ^= ((long) data[index + 12] & 0xff) << 32;
550            // falls-through
551        case 12:
552            k2 ^= ((long) data[index + 11] & 0xff) << 24;
553            // falls-through
554        case 11:
555            k2 ^= ((long) data[index + 10] & 0xff) << 16;
556            // falls-through
557        case 10:
558            k2 ^= ((long) data[index + 9] & 0xff) << 8;
559            // falls-through
560        case 9:
561            k2 ^= data[index + 8] & 0xff;
562            k2 *= C2;
563            k2 = Long.rotateLeft(k2, R3);
564            k2 *= C1;
565            h2 ^= k2;
566            // falls-through
567        case 8:
568            k1 ^= ((long) data[index + 7] & 0xff) << 56;
569            // falls-through
570        case 7:
571            k1 ^= ((long) data[index + 6] & 0xff) << 48;
572            // falls-through
573        case 6:
574            k1 ^= ((long) data[index + 5] & 0xff) << 40;
575            // falls-through
576        case 5:
577            k1 ^= ((long) data[index + 4] & 0xff) << 32;
578            // falls-through
579        case 4:
580            k1 ^= ((long) data[index + 3] & 0xff) << 24;
581            // falls-through
582        case 3:
583            k1 ^= ((long) data[index + 2] & 0xff) << 16;
584            // falls-through
585        case 2:
586            k1 ^= ((long) data[index + 1] & 0xff) << 8;
587            // falls-through
588        case 1:
589            k1 ^= data[index] & 0xff;
590            k1 *= C1;
591            k1 = Long.rotateLeft(k1, R1);
592            k1 *= C2;
593            h1 ^= k1;
594        }
595        // finalization
596        h1 ^= length;
597        h2 ^= length;
598
599        h1 += h2;
600        h2 += h1;
601
602        h1 = fmix64(h1);
603        h2 = fmix64(h2);
604
605        h1 += h2;
606        h2 += h1;
607        return new long[] { h1, h2 };
608    }
609
610    /**
611     * Generates 32-bit hash from the byte array with a default seed.
612     * This is a helper method that will produce the same result as:
613     *
614     * <pre>
615     * int offset = 0;
616     * int seed = 104729;
617     * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
618     * </pre>
619     *
620     * <p>
621     * This implementation contains a sign-extension bug in the finalization step of
622     * any bytes left over from dividing the length by 4. This manifests if any of these
623     * bytes are negative.
624     * </p>
625     *
626     * @param data The input byte array.
627     * @return The 32-bit hash.
628     * @see #hash32(byte[], int, int, int)
629     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
630     */
631    @Deprecated
632    public static int hash32(final byte[] data) {
633        return hash32(data, 0, data.length, DEFAULT_SEED);
634    }
635
636    /**
637     * Generates 32-bit hash from the byte array with the given length and a default seed.
638     * This is a helper method that will produce the same result as:
639     *
640     * <pre>
641     * int offset = 0;
642     * int seed = 104729;
643     * int hash = MurmurHash3.hash32(data, offset, length, seed);
644     * </pre>
645     *
646     * <p>
647     * This implementation contains a sign-extension bug in the finalization step of
648     * any bytes left over from dividing the length by 4. This manifests if any of these
649     * bytes are negative.
650     * </p>
651     *
652     * @param data The input byte array.
653     * @param length The length of array.
654     * @return The 32-bit hash.
655     * @see #hash32(byte[], int, int, int)
656     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
657     */
658    @Deprecated
659    public static int hash32(final byte[] data, final int length) {
660        return hash32(data, length, DEFAULT_SEED);
661    }
662
663    /**
664     * Generates 32-bit hash from the byte array with the given length and seed. This is a
665     * helper method that will produce the same result as:
666     *
667     * <pre>
668     * int offset = 0;
669     * int hash = MurmurHash3.hash32(data, offset, length, seed);
670     * </pre>
671     *
672     * <p>
673     * This implementation contains a sign-extension bug in the finalization step of
674     * any bytes left over from dividing the length by 4. This manifests if any of these
675     * bytes are negative.
676     * </p>
677     *
678     * @param data The input byte array.
679     * @param length The length of array.
680     * @param seed The initial seed value.
681     * @return The 32-bit hash.
682     * @see #hash32(byte[], int, int, int)
683     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
684     */
685    @Deprecated
686    public static int hash32(final byte[] data, final int length, final int seed) {
687        return hash32(data, 0, length, seed);
688    }
689
690    /**
691     * Generates 32-bit hash from the byte array with the given offset, length and seed.
692     *
693     * <p>
694     * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
695     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
696     * </p>
697     *
698     * <p>
699     * This implementation contains a sign-extension bug in the finalization step of
700     * any bytes left over from dividing the length by 4. This manifests if any of these
701     * bytes are negative.
702     * </p>
703     *
704     * @param data The input byte array.
705     * @param offset The offset of data.
706     * @param length The length of array.
707     * @param seed The initial seed value.
708     * @return The 32-bit hash.
709     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
710     */
711    @Deprecated
712    public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
713        int hash = seed;
714        final int nblocks = length >> 2;
715        // body
716        for (int i = 0; i < nblocks; i++) {
717            final int index = offset + (i << 2);
718            final int k = MurmurHash.getLittleEndianInt(data, index);
719            hash = mix32(k, hash);
720        }
721        // tail
722        // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
723        final int index = offset + (nblocks << 2);
724        int k1 = 0;
725        switch (offset + length - index) {
726        case 3:
727            k1 ^= data[index + 2] << 16;
728            // falls-through
729        case 2:
730            k1 ^= data[index + 1] << 8;
731            // falls-through
732        case 1:
733            k1 ^= data[index];
734            // mix functions
735            k1 *= C1_32;
736            k1 = Integer.rotateLeft(k1, R1_32);
737            k1 *= C2_32;
738            hash ^= k1;
739        }
740        hash ^= length;
741        return fmix32(hash);
742    }
743
744    /**
745     * Generates 32-bit hash from a long with a default seed value.
746     * This is a helper method that will produce the same result as:
747     *
748     * <pre>
749     * int offset = 0;
750     * int seed = 104729;
751     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
752     *                                            .putLong(data)
753     *                                            .array(), offset, 8, seed);
754     * </pre>
755     *
756     * @param data The long to hash.
757     * @return The 32-bit hash.
758     * @see #hash32x86(byte[], int, int, int)
759     */
760    public static int hash32(final long data) {
761        return hash32(data, DEFAULT_SEED);
762    }
763
764    /**
765     * Generates 32-bit hash from a long with the given seed.
766     * This is a helper method that will produce the same result as:
767     *
768     * <pre>
769     * int offset = 0;
770     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
771     *                                            .putLong(data)
772     *                                            .array(), offset, 8, seed);
773     * </pre>
774     *
775     * @param data The long to hash.
776     * @param seed The initial seed value.
777     * @return The 32-bit hash.
778     * @see #hash32x86(byte[], int, int, int)
779     */
780    public static int hash32(final long data, final int seed) {
781        int hash = seed;
782        final long r0 = Long.reverseBytes(data);
783
784        hash = mix32((int) r0, hash);
785        hash = mix32((int) (r0 >>> 32), hash);
786
787        hash ^= Long.BYTES;
788        return fmix32(hash);
789    }
790
791    /**
792     * Generates 32-bit hash from two longs with a default seed value.
793     * This is a helper method that will produce the same result as:
794     *
795     * <pre>
796     * int offset = 0;
797     * int seed = 104729;
798     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
799     *                                            .putLong(data1)
800     *                                            .putLong(data2)
801     *                                            .array(), offset, 16, seed);
802     * </pre>
803     *
804     * @param data1 The first long to hash.
805     * @param data2 The second long to hash.
806     * @return The 32-bit hash.
807     * @see #hash32x86(byte[], int, int, int)
808     */
809    public static int hash32(final long data1, final long data2) {
810        return hash32(data1, data2, DEFAULT_SEED);
811    }
812
813    /**
814     * Generates 32-bit hash from two longs with the given seed.
815     * This is a helper method that will produce the same result as:
816     *
817     * <pre>
818     * int offset = 0;
819     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
820     *                                            .putLong(data1)
821     *                                            .putLong(data2)
822     *                                            .array(), offset, 16, seed);
823     * </pre>
824     *
825     * @param data1 The first long to hash.
826     * @param data2 The second long to hash.
827     * @param seed The initial seed value.
828     * @return The 32-bit hash.
829     * @see #hash32x86(byte[], int, int, int)
830     */
831    public static int hash32(final long data1, final long data2, final int seed) {
832        int hash = seed;
833        final long r0 = Long.reverseBytes(data1);
834        final long r1 = Long.reverseBytes(data2);
835
836        hash = mix32((int) r0, hash);
837        hash = mix32((int) (r0 >>> 32), hash);
838        hash = mix32((int) r1, hash);
839        hash = mix32((int) (r1 >>> 32), hash);
840
841        hash ^= Long.BYTES * 2;
842        return fmix32(hash);
843    }
844
845    /**
846     * Generates 32-bit hash from a string with a default seed.
847     * <p>
848     * Before 1.14 the string was converted using default encoding.
849     * Since 1.14 the string is converted to bytes using UTF-8 encoding.
850     * </p>
851     * This is a helper method that will produce the same result as:
852     *
853     * <pre>
854     * int offset = 0;
855     * int seed = 104729;
856     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
857     * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
858     * </pre>
859     *
860     * <p>
861     * This implementation contains a sign-extension bug in the finalization step of
862     * any bytes left over from dividing the length by 4. This manifests if any of these
863     * bytes are negative.
864     * </p>
865     *
866     * @param data The input string.
867     * @return The 32-bit hash.
868     * @see #hash32(byte[], int, int, int)
869     * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
870     * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
871     */
872    @Deprecated
873    public static int hash32(final String data) {
874        final byte[] bytes = StringUtils.getBytesUtf8(data);
875        return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
876    }
877
878    /**
879     * Generates 32-bit hash from the byte array with a seed of zero.
880     * This is a helper method that will produce the same result as:
881     *
882     * <pre>
883     * int offset = 0;
884     * int seed = 0;
885     * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
886     * </pre>
887     *
888     * @param data The input byte array.
889     * @return The 32-bit hash.
890     * @see #hash32x86(byte[], int, int, int)
891     * @since 1.14
892     */
893    public static int hash32x86(final byte[] data) {
894        return hash32x86(data, 0, data.length, 0);
895    }
896
897    /**
898     * Generates 32-bit hash from the byte array with the given offset, length and seed.
899     *
900     * <p>
901     * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
902     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
903     * </p>
904     *
905     * @param data The input byte array.
906     * @param offset The offset of data.
907     * @param length The length of array.
908     * @param seed The initial seed value.
909     * @return The 32-bit hash.
910     * @since 1.14
911     */
912    public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
913        int hash = seed;
914        final int nblocks = length >> 2;
915        // body
916        for (int i = 0; i < nblocks; i++) {
917            final int index = offset + (i << 2);
918            final int k = MurmurHash.getLittleEndianInt(data, index);
919            hash = mix32(k, hash);
920        }
921        // tail
922        final int index = offset + (nblocks << 2);
923        int k1 = 0;
924        switch (offset + length - index) {
925        case 3:
926            k1 ^= (data[index + 2] & 0xff) << 16;
927            // falls-through
928        case 2:
929            // falls-through
930            k1 ^= (data[index + 1] & 0xff) << 8;
931            // falls-through
932        case 1:
933            k1 ^= data[index] & 0xff;
934            // mix functions
935            k1 *= C1_32;
936            k1 = Integer.rotateLeft(k1, R1_32);
937            k1 *= C2_32;
938            hash ^= k1;
939        }
940        hash ^= length;
941        return fmix32(hash);
942    }
943
944    /**
945     * Generates 64-bit hash from a byte array with a default seed.
946     *
947     * <p>
948     * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
949     * </p>
950     *
951     * <p>
952     * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
953     * with the same byte data. This method will be removed in a future release.
954     * </p>
955     *
956     * <p>
957     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
958     * </p>
959     *
960     * <p>
961     * This is a helper method that will produce the same result as:
962     * </p>
963     *
964     * <pre>
965     *
966     * int offset = 0;
967     * int seed = 104729;
968     * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
969     * </pre>
970     *
971     * @param data The input byte array.
972     * @return The 64-bit hash.
973     * @see #hash64(byte[], int, int, int)
974     * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])}.
975     */
976    @Deprecated
977    public static long hash64(final byte[] data) {
978        return hash64(data, 0, data.length, DEFAULT_SEED);
979    }
980
981    /**
982     * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
983     *
984     * <p>
985     * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
986     * </p>
987     *
988     * <p>
989     * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
990     * with the same byte data. This method will be removed in a future release.
991     * </p>
992     *
993     * <p>
994     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
995     * </p>
996     *
997     * <p>
998     * This is a helper method that will produce the same result as:
999     * </p>
1000     *
1001     * <pre>
1002     *
1003     * int seed = 104729;
1004     * long hash = MurmurHash3.hash64(data, offset, length, seed);
1005     * </pre>
1006     *
1007     * @param data   The input byte array.
1008     * @param offset The offset of data.
1009     * @param length The length of array.
1010     * @return The 64-bit hash.
1011     * @see #hash64(byte[], int, int, int)
1012     * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1013     */
1014    @Deprecated
1015    public static long hash64(final byte[] data, final int offset, final int length) {
1016        return hash64(data, offset, length, DEFAULT_SEED);
1017    }
1018
1019    /**
1020     * Generates 64-bit hash from a byte array with the given offset, length and seed.
1021     *
1022     * <p>
1023     * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1024     * </p>
1025     *
1026     * <p>
1027     * This is a Murmur3-like 64-bit variant. This method will be removed in a future release.
1028     * </p>
1029     *
1030     * <p>
1031     * This implementation contains a sign-extension bug in the seed initialization. This manifests if the seed is negative.
1032     * </p>
1033     *
1034     * <p>
1035     * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks of data processed in the MurmurHash3
1036     * {@code MurmurHash3_x64_128} method. However the hash is not mixed with a hash chunk from the next 8 bytes of data. The method will not return the same
1037     * value as the first or second 64-bits of the function {@link #hash128(byte[], int, int, int)}.
1038     * </p>
1039     *
1040     * <p>
1041     * Use of this method is not advised. Use the first long returned from {@link #hash128x64(byte[], int, int, int)}.
1042     * </p>
1043     *
1044     * @param data   The input byte array.
1045     * @param offset The offset of data.
1046     * @param length The length of array.
1047     * @param seed   The initial seed value.
1048     * @return The 64-bit hash.
1049     * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1050     */
1051    @Deprecated
1052    public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1053        // Note: This fails to apply masking using 0xffffffffL to the seed.
1054        long hash = seed;
1055        final int nblocks = length >> 3;
1056        // body
1057        for (int i = 0; i < nblocks; i++) {
1058            final int index = offset + (i << 3);
1059            long k = MurmurHash.getLittleEndianLong(data, index);
1060            // mix functions
1061            k *= C1;
1062            k = Long.rotateLeft(k, R1);
1063            k *= C2;
1064            hash ^= k;
1065            hash = Long.rotateLeft(hash, R2) * M + N1;
1066        }
1067        // tail
1068        long k1 = 0;
1069        final int index = offset + (nblocks << 3);
1070        switch (offset + length - index) {
1071        case 7:
1072            k1 ^= ((long) data[index + 6] & 0xff) << 48;
1073            // falls-through
1074        case 6:
1075            k1 ^= ((long) data[index + 5] & 0xff) << 40;
1076            // falls-through
1077        case 5:
1078            k1 ^= ((long) data[index + 4] & 0xff) << 32;
1079            // falls-through
1080        case 4:
1081            k1 ^= ((long) data[index + 3] & 0xff) << 24;
1082            // falls-through
1083        case 3:
1084            k1 ^= ((long) data[index + 2] & 0xff) << 16;
1085            // falls-through
1086        case 2:
1087            k1 ^= ((long) data[index + 1] & 0xff) << 8;
1088            // falls-through
1089        case 1:
1090            k1 ^= (long) data[index] & 0xff;
1091            k1 *= C1;
1092            k1 = Long.rotateLeft(k1, R1);
1093            k1 *= C2;
1094            hash ^= k1;
1095        }
1096        // finalization
1097        hash ^= length;
1098        return fmix64(hash);
1099    }
1100
1101    /**
1102     * Generates 64-bit hash from an int with a default seed.
1103     *
1104     * <p>
1105     * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1106     * </p>
1107     *
1108     * <p>
1109     * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1110     * with the same byte data from the {@code int}. This method will be removed in a future release.
1111     * </p>
1112     *
1113     * <p>
1114     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1115     * </p>
1116     *
1117     * <p>
1118     * This is a helper method that will produce the same result as:
1119     * </p>
1120     *
1121     * <pre>
1122     *
1123     * int offset = 0;
1124     * int seed = 104729;
1125     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4).putInt(data).array(), offset, 4, seed);
1126     * </pre>
1127     *
1128     * @param data The int to hash.
1129     * @return The 64-bit hash.
1130     * @see #hash64(byte[], int, int, int)
1131     * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1132     */
1133    @Deprecated
1134    public static long hash64(final int data) {
1135        long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1136        long hash = DEFAULT_SEED;
1137        k1 *= C1;
1138        k1 = Long.rotateLeft(k1, R1);
1139        k1 *= C2;
1140        hash ^= k1;
1141        // finalization
1142        hash ^= Integer.BYTES;
1143        return fmix64(hash);
1144    }
1145
1146    /**
1147     * Generates 64-bit hash from a long with a default seed.
1148     *
1149     * <p>
1150     * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1151     * </p>
1152     *
1153     * <p>
1154     * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1155     * with the same byte data from the {@code long}. This method will be removed in a future release.
1156     * </p>
1157     *
1158     * <p>
1159     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1160     * </p>
1161     *
1162     * <p>
1163     * This is a helper method that will produce the same result as:
1164     * </p>
1165     *
1166     * <pre>
1167     *
1168     * int offset = 0;
1169     * int seed = 104729;
1170     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8).putLong(data).array(), offset, 8, seed);
1171     * </pre>
1172     *
1173     * @param data The long to hash.
1174     * @return The 64-bit hash.
1175     * @see #hash64(byte[], int, int, int)
1176     * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1177     */
1178    @Deprecated
1179    public static long hash64(final long data) {
1180        long hash = DEFAULT_SEED;
1181        long k = Long.reverseBytes(data);
1182        // mix functions
1183        k *= C1;
1184        k = Long.rotateLeft(k, R1);
1185        k *= C2;
1186        hash ^= k;
1187        hash = Long.rotateLeft(hash, R2) * M + N1;
1188        // finalization
1189        hash ^= Long.BYTES;
1190        return fmix64(hash);
1191    }
1192
1193    /**
1194     * Generates 64-bit hash from a short with a default seed.
1195     *
1196     * <p>
1197     * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1198     * </p>
1199     *
1200     * <p>
1201     * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1202     * with the same byte data from the {@code short}. This method will be removed in a future release.
1203     * </p>
1204     *
1205     * <p>
1206     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1207     * </p>
1208     *
1209     * <p>
1210     * This is a helper method that will produce the same result as:
1211     * </p>
1212     *
1213     * <pre>
1214     *
1215     * int offset = 0;
1216     * int seed = 104729;
1217     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2).putShort(data).array(), offset, 2, seed);
1218     * </pre>
1219     *
1220     * @param data The short to hash.
1221     * @return The 64-bit hash.
1222     * @see #hash64(byte[], int, int, int)
1223     * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the
1224     *             {@code short}.
1225     */
1226    @Deprecated
1227    public static long hash64(final short data) {
1228        long hash = DEFAULT_SEED;
1229        long k1 = 0;
1230        k1 ^= ((long) data & 0xff) << 8;
1231        k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1232        k1 *= C1;
1233        k1 = Long.rotateLeft(k1, R1);
1234        k1 *= C2;
1235        hash ^= k1;
1236        // finalization
1237        hash ^= Short.BYTES;
1238        return fmix64(hash);
1239    }
1240
1241    /**
1242     * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1243     *
1244     * @param k The data to add to the hash.
1245     * @param hash The current hash.
1246     * @return The new hash.
1247     */
1248    private static int mix32(int k, int hash) {
1249        k *= C1_32;
1250        k = Integer.rotateLeft(k, R1_32);
1251        k *= C2_32;
1252        hash ^= k;
1253        return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1254    }
1255
1256    /** No instance methods. */
1257    private MurmurHash3() {
1258    }
1259}