001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.digest; 019 020import org.apache.commons.codec.binary.StringUtils; 021 022/** 023 * Implements the MurmurHash3 32-bit and 128-bit hash functions. 024 * 025 * <p> 026 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic 027 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not 028 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes. 029 * </p> 030 * 031 * <p> 032 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function 033 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher. 034 * </p> 035 * 036 * <p> 037 * This is public domain code with no copyrights. From home page of 038 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>: 039 * </p> 040 * 041 * <blockquote> 042 * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their 043 * code." 044 * </blockquote> 045 * 046 * <p> 047 * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>. 048 * That adaption contains a {@code hash64} method that is not part of the original 049 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a 050 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes. 051 * </p> 052 * 053 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a> 054 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++ 055 * code</a> 056 * @see <a href= 057 * "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java"> 058 * Apache Hive Murmer3</a> 059 * @since 1.13 060 */ 061public final class MurmurHash3 { 062 063 /** 064 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new 065 * hash computed. 066 * 067 * <p> 068 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 069 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 070 * </p> 071 * 072 * <p> 073 * This implementation contains a sign-extension bug in the finalization step of 074 * any bytes left over from dividing the length by 4. This manifests if any of these 075 * bytes are negative. 076 * </p> 077 * 078 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes. 079 */ 080 @Deprecated 081 public static class IncrementalHash32 extends IncrementalHash32x86 { 082 083 /** 084 * Constructs a new instance. 085 */ 086 public IncrementalHash32() { 087 // empty 088 } 089 090 /** 091 * {@inheritDoc} 092 * 093 * <p> 094 * This implementation contains a sign-extension bug in the finalization step of 095 * any bytes left over from dividing the length by 4. This manifests if any of these 096 * bytes are negative. 097 * <p> 098 * 099 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes. 100 */ 101 @Override 102 @Deprecated 103 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) { 104 int result = hash; 105 // Note: This fails to apply masking using 0xff to the 3 remaining bytes. 106 int k1 = 0; 107 switch (unprocessedLength) { 108 case 3: 109 k1 ^= unprocessed[2] << 16; 110 // falls-through 111 case 2: 112 k1 ^= unprocessed[1] << 8; 113 // falls-through 114 case 1: 115 k1 ^= unprocessed[0]; 116 // mix functions 117 k1 *= C1_32; 118 k1 = Integer.rotateLeft(k1, R1_32); 119 k1 *= C2_32; 120 result ^= k1; 121 } 122 // finalization 123 result ^= totalLen; 124 return fmix32(result); 125 } 126 } 127 128 /** 129 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new 130 * hash computed. 131 * 132 * <p> 133 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 134 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 135 * </p> 136 * 137 * @since 1.14 138 */ 139 public static class IncrementalHash32x86 { 140 141 /** The size of byte blocks that are processed together. */ 142 private static final int BLOCK_SIZE = 4; 143 144 /** 145 * Combines the bytes using an Or operation ({@code | } in a little-endian representation 146 * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most 147 * significant. 148 * 149 * @param b1 The first byte. 150 * @param b2 The second byte. 151 * @param b3 The third byte. 152 * @param b4 The fourth byte. 153 * @return The 32-bit integer. 154 */ 155 private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) { 156 return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24; 157 } 158 159 /** Up to 3 unprocessed bytes from input data. */ 160 private final byte[] unprocessed = new byte[3]; 161 162 /** The number of unprocessed bytes in the tail data. */ 163 private int unprocessedLength; 164 165 /** The total number of input bytes added since the start. */ 166 private int totalLen; 167 168 /** 169 * The current running hash. 170 * This must be finalized to generate the 32-bit hash value. 171 */ 172 private int hash; 173 174 /** 175 * Constructs a new instance. 176 */ 177 public IncrementalHash32x86() { 178 // empty 179 } 180 181 /** 182 * Adds the byte array to the current incremental hash. 183 * 184 * @param data The input byte array. 185 * @param offset The offset of data. 186 * @param length The length of array. 187 */ 188 public final void add(final byte[] data, final int offset, final int length) { 189 if (length <= 0) { 190 // Nothing to add 191 return; 192 } 193 totalLen += length; 194 // Process the bytes in blocks of 4. 195 // New bytes must be added to any current unprocessed bytes, 196 // then processed in blocks of 4 and the remaining bytes saved: 197 // 198 // |--|---------------------------|--| 199 // unprocessed 200 // main block 201 // remaining 202 203 // Check if the unprocessed bytes and new bytes can fill a block of 4. 204 // Make this overflow safe in the event that length is Integer.MAX_VALUE. 205 // Equivalent to: (unprocessedLength + length < BLOCK_SIZE) 206 if (unprocessedLength + length - BLOCK_SIZE < 0) { 207 // Not enough so add to the unprocessed bytes 208 System.arraycopy(data, offset, unprocessed, unprocessedLength, length); 209 unprocessedLength += length; 210 return; 211 } 212 // Combine unprocessed bytes with new bytes. 213 final int newOffset; 214 final int newLength; 215 if (unprocessedLength > 0) { 216 int k = -1; 217 switch (unprocessedLength) { 218 case 1: 219 k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]); 220 break; 221 case 2: 222 k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]); 223 break; 224 case 3: 225 k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]); 226 break; 227 default: 228 throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength); 229 } 230 hash = mix32(k, hash); 231 // Update the offset and length 232 final int consumed = BLOCK_SIZE - unprocessedLength; 233 newOffset = offset + consumed; 234 newLength = length - consumed; 235 } else { 236 newOffset = offset; 237 newLength = length; 238 } 239 // Main processing of blocks of 4 bytes 240 final int nblocks = newLength >> 2; 241 242 for (int i = 0; i < nblocks; i++) { 243 final int index = newOffset + (i << 2); 244 final int k = MurmurHash.getLittleEndianInt(data, index); 245 hash = mix32(k, hash); 246 } 247 // Save left-over unprocessed bytes 248 final int consumed = nblocks << 2; 249 unprocessedLength = newLength - consumed; 250 if (unprocessedLength != 0) { 251 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength); 252 } 253 } 254 255 /** 256 * Generates the 32-bit hash value. Repeat calls to this method with no additional data 257 * will generate the same hash value. 258 * 259 * @return The 32-bit hash. 260 */ 261 public final int end() { 262 // Allow calling end() again after adding no data to return the same result. 263 return finalise(hash, unprocessedLength, unprocessed, totalLen); 264 } 265 266 /** 267 * Finalizes the running hash to the output 32-bit hash by processing remaining bytes 268 * and performing final mixing. 269 * 270 * @param hash The running hash. 271 * @param unprocessedLength The number of unprocessed bytes in the tail data. 272 * @param unprocessed Up to 3 unprocessed bytes from input data. 273 * @param totalLen The total number of input bytes added since the start. 274 * @return The 32-bit hash. 275 */ 276 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) { 277 int result = hash; 278 int k1 = 0; 279 switch (unprocessedLength) { 280 case 3: 281 k1 ^= (unprocessed[2] & 0xff) << 16; 282 // falls-through 283 case 2: 284 k1 ^= (unprocessed[1] & 0xff) << 8; 285 // falls-through 286 case 1: 287 k1 ^= unprocessed[0] & 0xff; 288 // mix functions 289 k1 *= C1_32; 290 k1 = Integer.rotateLeft(k1, R1_32); 291 k1 *= C2_32; 292 result ^= k1; 293 } 294 // finalization 295 result ^= totalLen; 296 return fmix32(result); 297 } 298 299 /** 300 * Starts a new incremental hash. 301 * 302 * @param seed The initial seed value. 303 */ 304 public final void start(final int seed) { 305 // Reset 306 unprocessedLength = totalLen = 0; 307 hash = seed; 308 } 309 } 310 311 /** 312 * A random number to use for a hash code. 313 * 314 * @deprecated This is not used internally and will be removed in a future release. 315 */ 316 @Deprecated 317 public static final long NULL_HASHCODE = 2862933555777941757L; 318 319 /** 320 * A default seed to use for the murmur hash algorithm. 321 * Has the value {@code 104729}. 322 */ 323 public static final int DEFAULT_SEED = 104729; 324 // Constants for 32-bit variant 325 private static final int C1_32 = 0xcc9e2d51; 326 private static final int C2_32 = 0x1b873593; 327 private static final int R1_32 = 15; 328 private static final int R2_32 = 13; 329 330 private static final int M_32 = 5; 331 private static final int N_32 = 0xe6546b64; 332 // Constants for 128-bit variant 333 private static final long C1 = 0x87c37b91114253d5L; 334 private static final long C2 = 0x4cf5ad432745937fL; 335 private static final int R1 = 31; 336 private static final int R2 = 27; 337 private static final int R3 = 33; 338 private static final int M = 5; 339 340 private static final int N1 = 0x52dce729; 341 342 private static final int N2 = 0x38495ab5; 343 344 /** 345 * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}. 346 * 347 * @param hash The current hash. 348 * @return The final hash. 349 */ 350 private static int fmix32(int hash) { 351 hash ^= hash >>> 16; 352 hash *= 0x85ebca6b; 353 hash ^= hash >>> 13; 354 hash *= 0xc2b2ae35; 355 hash ^= hash >>> 16; 356 return hash; 357 } 358 359 /** 360 * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}. 361 * 362 * @param hash The current hash. 363 * @return The final hash. 364 */ 365 private static long fmix64(long hash) { 366 hash ^= hash >>> 33; 367 hash *= 0xff51afd7ed558ccdL; 368 hash ^= hash >>> 33; 369 hash *= 0xc4ceb9fe1a85ec53L; 370 hash ^= hash >>> 33; 371 return hash; 372 } 373 374 /** 375 * Generates 128-bit hash from the byte array with a default seed. 376 * This is a helper method that will produce the same result as: 377 * 378 * <pre> 379 * int offset = 0; 380 * int seed = 104729; 381 * int hash = MurmurHash3.hash128(data, offset, data.length, seed); 382 * </pre> 383 * 384 * <p> 385 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect 386 * this result as the default seed is positive. 387 * </p> 388 * 389 * @param data The input byte array. 390 * @return The 128-bit hash (2 longs). 391 * @see #hash128(byte[], int, int, int) 392 */ 393 public static long[] hash128(final byte[] data) { 394 return hash128(data, 0, data.length, DEFAULT_SEED); 395 } 396 397 /** 398 * Generates 128-bit hash from the byte array with the given offset, length and seed. 399 * 400 * <p> 401 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128} 402 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 403 * </p> 404 * 405 * <p> 406 * This implementation contains a sign-extension bug in the seed initialization. 407 * This manifests if the seed is negative. 408 * </p> 409 * 410 * @param data The input byte array. 411 * @param offset The first element of array. 412 * @param length The length of array. 413 * @param seed The initial seed value. 414 * @return The 128-bit hash (2 longs). 415 * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization. 416 */ 417 @Deprecated 418 public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) { 419 // Note: This deliberately fails to apply masking using 0xffffffffL to the seed 420 // to maintain behavioral compatibility with the original version. 421 // The implicit conversion to a long will extend a negative sign 422 // bit through the upper 32-bits of the long seed. These should be zero. 423 return hash128x64Internal(data, offset, length, seed); 424 } 425 426 /** 427 * Generates 128-bit hash from a string with a default seed. 428 * 429 * <p> 430 * Before 1.14 the string was converted using default encoding. Since 1.14 the string is converted to bytes using UTF-8 encoding. 431 * </p> 432 * <p> 433 * This is a helper method that will produce the same result as: 434 * </p> 435 * 436 * <pre> 437 * int offset = 0; 438 * int seed = 104729; 439 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 440 * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed); 441 * </pre> 442 * 443 * <p> 444 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect this result as the default seed is positive. 445 * </p> 446 * 447 * @param data The input String. 448 * @return The 128-bit hash (2 longs). 449 * @see #hash128(byte[], int, int, int) 450 * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from {@link String#getBytes(java.nio.charset.Charset)}. 451 */ 452 @Deprecated 453 public static long[] hash128(final String data) { 454 final byte[] bytes = StringUtils.getBytesUtf8(data); 455 return hash128(bytes, 0, bytes.length, DEFAULT_SEED); 456 } 457 458 /** 459 * Generates 128-bit hash from the byte array with a seed of zero. 460 * This is a helper method that will produce the same result as: 461 * 462 * <pre> 463 * int offset = 0; 464 * int seed = 0; 465 * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed); 466 * </pre> 467 * 468 * @param data The input byte array. 469 * @return The 128-bit hash (2 longs). 470 * @see #hash128x64(byte[], int, int, int) 471 * @since 1.14 472 */ 473 public static long[] hash128x64(final byte[] data) { 474 return hash128x64(data, 0, data.length, 0); 475 } 476 477 /** 478 * Generates 128-bit hash from the byte array with the given offset, length and seed. 479 * 480 * <p> 481 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128} 482 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 483 * </p> 484 * 485 * @param data The input byte array. 486 * @param offset The first element of array. 487 * @param length The length of array. 488 * @param seed The initial seed value. 489 * @return The 128-bit hash (2 longs). 490 * @since 1.14 491 */ 492 public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) { 493 // Use an unsigned 32-bit integer as the seed 494 return hash128x64Internal(data, offset, length, seed & 0xffffffffL); 495 } 496 497 /** 498 * Generates 128-bit hash from the byte array with the given offset, length and seed. 499 * 500 * <p> 501 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128} 502 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 503 * </p> 504 * 505 * @param data The input byte array. 506 * @param offset The first element of array. 507 * @param length The length of array. 508 * @param seed The initial seed value. 509 * @return The 128-bit hash (2 longs). 510 */ 511 private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) { 512 long h1 = seed; 513 long h2 = seed; 514 final int nblocks = length >> 4; 515 // body 516 for (int i = 0; i < nblocks; i++) { 517 final int index = offset + (i << 4); 518 long k1 = MurmurHash.getLittleEndianLong(data, index); 519 long k2 = MurmurHash.getLittleEndianLong(data, index + 8); 520 // mix functions for k1 521 k1 *= C1; 522 k1 = Long.rotateLeft(k1, R1); 523 k1 *= C2; 524 h1 ^= k1; 525 h1 = Long.rotateLeft(h1, R2); 526 h1 += h2; 527 h1 = h1 * M + N1; 528 // mix functions for k2 529 k2 *= C2; 530 k2 = Long.rotateLeft(k2, R3); 531 k2 *= C1; 532 h2 ^= k2; 533 h2 = Long.rotateLeft(h2, R1); 534 h2 += h1; 535 h2 = h2 * M + N2; 536 } 537 // tail 538 long k1 = 0; 539 long k2 = 0; 540 final int index = offset + (nblocks << 4); 541 switch (offset + length - index) { 542 case 15: 543 k2 ^= ((long) data[index + 14] & 0xff) << 48; 544 // falls-through 545 case 14: 546 k2 ^= ((long) data[index + 13] & 0xff) << 40; 547 // falls-through 548 case 13: 549 k2 ^= ((long) data[index + 12] & 0xff) << 32; 550 // falls-through 551 case 12: 552 k2 ^= ((long) data[index + 11] & 0xff) << 24; 553 // falls-through 554 case 11: 555 k2 ^= ((long) data[index + 10] & 0xff) << 16; 556 // falls-through 557 case 10: 558 k2 ^= ((long) data[index + 9] & 0xff) << 8; 559 // falls-through 560 case 9: 561 k2 ^= data[index + 8] & 0xff; 562 k2 *= C2; 563 k2 = Long.rotateLeft(k2, R3); 564 k2 *= C1; 565 h2 ^= k2; 566 // falls-through 567 case 8: 568 k1 ^= ((long) data[index + 7] & 0xff) << 56; 569 // falls-through 570 case 7: 571 k1 ^= ((long) data[index + 6] & 0xff) << 48; 572 // falls-through 573 case 6: 574 k1 ^= ((long) data[index + 5] & 0xff) << 40; 575 // falls-through 576 case 5: 577 k1 ^= ((long) data[index + 4] & 0xff) << 32; 578 // falls-through 579 case 4: 580 k1 ^= ((long) data[index + 3] & 0xff) << 24; 581 // falls-through 582 case 3: 583 k1 ^= ((long) data[index + 2] & 0xff) << 16; 584 // falls-through 585 case 2: 586 k1 ^= ((long) data[index + 1] & 0xff) << 8; 587 // falls-through 588 case 1: 589 k1 ^= data[index] & 0xff; 590 k1 *= C1; 591 k1 = Long.rotateLeft(k1, R1); 592 k1 *= C2; 593 h1 ^= k1; 594 } 595 // finalization 596 h1 ^= length; 597 h2 ^= length; 598 599 h1 += h2; 600 h2 += h1; 601 602 h1 = fmix64(h1); 603 h2 = fmix64(h2); 604 605 h1 += h2; 606 h2 += h1; 607 return new long[] { h1, h2 }; 608 } 609 610 /** 611 * Generates 32-bit hash from the byte array with a default seed. 612 * This is a helper method that will produce the same result as: 613 * 614 * <pre> 615 * int offset = 0; 616 * int seed = 104729; 617 * int hash = MurmurHash3.hash32(data, offset, data.length, seed); 618 * </pre> 619 * 620 * <p> 621 * This implementation contains a sign-extension bug in the finalization step of 622 * any bytes left over from dividing the length by 4. This manifests if any of these 623 * bytes are negative. 624 * </p> 625 * 626 * @param data The input byte array. 627 * @return The 32-bit hash. 628 * @see #hash32(byte[], int, int, int) 629 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 630 */ 631 @Deprecated 632 public static int hash32(final byte[] data) { 633 return hash32(data, 0, data.length, DEFAULT_SEED); 634 } 635 636 /** 637 * Generates 32-bit hash from the byte array with the given length and a default seed. 638 * This is a helper method that will produce the same result as: 639 * 640 * <pre> 641 * int offset = 0; 642 * int seed = 104729; 643 * int hash = MurmurHash3.hash32(data, offset, length, seed); 644 * </pre> 645 * 646 * <p> 647 * This implementation contains a sign-extension bug in the finalization step of 648 * any bytes left over from dividing the length by 4. This manifests if any of these 649 * bytes are negative. 650 * </p> 651 * 652 * @param data The input byte array. 653 * @param length The length of array. 654 * @return The 32-bit hash. 655 * @see #hash32(byte[], int, int, int) 656 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 657 */ 658 @Deprecated 659 public static int hash32(final byte[] data, final int length) { 660 return hash32(data, length, DEFAULT_SEED); 661 } 662 663 /** 664 * Generates 32-bit hash from the byte array with the given length and seed. This is a 665 * helper method that will produce the same result as: 666 * 667 * <pre> 668 * int offset = 0; 669 * int hash = MurmurHash3.hash32(data, offset, length, seed); 670 * </pre> 671 * 672 * <p> 673 * This implementation contains a sign-extension bug in the finalization step of 674 * any bytes left over from dividing the length by 4. This manifests if any of these 675 * bytes are negative. 676 * </p> 677 * 678 * @param data The input byte array. 679 * @param length The length of array. 680 * @param seed The initial seed value. 681 * @return The 32-bit hash. 682 * @see #hash32(byte[], int, int, int) 683 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 684 */ 685 @Deprecated 686 public static int hash32(final byte[] data, final int length, final int seed) { 687 return hash32(data, 0, length, seed); 688 } 689 690 /** 691 * Generates 32-bit hash from the byte array with the given offset, length and seed. 692 * 693 * <p> 694 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 695 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 696 * </p> 697 * 698 * <p> 699 * This implementation contains a sign-extension bug in the finalization step of 700 * any bytes left over from dividing the length by 4. This manifests if any of these 701 * bytes are negative. 702 * </p> 703 * 704 * @param data The input byte array. 705 * @param offset The offset of data. 706 * @param length The length of array. 707 * @param seed The initial seed value. 708 * @return The 32-bit hash. 709 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 710 */ 711 @Deprecated 712 public static int hash32(final byte[] data, final int offset, final int length, final int seed) { 713 int hash = seed; 714 final int nblocks = length >> 2; 715 // body 716 for (int i = 0; i < nblocks; i++) { 717 final int index = offset + (i << 2); 718 final int k = MurmurHash.getLittleEndianInt(data, index); 719 hash = mix32(k, hash); 720 } 721 // tail 722 // Note: This fails to apply masking using 0xff to the 3 remaining bytes. 723 final int index = offset + (nblocks << 2); 724 int k1 = 0; 725 switch (offset + length - index) { 726 case 3: 727 k1 ^= data[index + 2] << 16; 728 // falls-through 729 case 2: 730 k1 ^= data[index + 1] << 8; 731 // falls-through 732 case 1: 733 k1 ^= data[index]; 734 // mix functions 735 k1 *= C1_32; 736 k1 = Integer.rotateLeft(k1, R1_32); 737 k1 *= C2_32; 738 hash ^= k1; 739 } 740 hash ^= length; 741 return fmix32(hash); 742 } 743 744 /** 745 * Generates 32-bit hash from a long with a default seed value. 746 * This is a helper method that will produce the same result as: 747 * 748 * <pre> 749 * int offset = 0; 750 * int seed = 104729; 751 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8) 752 * .putLong(data) 753 * .array(), offset, 8, seed); 754 * </pre> 755 * 756 * @param data The long to hash. 757 * @return The 32-bit hash. 758 * @see #hash32x86(byte[], int, int, int) 759 */ 760 public static int hash32(final long data) { 761 return hash32(data, DEFAULT_SEED); 762 } 763 764 /** 765 * Generates 32-bit hash from a long with the given seed. 766 * This is a helper method that will produce the same result as: 767 * 768 * <pre> 769 * int offset = 0; 770 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8) 771 * .putLong(data) 772 * .array(), offset, 8, seed); 773 * </pre> 774 * 775 * @param data The long to hash. 776 * @param seed The initial seed value. 777 * @return The 32-bit hash. 778 * @see #hash32x86(byte[], int, int, int) 779 */ 780 public static int hash32(final long data, final int seed) { 781 int hash = seed; 782 final long r0 = Long.reverseBytes(data); 783 784 hash = mix32((int) r0, hash); 785 hash = mix32((int) (r0 >>> 32), hash); 786 787 hash ^= Long.BYTES; 788 return fmix32(hash); 789 } 790 791 /** 792 * Generates 32-bit hash from two longs with a default seed value. 793 * This is a helper method that will produce the same result as: 794 * 795 * <pre> 796 * int offset = 0; 797 * int seed = 104729; 798 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16) 799 * .putLong(data1) 800 * .putLong(data2) 801 * .array(), offset, 16, seed); 802 * </pre> 803 * 804 * @param data1 The first long to hash. 805 * @param data2 The second long to hash. 806 * @return The 32-bit hash. 807 * @see #hash32x86(byte[], int, int, int) 808 */ 809 public static int hash32(final long data1, final long data2) { 810 return hash32(data1, data2, DEFAULT_SEED); 811 } 812 813 /** 814 * Generates 32-bit hash from two longs with the given seed. 815 * This is a helper method that will produce the same result as: 816 * 817 * <pre> 818 * int offset = 0; 819 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16) 820 * .putLong(data1) 821 * .putLong(data2) 822 * .array(), offset, 16, seed); 823 * </pre> 824 * 825 * @param data1 The first long to hash. 826 * @param data2 The second long to hash. 827 * @param seed The initial seed value. 828 * @return The 32-bit hash. 829 * @see #hash32x86(byte[], int, int, int) 830 */ 831 public static int hash32(final long data1, final long data2, final int seed) { 832 int hash = seed; 833 final long r0 = Long.reverseBytes(data1); 834 final long r1 = Long.reverseBytes(data2); 835 836 hash = mix32((int) r0, hash); 837 hash = mix32((int) (r0 >>> 32), hash); 838 hash = mix32((int) r1, hash); 839 hash = mix32((int) (r1 >>> 32), hash); 840 841 hash ^= Long.BYTES * 2; 842 return fmix32(hash); 843 } 844 845 /** 846 * Generates 32-bit hash from a string with a default seed. 847 * <p> 848 * Before 1.14 the string was converted using default encoding. 849 * Since 1.14 the string is converted to bytes using UTF-8 encoding. 850 * </p> 851 * This is a helper method that will produce the same result as: 852 * 853 * <pre> 854 * int offset = 0; 855 * int seed = 104729; 856 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 857 * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed); 858 * </pre> 859 * 860 * <p> 861 * This implementation contains a sign-extension bug in the finalization step of 862 * any bytes left over from dividing the length by 4. This manifests if any of these 863 * bytes are negative. 864 * </p> 865 * 866 * @param data The input string. 867 * @return The 32-bit hash. 868 * @see #hash32(byte[], int, int, int) 869 * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from 870 * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes. 871 */ 872 @Deprecated 873 public static int hash32(final String data) { 874 final byte[] bytes = StringUtils.getBytesUtf8(data); 875 return hash32(bytes, 0, bytes.length, DEFAULT_SEED); 876 } 877 878 /** 879 * Generates 32-bit hash from the byte array with a seed of zero. 880 * This is a helper method that will produce the same result as: 881 * 882 * <pre> 883 * int offset = 0; 884 * int seed = 0; 885 * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed); 886 * </pre> 887 * 888 * @param data The input byte array. 889 * @return The 32-bit hash. 890 * @see #hash32x86(byte[], int, int, int) 891 * @since 1.14 892 */ 893 public static int hash32x86(final byte[] data) { 894 return hash32x86(data, 0, data.length, 0); 895 } 896 897 /** 898 * Generates 32-bit hash from the byte array with the given offset, length and seed. 899 * 900 * <p> 901 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 902 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 903 * </p> 904 * 905 * @param data The input byte array. 906 * @param offset The offset of data. 907 * @param length The length of array. 908 * @param seed The initial seed value. 909 * @return The 32-bit hash. 910 * @since 1.14 911 */ 912 public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) { 913 int hash = seed; 914 final int nblocks = length >> 2; 915 // body 916 for (int i = 0; i < nblocks; i++) { 917 final int index = offset + (i << 2); 918 final int k = MurmurHash.getLittleEndianInt(data, index); 919 hash = mix32(k, hash); 920 } 921 // tail 922 final int index = offset + (nblocks << 2); 923 int k1 = 0; 924 switch (offset + length - index) { 925 case 3: 926 k1 ^= (data[index + 2] & 0xff) << 16; 927 // falls-through 928 case 2: 929 // falls-through 930 k1 ^= (data[index + 1] & 0xff) << 8; 931 // falls-through 932 case 1: 933 k1 ^= data[index] & 0xff; 934 // mix functions 935 k1 *= C1_32; 936 k1 = Integer.rotateLeft(k1, R1_32); 937 k1 *= C2_32; 938 hash ^= k1; 939 } 940 hash ^= length; 941 return fmix32(hash); 942 } 943 944 /** 945 * Generates 64-bit hash from a byte array with a default seed. 946 * 947 * <p> 948 * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong> 949 * </p> 950 * 951 * <p> 952 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])} 953 * with the same byte data. This method will be removed in a future release. 954 * </p> 955 * 956 * <p> 957 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive. 958 * </p> 959 * 960 * <p> 961 * This is a helper method that will produce the same result as: 962 * </p> 963 * 964 * <pre> 965 * 966 * int offset = 0; 967 * int seed = 104729; 968 * long hash = MurmurHash3.hash64(data, offset, data.length, seed); 969 * </pre> 970 * 971 * @param data The input byte array. 972 * @return The 64-bit hash. 973 * @see #hash64(byte[], int, int, int) 974 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])}. 975 */ 976 @Deprecated 977 public static long hash64(final byte[] data) { 978 return hash64(data, 0, data.length, DEFAULT_SEED); 979 } 980 981 /** 982 * Generates 64-bit hash from a byte array with the given offset and length and a default seed. 983 * 984 * <p> 985 * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong> 986 * </p> 987 * 988 * <p> 989 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])} 990 * with the same byte data. This method will be removed in a future release. 991 * </p> 992 * 993 * <p> 994 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive. 995 * </p> 996 * 997 * <p> 998 * This is a helper method that will produce the same result as: 999 * </p> 1000 * 1001 * <pre> 1002 * 1003 * int seed = 104729; 1004 * long hash = MurmurHash3.hash64(data, offset, length, seed); 1005 * </pre> 1006 * 1007 * @param data The input byte array. 1008 * @param offset The offset of data. 1009 * @param length The length of array. 1010 * @return The 64-bit hash. 1011 * @see #hash64(byte[], int, int, int) 1012 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}. 1013 */ 1014 @Deprecated 1015 public static long hash64(final byte[] data, final int offset, final int length) { 1016 return hash64(data, offset, length, DEFAULT_SEED); 1017 } 1018 1019 /** 1020 * Generates 64-bit hash from a byte array with the given offset, length and seed. 1021 * 1022 * <p> 1023 * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong> 1024 * </p> 1025 * 1026 * <p> 1027 * This is a Murmur3-like 64-bit variant. This method will be removed in a future release. 1028 * </p> 1029 * 1030 * <p> 1031 * This implementation contains a sign-extension bug in the seed initialization. This manifests if the seed is negative. 1032 * </p> 1033 * 1034 * <p> 1035 * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks of data processed in the MurmurHash3 1036 * {@code MurmurHash3_x64_128} method. However the hash is not mixed with a hash chunk from the next 8 bytes of data. The method will not return the same 1037 * value as the first or second 64-bits of the function {@link #hash128(byte[], int, int, int)}. 1038 * </p> 1039 * 1040 * <p> 1041 * Use of this method is not advised. Use the first long returned from {@link #hash128x64(byte[], int, int, int)}. 1042 * </p> 1043 * 1044 * @param data The input byte array. 1045 * @param offset The offset of data. 1046 * @param length The length of array. 1047 * @param seed The initial seed value. 1048 * @return The 64-bit hash. 1049 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}. 1050 */ 1051 @Deprecated 1052 public static long hash64(final byte[] data, final int offset, final int length, final int seed) { 1053 // Note: This fails to apply masking using 0xffffffffL to the seed. 1054 long hash = seed; 1055 final int nblocks = length >> 3; 1056 // body 1057 for (int i = 0; i < nblocks; i++) { 1058 final int index = offset + (i << 3); 1059 long k = MurmurHash.getLittleEndianLong(data, index); 1060 // mix functions 1061 k *= C1; 1062 k = Long.rotateLeft(k, R1); 1063 k *= C2; 1064 hash ^= k; 1065 hash = Long.rotateLeft(hash, R2) * M + N1; 1066 } 1067 // tail 1068 long k1 = 0; 1069 final int index = offset + (nblocks << 3); 1070 switch (offset + length - index) { 1071 case 7: 1072 k1 ^= ((long) data[index + 6] & 0xff) << 48; 1073 // falls-through 1074 case 6: 1075 k1 ^= ((long) data[index + 5] & 0xff) << 40; 1076 // falls-through 1077 case 5: 1078 k1 ^= ((long) data[index + 4] & 0xff) << 32; 1079 // falls-through 1080 case 4: 1081 k1 ^= ((long) data[index + 3] & 0xff) << 24; 1082 // falls-through 1083 case 3: 1084 k1 ^= ((long) data[index + 2] & 0xff) << 16; 1085 // falls-through 1086 case 2: 1087 k1 ^= ((long) data[index + 1] & 0xff) << 8; 1088 // falls-through 1089 case 1: 1090 k1 ^= (long) data[index] & 0xff; 1091 k1 *= C1; 1092 k1 = Long.rotateLeft(k1, R1); 1093 k1 *= C2; 1094 hash ^= k1; 1095 } 1096 // finalization 1097 hash ^= length; 1098 return fmix64(hash); 1099 } 1100 1101 /** 1102 * Generates 64-bit hash from an int with a default seed. 1103 * 1104 * <p> 1105 * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong> 1106 * </p> 1107 * 1108 * <p> 1109 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])} 1110 * with the same byte data from the {@code int}. This method will be removed in a future release. 1111 * </p> 1112 * 1113 * <p> 1114 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive. 1115 * </p> 1116 * 1117 * <p> 1118 * This is a helper method that will produce the same result as: 1119 * </p> 1120 * 1121 * <pre> 1122 * 1123 * int offset = 0; 1124 * int seed = 104729; 1125 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4).putInt(data).array(), offset, 4, seed); 1126 * </pre> 1127 * 1128 * @param data The int to hash. 1129 * @return The 64-bit hash. 1130 * @see #hash64(byte[], int, int, int) 1131 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}. 1132 */ 1133 @Deprecated 1134 public static long hash64(final int data) { 1135 long k1 = Integer.reverseBytes(data) & -1L >>> 32; 1136 long hash = DEFAULT_SEED; 1137 k1 *= C1; 1138 k1 = Long.rotateLeft(k1, R1); 1139 k1 *= C2; 1140 hash ^= k1; 1141 // finalization 1142 hash ^= Integer.BYTES; 1143 return fmix64(hash); 1144 } 1145 1146 /** 1147 * Generates 64-bit hash from a long with a default seed. 1148 * 1149 * <p> 1150 * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong> 1151 * </p> 1152 * 1153 * <p> 1154 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])} 1155 * with the same byte data from the {@code long}. This method will be removed in a future release. 1156 * </p> 1157 * 1158 * <p> 1159 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive. 1160 * </p> 1161 * 1162 * <p> 1163 * This is a helper method that will produce the same result as: 1164 * </p> 1165 * 1166 * <pre> 1167 * 1168 * int offset = 0; 1169 * int seed = 104729; 1170 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8).putLong(data).array(), offset, 8, seed); 1171 * </pre> 1172 * 1173 * @param data The long to hash. 1174 * @return The 64-bit hash. 1175 * @see #hash64(byte[], int, int, int) 1176 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}. 1177 */ 1178 @Deprecated 1179 public static long hash64(final long data) { 1180 long hash = DEFAULT_SEED; 1181 long k = Long.reverseBytes(data); 1182 // mix functions 1183 k *= C1; 1184 k = Long.rotateLeft(k, R1); 1185 k *= C2; 1186 hash ^= k; 1187 hash = Long.rotateLeft(hash, R2) * M + N1; 1188 // finalization 1189 hash ^= Long.BYTES; 1190 return fmix64(hash); 1191 } 1192 1193 /** 1194 * Generates 64-bit hash from a short with a default seed. 1195 * 1196 * <p> 1197 * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong> 1198 * </p> 1199 * 1200 * <p> 1201 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])} 1202 * with the same byte data from the {@code short}. This method will be removed in a future release. 1203 * </p> 1204 * 1205 * <p> 1206 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive. 1207 * </p> 1208 * 1209 * <p> 1210 * This is a helper method that will produce the same result as: 1211 * </p> 1212 * 1213 * <pre> 1214 * 1215 * int offset = 0; 1216 * int seed = 104729; 1217 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2).putShort(data).array(), offset, 2, seed); 1218 * </pre> 1219 * 1220 * @param data The short to hash. 1221 * @return The 64-bit hash. 1222 * @see #hash64(byte[], int, int, int) 1223 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the 1224 * {@code short}. 1225 */ 1226 @Deprecated 1227 public static long hash64(final short data) { 1228 long hash = DEFAULT_SEED; 1229 long k1 = 0; 1230 k1 ^= ((long) data & 0xff) << 8; 1231 k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff; 1232 k1 *= C1; 1233 k1 = Long.rotateLeft(k1, R1); 1234 k1 *= C2; 1235 hash ^= k1; 1236 // finalization 1237 hash ^= Short.BYTES; 1238 return fmix64(hash); 1239 } 1240 1241 /** 1242 * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}. 1243 * 1244 * @param k The data to add to the hash. 1245 * @param hash The current hash. 1246 * @return The new hash. 1247 */ 1248 private static int mix32(int k, int hash) { 1249 k *= C1_32; 1250 k = Integer.rotateLeft(k, R1_32); 1251 k *= C2_32; 1252 hash ^= k; 1253 return Integer.rotateLeft(hash, R2_32) * M_32 + N_32; 1254 } 1255 1256 /** No instance methods. */ 1257 private MurmurHash3() { 1258 } 1259}