ATLAS Offline Software
xxhash.h
1 /*
2  * xxHash - Extremely Fast Hash algorithm
3  * Header File
4  * Copyright (C) 2012-2023 Yann Collet
5  *
6  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are
10  * met:
11  *
12  * * Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * * Redistributions in binary form must reproduce the above
15  * copyright notice, this list of conditions and the following disclaimer
16  * in the documentation and/or other materials provided with the
17  * distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * You can contact the author at:
32  * - xxHash homepage: https://www.xxhash.com
33  * - xxHash source repository: https://github.com/Cyan4973/xxHash
34  */
35 
172 #if defined (__cplusplus)
173 extern "C" {
174 #endif
175 
176 /* ****************************
177  * INLINE mode
178  ******************************/
184 #ifdef XXH_DOXYGEN
185 
203 # define XXH_INLINE_ALL
204 # undef XXH_INLINE_ALL
205 
208 # define XXH_PRIVATE_API
209 # undef XXH_PRIVATE_API
210 
223 # define XXH_NAMESPACE /* YOUR NAME HERE */
224 # undef XXH_NAMESPACE
225 #endif
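/*
 * Illustrative sketch (not part of the upstream header): XXH_NAMESPACE is
 * normally set on the compiler command line, e.g. `-DXXH_NAMESPACE=ATLAS_`
 * (the prefix is just an example). Calls are still written with the standard
 * names; the macros further below rewrite them, so the emitted symbol
 * becomes ATLAS_XXH32:
 *
 *   #include "xxhash.h"
 *   XXH32_hash_t h = XXH32(buffer, bufferSize, 0);  // links against ATLAS_XXH32
 *
 * `buffer` and `bufferSize` are hypothetical names used only for this example.
 */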
226 
227 #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
228  && !defined(XXH_INLINE_ALL_31684351384)
229  /* this section should be traversed only once */
230 # define XXH_INLINE_ALL_31684351384
231  /* give access to the advanced API, required to compile implementations */
232 # undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */
233 # define XXH_STATIC_LINKING_ONLY
234  /* make all functions private */
235 # undef XXH_PUBLIC_API
236 # if defined(__GNUC__)
237 # define XXH_PUBLIC_API static __inline __attribute__((unused))
238 # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
239 # define XXH_PUBLIC_API static inline
240 # elif defined(_MSC_VER)
241 # define XXH_PUBLIC_API static __inline
242 # else
243  /* note: this version may generate warnings for unused static functions */
244 # define XXH_PUBLIC_API static
245 # endif
246 
247  /*
248  * This part deals with the special case where a unit wants to inline xxHash,
249  * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
250  * such as part of some previously included *.h header file.
251  * Without further action, the new include would just be ignored,
252  * and functions would effectively _not_ be inlined (silent failure).
253  * The following macros solve this situation by prefixing all inlined names,
254  * avoiding naming collision with previous inclusions.
255  */
256  /* Before that, we unconditionally #undef all symbols,
257  * in case they were already defined with XXH_NAMESPACE.
258  * They will then be redefined for XXH_INLINE_ALL
259  */
260 # undef XXH_versionNumber
261  /* XXH32 */
262 # undef XXH32
263 # undef XXH32_createState
264 # undef XXH32_freeState
265 # undef XXH32_reset
266 # undef XXH32_update
267 # undef XXH32_digest
268 # undef XXH32_copyState
269 # undef XXH32_canonicalFromHash
270 # undef XXH32_hashFromCanonical
271  /* XXH64 */
272 # undef XXH64
273 # undef XXH64_createState
274 # undef XXH64_freeState
275 # undef XXH64_reset
276 # undef XXH64_update
277 # undef XXH64_digest
278 # undef XXH64_copyState
279 # undef XXH64_canonicalFromHash
280 # undef XXH64_hashFromCanonical
281  /* XXH3_64bits */
282 # undef XXH3_64bits
283 # undef XXH3_64bits_withSecret
284 # undef XXH3_64bits_withSeed
285 # undef XXH3_64bits_withSecretandSeed
286 # undef XXH3_createState
287 # undef XXH3_freeState
288 # undef XXH3_copyState
289 # undef XXH3_64bits_reset
290 # undef XXH3_64bits_reset_withSeed
291 # undef XXH3_64bits_reset_withSecret
292 # undef XXH3_64bits_update
293 # undef XXH3_64bits_digest
294 # undef XXH3_generateSecret
295  /* XXH3_128bits */
296 # undef XXH128
297 # undef XXH3_128bits
298 # undef XXH3_128bits_withSeed
299 # undef XXH3_128bits_withSecret
300 # undef XXH3_128bits_reset
301 # undef XXH3_128bits_reset_withSeed
302 # undef XXH3_128bits_reset_withSecret
303 # undef XXH3_128bits_reset_withSecretandSeed
304 # undef XXH3_128bits_update
305 # undef XXH3_128bits_digest
306 # undef XXH128_isEqual
307 # undef XXH128_cmp
308 # undef XXH128_canonicalFromHash
309 # undef XXH128_hashFromCanonical
310  /* Finally, free the namespace itself */
311 # undef XXH_NAMESPACE
312 
313  /* employ the namespace for XXH_INLINE_ALL */
314 # define XXH_NAMESPACE XXH_INLINE_
315  /*
316  * Some identifiers (enums, type names) are not symbols,
317  * but they must nonetheless be renamed to avoid redeclaration.
318  * Alternative solution: do not redeclare them.
319  * However, this requires some #ifdefs, and has a more dispersed impact.
320  * Meanwhile, renaming can be achieved in a single place.
321  */
322 # define XXH_IPREF(Id) XXH_NAMESPACE ## Id
323 # define XXH_OK XXH_IPREF(XXH_OK)
324 # define XXH_ERROR XXH_IPREF(XXH_ERROR)
325 # define XXH_errorcode XXH_IPREF(XXH_errorcode)
326 # define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
327 # define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
328 # define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
329 # define XXH32_state_s XXH_IPREF(XXH32_state_s)
330 # define XXH32_state_t XXH_IPREF(XXH32_state_t)
331 # define XXH64_state_s XXH_IPREF(XXH64_state_s)
332 # define XXH64_state_t XXH_IPREF(XXH64_state_t)
333 # define XXH3_state_s XXH_IPREF(XXH3_state_s)
334 # define XXH3_state_t XXH_IPREF(XXH3_state_t)
335 # define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
336  /* Ensure the header is parsed again, even if it was previously included */
337 # undef XXHASH_H_5627135585666179
338 # undef XXHASH_H_STATIC_13879238742
339 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
340 
341 /* ****************************************************************
342  * Stable API
343  *****************************************************************/
344 #ifndef XXHASH_H_5627135585666179
345 #define XXHASH_H_5627135585666179 1
346 
348 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
349 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
350 # ifdef XXH_EXPORT
351 # define XXH_PUBLIC_API __declspec(dllexport)
352 # elif XXH_IMPORT
353 # define XXH_PUBLIC_API __declspec(dllimport)
354 # endif
355 # else
356 # define XXH_PUBLIC_API /* do nothing */
357 # endif
358 #endif
359 
360 #ifdef XXH_NAMESPACE
361 # define XXH_CAT(A,B) A##B
362 # define XXH_NAME2(A,B) XXH_CAT(A,B)
363 # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
364 /* XXH32 */
365 # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
366 # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
367 # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
368 # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
369 # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
370 # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
371 # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
372 # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
373 # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
374 /* XXH64 */
375 # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
376 # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
377 # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
378 # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
379 # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
380 # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
381 # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
382 # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
383 # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
384 /* XXH3_64bits */
385 # define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
386 # define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
387 # define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
388 # define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
389 # define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
390 # define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
391 # define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
392 # define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
393 # define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
394 # define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
395 # define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
396 # define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
397 # define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
398 # define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
399 # define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
400 /* XXH3_128bits */
401 # define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
402 # define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
403 # define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
404 # define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
405 # define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
406 # define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
407 # define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
408 # define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
409 # define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
410 # define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
411 # define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
412 # define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
413 # define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
414 # define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
415 # define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
416 #endif
417 
418 
419 /* *************************************
420 * Compiler specifics
421 ***************************************/
422 
423 /* specific declaration modes for Windows */
424 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
425 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
426 # ifdef XXH_EXPORT
427 # define XXH_PUBLIC_API __declspec(dllexport)
428 # elif XXH_IMPORT
429 # define XXH_PUBLIC_API __declspec(dllimport)
430 # endif
431 # else
432 # define XXH_PUBLIC_API /* do nothing */
433 # endif
434 #endif
435 
436 #if defined (__GNUC__)
437 # define XXH_CONSTF __attribute__((const))
438 # define XXH_PUREF __attribute__((pure))
439 # define XXH_MALLOCF __attribute__((malloc))
440 #else
441 # define XXH_CONSTF /* disable */
442 # define XXH_PUREF
443 # define XXH_MALLOCF
444 #endif
445 
446 /* *************************************
447 * Version
448 ***************************************/
449 #define XXH_VERSION_MAJOR 0
450 #define XXH_VERSION_MINOR 8
451 #define XXH_VERSION_RELEASE 2
452 
453 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
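/*
 * Worked example (illustrative): with the values above, XXH_VERSION_NUMBER
 * evaluates to 0*100*100 + 8*100 + 2 = 802 for release v0.8.2, and
 * XXH_versionNumber() returns this same value at run time.
 */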
454 
464 
465 
466 /* ****************************
467 * Common basic types
468 ******************************/
469 #include <stddef.h> /* size_t */
473 typedef enum {
474  XXH_OK = 0,
475  XXH_ERROR
476 } XXH_errorcode;
477 
478 
479 /*-**********************************************************************
480 * 32-bit hash
481 ************************************************************************/
482 #if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
483 
488 typedef uint32_t XXH32_hash_t;
489 
490 #elif !defined (__VMS) \
491  && (defined (__cplusplus) \
492  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
493 # include <stdint.h>
494  typedef uint32_t XXH32_hash_t;
495 
496 #else
497 # include <limits.h>
498 # if UINT_MAX == 0xFFFFFFFFUL
499  typedef unsigned int XXH32_hash_t;
500 # elif ULONG_MAX == 0xFFFFFFFFUL
501  typedef unsigned long XXH32_hash_t;
502 # else
503 # error "unsupported platform: need a 32-bit type"
504 # endif
505 #endif
506 
548 XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
549 
550 #ifndef XXH_NO_STREAM
551 
582 typedef struct XXH32_state_s XXH32_state_t;
583 
607 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
608 
622 XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
623 
643 
658 XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
659 #endif /* !XXH_NO_STREAM */
660 
661 /******* Canonical representation *******/
662 
663 /*
664  * The default return values from XXH functions are unsigned 32 and 64 bit
665  * integers.
666  * This is the simplest and fastest format for further post-processing.
667  *
668  * However, this leaves open the question of what is the order on the byte level,
669  * since little and big endian conventions will store the same number differently.
670  *
671  * The canonical representation settles this issue by mandating big-endian
672  * convention, the same convention as human-readable numbers (large digits first).
673  *
674  * When writing hash values to storage, sending them over a network, or printing
675  * them, it's highly recommended to use the canonical representation to ensure
676  * portability across a wider range of systems, present and future.
677  *
678  * The following functions allow transformation of hash values to and from
679  * canonical format.
680  */
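/*
 * Usage sketch (illustrative, not part of the upstream header); `h` and `file`
 * are hypothetical values used only for this example:
 *
 *   XXH32_canonical_t canon;
 *   XXH32_canonicalFromHash(&canon, h);              // big-endian byte image of h
 *   fwrite(canon.digest, 1, sizeof(canon.digest), file);
 *
 *   // reading it back later, on any platform:
 *   XXH32_hash_t const h2 = XXH32_hashFromCanonical(&canon);
 */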
681 
685 typedef struct {
686  unsigned char digest[4];
687 } XXH32_canonical_t;
688 
698 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
699 
710 XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
711 
712 
713 #ifdef __has_attribute
714 # define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
715 #else
716 # define XXH_HAS_ATTRIBUTE(x) 0
717 #endif
718 
719 /*
720  * C23 __STDC_VERSION__ number hasn't been specified yet. For now
721  * leave as `201711L` (C17 + 1).
722  * TODO: Update to the correct value when it has been specified.
723  */
724 #define XXH_C23_VN 201711L
725 
726 /* C-language Attributes are added in C23. */
727 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
728 # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
729 #else
730 # define XXH_HAS_C_ATTRIBUTE(x) 0
731 #endif
732 
733 #if defined(__cplusplus) && defined(__has_cpp_attribute)
734 # define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
735 #else
736 # define XXH_HAS_CPP_ATTRIBUTE(x) 0
737 #endif
738 
739 /*
740  * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
741  * introduced in CPP17 and C23.
742  * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
743  * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
744  */
745 #if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
746 # define XXH_FALLTHROUGH [[fallthrough]]
747 #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
748 # define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
749 #else
750 # define XXH_FALLTHROUGH /* fallthrough */
751 #endif
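/*
 * Usage sketch (illustrative): XXH_FALLTHROUGH silences implicit-fallthrough
 * warnings in switch statements, as done in XXH32_finalize() further below.
 * `step()` is a hypothetical helper used only for this example:
 *
 *   switch (len & 3) {
 *   case 3: hash = step(hash);
 *           XXH_FALLTHROUGH;
 *   case 2: hash = step(hash);
 *           XXH_FALLTHROUGH;
 *   case 1: hash = step(hash);
 *   }
 */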
752 
753 /*
754  * Define XXH_NOESCAPE for annotated pointers in public API.
755  * https://clang.llvm.org/docs/AttributeReference.html#noescape
756  * As of writing this, only supported by clang.
757  */
758 #if XXH_HAS_ATTRIBUTE(noescape)
759 # define XXH_NOESCAPE __attribute__((noescape))
760 #else
761 # define XXH_NOESCAPE
762 #endif
763 
764 
771 #ifndef XXH_NO_LONG_LONG
772 /*-**********************************************************************
773 * 64-bit hash
774 ************************************************************************/
775 #if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
776 
781 typedef uint64_t XXH64_hash_t;
782 #elif !defined (__VMS) \
783  && (defined (__cplusplus) \
784  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
785 # include <stdint.h>
786  typedef uint64_t XXH64_hash_t;
787 #else
788 # include <limits.h>
789 # if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
790  /* LP64 ABI says uint64_t is unsigned long */
791  typedef unsigned long XXH64_hash_t;
792 # else
793  /* the following type must have a width of 64-bit */
794  typedef unsigned long long XXH64_hash_t;
795 # endif
796 #endif
797 
836 
837 /******* Streaming *******/
838 #ifndef XXH_NO_STREAM
839 
844 typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
848 
852 #endif /* !XXH_NO_STREAM */
853 /******* Canonical representation *******/
854 typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
857 
858 #ifndef XXH_NO_XXH3
859 
906 /*-**********************************************************************
907 * XXH3 64-bit variant
908 ************************************************************************/
909 
925 
942 
950 #define XXH3_SECRET_SIZE_MIN 136
951 
970 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
971 
972 
973 /******* Streaming *******/
974 #ifndef XXH_NO_STREAM
975 /*
976  * Streaming requires state maintenance.
977  * This operation costs memory and CPU.
978  * As a consequence, streaming is slower than one-shot hashing.
979  * For better performance, prefer one-shot functions whenever applicable.
980  */
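/*
 * Streaming usage sketch (illustrative, not part of the upstream header),
 * assuming the input arrives in hypothetical chunks `chunk`/`chunkSize` and a
 * hypothetical `more_input()` predicate:
 *
 *   XXH3_state_t* const state = XXH3_createState();
 *   if (state == NULL) abort();                  // allocation may fail
 *   XXH3_64bits_reset(state);                    // digest equivalent to XXH3_64bits()
 *   while (more_input())
 *       XXH3_64bits_update(state, chunk, chunkSize);
 *   XXH64_hash_t const h = XXH3_64bits_digest(state);
 *   XXH3_freeState(state);
 */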
981 
987 typedef struct XXH3_state_s XXH3_state_t;
991 
992 /*
993  * XXH3_64bits_reset():
994  * Initialize with default parameters.
995  * digest will be equivalent to `XXH3_64bits()`.
996  */
998 /*
999  * XXH3_64bits_reset_withSeed():
1000  * Generate a custom secret from `seed`, and store it into `statePtr`.
1001  * digest will be equivalent to `XXH3_64bits_withSeed()`.
1002  */
1014 
1017 #endif /* !XXH_NO_STREAM */
1018 
1019 /* note : canonical representation of XXH3 is the same as XXH64
1020  * since they both produce XXH64_hash_t values */
1021 
1022 
1023 /*-**********************************************************************
1024 * XXH3 128-bit variant
1025 ************************************************************************/
1026 
1033 typedef struct {
1034  XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */
1035  XXH64_hash_t high64; /*!< `value >> 64` */
1036 } XXH128_hash_t;
1037 
1059 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
1060 
1061 /******* Streaming *******/
1062 #ifndef XXH_NO_STREAM
1063 /*
1064  * Streaming requires state maintenance.
1065  * This operation costs memory and CPU.
1066  * As a consequence, streaming is slower than one-shot hashing.
1067  * For better performance, prefer one-shot functions whenever applicable.
1068  *
1069  * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
1070  * Use already declared XXH3_createState() and XXH3_freeState().
1071  *
1072  * All reset and streaming functions have same meaning as their 64-bit counterpart.
1073  */
1074 
1079 
1082 #endif /* !XXH_NO_STREAM */
1083 
1084 /* The following helper functions make it possible to compare XXH128_hash_t values.
1085  * Since XXH128_hash_t is a structure, this capability is not offered by the language.
1086  * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
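/*
 * Comparison sketch (illustrative); `bufA`/`lenA` and `bufB`/`lenB` are
 * hypothetical inputs used only for this example:
 *
 *   XXH128_hash_t const h1 = XXH3_128bits(bufA, lenA);
 *   XXH128_hash_t const h2 = XXH3_128bits(bufB, lenB);
 *   if (XXH128_isEqual(h1, h2)) { /+ equal +/ }   // equality only
 *   int const order = XXH128_cmp(&h1, &h2);       // <0, ==0 or >0, usable for sorting
 */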
1087 
1093 
1102 XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
1103 
1104 
1105 /******* Canonical representation *******/
1106 typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
1109 
1110 
1111 #endif /* !XXH_NO_XXH3 */
1112 #endif /* XXH_NO_LONG_LONG */
1113 
1117 #endif /* XXHASH_H_5627135585666179 */
1118 
1119 
1120 
1121 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
1122 #define XXHASH_H_STATIC_13879238742
1123 /* ****************************************************************************
1124  * This section contains declarations which are not guaranteed to remain stable.
1125  * They may change in future versions, becoming incompatible with a different
1126  * version of the library.
1127  * These declarations should only be used with static linking.
1128  * Never use them in association with dynamic linking!
1129  ***************************************************************************** */
1130 
1131 /*
1132  * These definitions are only present to allow static allocation
1133  * of XXH states, on stack or in a struct, for example.
1134  * Never **ever** access their members directly.
1135  */
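/*
 * Static/stack allocation sketch (illustrative): the definitions below make
 * the state sizes known, so a state can live on the stack and be driven with
 * the public streaming API instead of XXH32_createState()/XXH32_freeState().
 * `data`/`dataSize` are hypothetical names:
 *
 *   XXH32_state_t st;                    // no heap allocation
 *   XXH32_reset(&st, 0);
 *   XXH32_update(&st, data, dataSize);
 *   XXH32_hash_t const h = XXH32_digest(&st);
 */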
1136 
1149 struct XXH32_state_s {
1150  XXH32_hash_t total_len_32;
1151  XXH32_hash_t large_len;
1152  XXH32_hash_t v[4];
1153  XXH32_hash_t mem32[4];
1154  XXH32_hash_t memsize;
1155  XXH32_hash_t reserved;
1156 }; /* typedef'd to XXH32_state_t */
1157 
1158 
1159 #ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
1160 
1173 struct XXH64_state_s {
1174  XXH64_hash_t total_len;
1175  XXH64_hash_t v[4];
1176  XXH64_hash_t mem64[4];
1177  XXH32_hash_t memsize;
1178  XXH32_hash_t reserved32;
1179  XXH64_hash_t reserved64;
1180 }; /* typedef'd to XXH64_state_t */
1181 
1182 #ifndef XXH_NO_XXH3
1183 
1184 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
1185 # include <stdalign.h>
1186 # define XXH_ALIGN(n) alignas(n)
1187 #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
1188 /* In C++ alignas() is a keyword */
1189 # define XXH_ALIGN(n) alignas(n)
1190 #elif defined(__GNUC__)
1191 # define XXH_ALIGN(n) __attribute__ ((aligned(n)))
1192 #elif defined(_MSC_VER)
1193 # define XXH_ALIGN(n) __declspec(align(n))
1194 #else
1195 # define XXH_ALIGN(n) /* disabled */
1196 #endif
1197 
1198 /* Old GCC versions only accept the attribute after the type in structures. */
1199 #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
1200  && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
1201  && defined(__GNUC__)
1202 # define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
1203 #else
1204 # define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
1205 #endif
1206 
1214 #define XXH3_INTERNALBUFFER_SIZE 256
1215 
1223 #define XXH3_SECRET_DEFAULT_SIZE 192
1224 
1247 struct XXH3_state_s {
1248  XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
1250  XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
1252  XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
1254  XXH32_hash_t bufferedSize;
1256  XXH32_hash_t useSeed;
1258  size_t nbStripesSoFar;
1260  XXH64_hash_t totalLen;
1262  size_t nbStripesPerBlock;
1264  size_t secretLimit;
1268  XXH64_hash_t reserved64;
1270  const unsigned char* extSecret;
1273  /* note: there may be some padding at the end due to alignment on 64 bytes */
1274 }; /* typedef'd to XXH3_state_t */
1275 
1276 #undef XXH_ALIGN_MEMBER
1277 
1289 #define XXH3_INITSTATE(XXH3_state_ptr) \
1290  do { \
1291  XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
1292  tmp_xxh3_state_ptr->seed = 0; \
1293  tmp_xxh3_state_ptr->extSecret = NULL; \
1294  } while(0)
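/*
 * Usage sketch (illustrative): a stack-allocated XXH3 state is not zeroed, so
 * it should be primed with XXH3_INITSTATE() (or a memset()) before its first
 * reset. `data`/`dataSize` are hypothetical names:
 *
 *   XXH3_state_t st3;
 *   XXH3_INITSTATE(&st3);
 *   XXH3_64bits_reset_withSeed(&st3, 1234);
 *   XXH3_64bits_update(&st3, data, dataSize);
 *   XXH64_hash_t const h3 = XXH3_64bits_digest(&st3);
 */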
1295 
1296 
1300 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
1301 
1302 
1303 /* === Experimental API === */
1304 /* Symbols defined below must be considered tied to a specific library version. */
1305 
1357 XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
1358 
1396 XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
1397 
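/*
 * Usage sketch (illustrative, not part of the upstream header): derive a
 * secret from low-entropy material, then hash with it. `seedString`, `data`
 * and `dataSize` are hypothetical; the secret buffer must be at least
 * XXH3_SECRET_SIZE_MIN bytes.
 *
 *   unsigned char secret[XXH3_SECRET_SIZE_MIN];
 *   if (XXH3_generateSecret(secret, sizeof(secret), seedString, strlen(seedString)) != XXH_OK)
 *       abort();
 *   XXH64_hash_t const h = XXH3_64bits_withSecret(data, dataSize, secret, sizeof(secret));
 */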
1424 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
1425 XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
1426  XXH_NOESCAPE const void* secret, size_t secretSize,
1427  XXH64_hash_t seed);
1429 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
1430 XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
1431  XXH_NOESCAPE const void* secret, size_t secretSize,
1432  XXH64_hash_t seed64);
1433 #ifndef XXH_NO_STREAM
1434 
1435 XXH_PUBLIC_API XXH_errorcode
1436 XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1437  XXH_NOESCAPE const void* secret, size_t secretSize,
1438  XXH64_hash_t seed64);
1440 XXH_PUBLIC_API XXH_errorcode
1441 XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1442  XXH_NOESCAPE const void* secret, size_t secretSize,
1443  XXH64_hash_t seed64);
1444 #endif /* !XXH_NO_STREAM */
1445 
1446 #endif /* !XXH_NO_XXH3 */
1447 #endif /* XXH_NO_LONG_LONG */
1448 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
1449 # define XXH_IMPLEMENTATION
1450 #endif
1451 
1452 #endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
1453 
1454 
1455 /* ======================================================================== */
1456 /* ======================================================================== */
1457 /* ======================================================================== */
1458 
1459 
1460 /*-**********************************************************************
1461  * xxHash implementation
1462  *-**********************************************************************
1463  * xxHash's implementation used to be hosted inside xxhash.c.
1464  *
1465  * However, inlining requires implementation to be visible to the compiler,
1466  * hence be included alongside the header.
1467  * Previously, implementation was hosted inside xxhash.c,
1468  * which was then #included when inlining was activated.
1469  * This construction created issues with a few build and install systems,
1470  * as it required xxhash.c to be stored in /include directory.
1471  *
1472  * xxHash implementation is now directly integrated within xxhash.h.
1473  * As a consequence, xxhash.c is no longer needed in /include.
1474  *
1475  * xxhash.c is still available and is still useful.
1476  * In a "normal" setup, when xxhash is not inlined,
1477  * xxhash.h only exposes the prototypes and public symbols,
1478  * while xxhash.c can be built into an object file xxhash.o
1479  * which can then be linked into the final binary.
1480  ************************************************************************/
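/*
 * Build sketch (illustrative): to consume xxHash as a header-only library,
 * define XXH_INLINE_ALL before including this file in the translation unit
 * that wants the inlined implementation; no xxhash.o is linked in that case.
 *
 *   #define XXH_INLINE_ALL
 *   #include "xxhash.h"
 *
 * Alternatively, include the header normally and link xxhash.c / xxhash.o,
 * as described above.
 */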
1481 
1482 #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
1483  || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
1484 # define XXH_IMPLEM_13a8737387
1485 
1486 /* *************************************
1487 * Tuning parameters
1488 ***************************************/
1489 
1496 #ifdef XXH_DOXYGEN
1497 
1502 # define XXH_NO_LONG_LONG
1503 # undef XXH_NO_LONG_LONG /* don't actually */
1504 
1554 # define XXH_FORCE_MEMORY_ACCESS 0
1555 
1582 # define XXH_SIZE_OPT 0
1583 
1612 # define XXH_FORCE_ALIGN_CHECK 0
1613 
1634 # define XXH_NO_INLINE_HINTS 0
1635 
1651 # define XXH3_INLINE_SECRET 0
1652 
1663 # define XXH32_ENDJMP 0
1664 
1672 # define XXH_OLD_NAMES
1673 # undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
1674 
1683 # define XXH_NO_STREAM
1684 # undef XXH_NO_STREAM /* don't actually */
1685 #endif /* XXH_DOXYGEN */
1686 
1690 #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
1691  /* prefer __packed__ structures (method 1) for GCC;
1692  * on ARM < v7 with unaligned access (e.g. Raspbian armhf), GCC still emits byte shifting
1693  * for packed accesses, so fall back to memcpy, which for some reason does unaligned loads. */
1694 # if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
1695 # define XXH_FORCE_MEMORY_ACCESS 1
1696 # endif
1697 #endif
1698 
1699 #ifndef XXH_SIZE_OPT
1700  /* default to 1 for -Os or -Oz */
1701 # if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
1702 # define XXH_SIZE_OPT 1
1703 # else
1704 # define XXH_SIZE_OPT 0
1705 # endif
1706 #endif
1707 
1708 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
1709  /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
1710 # if XXH_SIZE_OPT >= 1 || \
1711  defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
1712  || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
1713 # define XXH_FORCE_ALIGN_CHECK 0
1714 # else
1715 # define XXH_FORCE_ALIGN_CHECK 1
1716 # endif
1717 #endif
1718 
1719 #ifndef XXH_NO_INLINE_HINTS
1720 # if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
1721 # define XXH_NO_INLINE_HINTS 1
1722 # else
1723 # define XXH_NO_INLINE_HINTS 0
1724 # endif
1725 #endif
1726 
1727 #ifndef XXH3_INLINE_SECRET
1728 # if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
1729  || !defined(XXH_INLINE_ALL)
1730 # define XXH3_INLINE_SECRET 0
1731 # else
1732 # define XXH3_INLINE_SECRET 1
1733 # endif
1734 #endif
1735 
1736 #ifndef XXH32_ENDJMP
1737 /* generally preferable for performance */
1738 # define XXH32_ENDJMP 0
1739 #endif
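/*
 * Build sketch (illustrative): the tuning macros above are meant to be set
 * externally rather than edited here, e.g. on a hypothetical GCC command line:
 *
 *   cc -O2 -DXXH_FORCE_MEMORY_ACCESS=1 -DXXH_FORCE_ALIGN_CHECK=0 -c xxhash.c
 *
 * Any macro left undefined falls back to the defaults selected above.
 */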
1740 
1747 /* *************************************
1748 * Includes & Memory related functions
1749 ***************************************/
1750 #if defined(XXH_NO_STREAM)
1751 /* nothing */
1752 #elif defined(XXH_NO_STDLIB)
1753 
1754 /* When requesting to disable any mention of stdlib,
1755  * the library loses the ability to invoke malloc() / free().
1756  * In practice, it means that functions like `XXH*_createState()`
1757  * will always fail, and return NULL.
1758  * This flag is useful in situations where
1759  * xxhash.h is integrated into some kernel, embedded or limited environment
1760  * without access to dynamic allocation.
1761  */
1762 
1763 static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
1764 static void XXH_free(void* p) { (void)p; }
1765 
1766 #else
1767 
1768 /*
1769  * Modify the local functions below should you wish to use
1770  * different memory routines for malloc() and free()
1771  */
1772 #include <stdlib.h>
1773 
1778 static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
1779 
1784 static void XXH_free(void* p) { free(p); }
1785 
1786 #endif /* XXH_NO_STDLIB */
1787 
1788 #include <string.h>
1789 
1794 static void* XXH_memcpy(void* dest, const void* src, size_t size)
1795 {
1796  return memcpy(dest,src,size);
1797 }
1798 
1799 #include <limits.h> /* ULLONG_MAX */
1800 
1801 
1802 /* *************************************
1803 * Compiler Specific Options
1804 ***************************************/
1805 #ifdef _MSC_VER /* Visual Studio warning fix */
1806 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1807 #endif
1808 
1809 #if XXH_NO_INLINE_HINTS /* disable inlining hints */
1810 # if defined(__GNUC__) || defined(__clang__)
1811 # define XXH_FORCE_INLINE static __attribute__((unused))
1812 # else
1813 # define XXH_FORCE_INLINE static
1814 # endif
1815 # define XXH_NO_INLINE static
1816 /* enable inlining hints */
1817 #elif defined(__GNUC__) || defined(__clang__)
1818 # define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
1819 # define XXH_NO_INLINE static __attribute__((noinline))
1820 #elif defined(_MSC_VER) /* Visual Studio */
1821 # define XXH_FORCE_INLINE static __forceinline
1822 # define XXH_NO_INLINE static __declspec(noinline)
1823 #elif defined (__cplusplus) \
1824  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
1825 # define XXH_FORCE_INLINE static inline
1826 # define XXH_NO_INLINE static
1827 #else
1828 # define XXH_FORCE_INLINE static
1829 # define XXH_NO_INLINE static
1830 #endif
1831 
1832 #if XXH3_INLINE_SECRET
1833 # define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
1834 #else
1835 # define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
1836 #endif
1837 
1838 
1839 /* *************************************
1840 * Debug
1841 ***************************************/
1850 #ifndef XXH_DEBUGLEVEL
1851 # ifdef DEBUGLEVEL /* backwards compat */
1852 # define XXH_DEBUGLEVEL DEBUGLEVEL
1853 # else
1854 # define XXH_DEBUGLEVEL 0
1855 # endif
1856 #endif
1857 
1858 #if (XXH_DEBUGLEVEL>=1) || __CPPCHECK__
1859 # include <assert.h> /* note: can still be disabled with NDEBUG */
1860 # define XXH_ASSERT(c) assert(c)
1861 #else
1862 # define XXH_ASSERT(c) XXH_ASSUME(c)
1863 #endif
1864 
1865 /* note: use after variable declarations */
1866 #ifndef XXH_STATIC_ASSERT
1867 # if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
1868 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
1869 # elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
1870 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
1871 # else
1872 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
1873 # endif
1874 # define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
1875 #endif
1876 
1893 #if defined(__GNUC__) || defined(__clang__)
1894 # define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
1895 #else
1896 # define XXH_COMPILER_GUARD(var) ((void)0)
1897 #endif
1898 
1899 #if defined(__clang__)
1900 # define XXH_COMPILER_GUARD_W(var) __asm__("" : "+w" (var))
1901 #else
1902 # define XXH_COMPILER_GUARD_W(var) ((void)0)
1903 #endif
1904 
1905 /* *************************************
1906 * Basic Types
1907 ***************************************/
1908 #if !defined (__VMS) \
1909  && (defined (__cplusplus) \
1910  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
1911 # include <stdint.h>
1912  typedef uint8_t xxh_u8;
1913 #else
1914  typedef unsigned char xxh_u8;
1915 #endif
1916 typedef XXH32_hash_t xxh_u32;
1917 
1918 #ifdef XXH_OLD_NAMES
1919 # define BYTE xxh_u8
1920 # define U8 xxh_u8
1921 # define U32 xxh_u32
1922 #endif
1923 
1924 /* *** Memory access *** */
1925 
1976 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
1977 /*
1978  * Manual byteshift. Best for old compilers which don't inline memcpy.
1979  * We actually directly use XXH_readLE32 and XXH_readBE32.
1980  */
1981 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
1982 
1983 /*
1984  * Force direct memory access. Only works on CPU which support unaligned memory
1985  * access in hardware.
1986  */
1987 static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
1988 
1989 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1990 
1991 /*
1992  * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
1993  * documentation claimed that it only increased the alignment, but actually it
1994  * can decrease it on gcc, clang, and icc:
1995  * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
1996  * https://gcc.godbolt.org/z/xYez1j67Y.
1997  */
1998 #ifdef XXH_OLD_NAMES
1999 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
2000 #endif
2001 static xxh_u32 XXH_read32(const void* ptr)
2002 {
2003  typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
2004  return *((const xxh_unalign32*)ptr);
2005 }
2006 
2007 #else
2008 
2009 /*
2010  * Portable and safe solution. Generally efficient.
2011  * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2012  */
2013 static xxh_u32 XXH_read32(const void* memPtr)
2014 {
2015  xxh_u32 val;
2016  XXH_memcpy(&val, memPtr, sizeof(val));
2017  return val;
2018 }
2019 
2020 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
2021 
2022 
2023 /* *** Endianness *** */
2024 
2041 #ifndef XXH_CPU_LITTLE_ENDIAN
2042 /*
2043  * Try to detect endianness automatically, to avoid the nonstandard behavior
2044  * in `XXH_isLittleEndian()`
2045  */
2046 # if defined(_WIN32) /* Windows is always little endian */ \
2047  || defined(__LITTLE_ENDIAN__) \
2048  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
2049 # define XXH_CPU_LITTLE_ENDIAN 1
2050 # elif defined(__BIG_ENDIAN__) \
2051  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2052 # define XXH_CPU_LITTLE_ENDIAN 0
2053 # else
2054 
2060 static int XXH_isLittleEndian(void)
2061 {
2062  /*
2063  * Portable and well-defined behavior.
2064  * Don't use static: it is detrimental to performance.
2065  */
2066  const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
2067  return one.c[0];
2068 }
2069 # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
2070 # endif
2071 #endif
2072 
2073 
2074 
2075 
2076 /* ****************************************
2077 * Compiler-specific Functions and Macros
2078 ******************************************/
2079 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
2080 
2081 #ifdef __has_builtin
2082 # define XXH_HAS_BUILTIN(x) __has_builtin(x)
2083 #else
2084 # define XXH_HAS_BUILTIN(x) 0
2085 #endif
2086 
2087 
2088 
2089 /*
2090  * C23 and future versions have standard "unreachable()".
2091  * Once it has been implemented reliably we can add it as an
2092  * additional case:
2093  *
2094  * ```
2095  * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
2096  * # include <stddef.h>
2097  * # ifdef unreachable
2098  * # define XXH_UNREACHABLE() unreachable()
2099  * # endif
2100  * #endif
2101  * ```
2102  *
2103  * Note C++23 also has std::unreachable() which can be detected
2104  * as follows:
2105  * ```
2106  * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
2107  * # include <utility>
2108  * # define XXH_UNREACHABLE() std::unreachable()
2109  * #endif
2110  * ```
2111  * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
2112  * We don't use that as including `<utility>` in `extern "C"` blocks
2113  * doesn't work on GCC12
2114  */
2115 
2116 #if XXH_HAS_BUILTIN(__builtin_unreachable)
2117 # define XXH_UNREACHABLE() __builtin_unreachable()
2118 
2119 #elif defined(_MSC_VER)
2120 # define XXH_UNREACHABLE() __assume(0)
2121 
2122 #else
2123 # define XXH_UNREACHABLE()
2124 #endif
2125 
2126 #if XXH_HAS_BUILTIN(__builtin_assume)
2127 # define XXH_ASSUME(c) __builtin_assume(c)
2128 #else
2129 # define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
2130 #endif
2131 
2145 #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
2146  && XXH_HAS_BUILTIN(__builtin_rotateleft64)
2147 # define XXH_rotl32 __builtin_rotateleft32
2148 # define XXH_rotl64 __builtin_rotateleft64
2149 /* Note: although _rotl exists for MinGW (GCC on Windows), performance seems poor */
2150 #elif defined(_MSC_VER)
2151 # define XXH_rotl32(x,r) _rotl(x,r)
2152 # define XXH_rotl64(x,r) _rotl64(x,r)
2153 #else
2154 # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
2155 # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
2156 #endif
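/*
 * Worked example (illustrative): XXH_rotl32 is a plain left rotation, so
 * XXH_rotl32(0x80000001U, 1) == 0x00000003U and XXH_rotl64(1ULL, 63) == 1ULL << 63.
 * Note the fallback macros assume 0 < r < 32 (resp. 64), which holds for every
 * rotation amount used in this file.
 */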
2157 
2166 #if defined(_MSC_VER) /* Visual Studio */
2167 # define XXH_swap32 _byteswap_ulong
2168 #elif XXH_GCC_VERSION >= 403
2169 # define XXH_swap32 __builtin_bswap32
2170 #else
2171 static xxh_u32 XXH_swap32 (xxh_u32 x)
2172 {
2173  return ((x << 24) & 0xff000000 ) |
2174  ((x << 8) & 0x00ff0000 ) |
2175  ((x >> 8) & 0x0000ff00 ) |
2176  ((x >> 24) & 0x000000ff );
2177 }
2178 #endif
2179 
2180 
2181 /* ***************************
2182 * Memory reads
2183 *****************************/
2184 
2189 typedef enum {
2190  XXH_aligned,
2191  XXH_unaligned
2192 } XXH_alignment;
2193 
2194 /*
2195  * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
2196  *
2197  * This is ideal for older compilers which don't inline memcpy.
2198  */
2199 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2200 
2201 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
2202 {
2203  const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2204  return bytePtr[0]
2205  | ((xxh_u32)bytePtr[1] << 8)
2206  | ((xxh_u32)bytePtr[2] << 16)
2207  | ((xxh_u32)bytePtr[3] << 24);
2208 }
2209 
2210 XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
2211 {
2212  const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2213  return bytePtr[3]
2214  | ((xxh_u32)bytePtr[2] << 8)
2215  | ((xxh_u32)bytePtr[1] << 16)
2216  | ((xxh_u32)bytePtr[0] << 24);
2217 }
2218 
2219 #else
2220 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
2221 {
2222  return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
2223 }
2224 
2225 static xxh_u32 XXH_readBE32(const void* ptr)
2226 {
2227  return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
2228 }
2229 #endif
2230 
2231 XXH_FORCE_INLINE xxh_u32
2232 XXH_readLE32_align(const void* ptr, XXH_alignment align)
2233 {
2234  if (align==XXH_unaligned) {
2235  return XXH_readLE32(ptr);
2236  } else {
2237  return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
2238  }
2239 }
2240 
2241 
2242 /* *************************************
2243 * Misc
2244 ***************************************/
2246 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
2247 
2248 
2249 /* *******************************************************************
2250 * 32-bit hash functions
2251 *********************************************************************/
2260  /* #define instead of static const, to be used as initializers */
2261 #define XXH_PRIME32_1 0x9E3779B1U
2262 #define XXH_PRIME32_2 0x85EBCA77U
2263 #define XXH_PRIME32_3 0xC2B2AE3DU
2264 #define XXH_PRIME32_4 0x27D4EB2FU
2265 #define XXH_PRIME32_5 0x165667B1U
2267 #ifdef XXH_OLD_NAMES
2268 # define PRIME32_1 XXH_PRIME32_1
2269 # define PRIME32_2 XXH_PRIME32_2
2270 # define PRIME32_3 XXH_PRIME32_3
2271 # define PRIME32_4 XXH_PRIME32_4
2272 # define PRIME32_5 XXH_PRIME32_5
2273 #endif
2274 
2286 static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
2287 {
2288  acc += input * XXH_PRIME32_2;
2289  acc = XXH_rotl32(acc, 13);
2290  acc *= XXH_PRIME32_1;
2291 #if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
2292  /*
2293  * UGLY HACK:
2294  * A compiler fence is the only thing that prevents GCC and Clang from
2295  * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
2296  * reason) without globally disabling SSE4.1.
2297  *
2298  * The reason we want to avoid vectorization is because despite working on
2299  * 4 integers at a time, there are multiple factors slowing XXH32 down on
2300  * SSE4:
2301  * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
2302  * newer chips!) making it slightly slower to multiply four integers at
2303  * once compared to four integers independently. Even when pmulld was
2304  * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
2305  * just to multiply unless doing a long operation.
2306  *
2307  * - Four instructions are required to rotate,
2308  * movdqa tmp, v // not required with VEX encoding
2309  * pslld tmp, 13 // tmp <<= 13
2310  * psrld v, 19 // x >>= 19
2311  * por v, tmp // x |= tmp
2312  * compared to one for scalar:
2313  * roll v, 13 // reliably fast across the board
2314  * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
2315  *
2316  * - Instruction level parallelism is actually more beneficial here because
2317  * the SIMD actually serializes this operation: While v1 is rotating, v2
2318  * can load data, while v3 can multiply. SSE forces them to operate
2319  * together.
2320  *
2321  * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
2322  * the loop. NEON is only faster on the A53, and with the newer cores, it is less
2323  * than half the speed.
2324  */
2325  XXH_COMPILER_GUARD(acc);
2326 #endif
2327  return acc;
2328 }
2329 
2340 static xxh_u32 XXH32_avalanche(xxh_u32 hash)
2341 {
2342  hash ^= hash >> 15;
2343  hash *= XXH_PRIME32_2;
2344  hash ^= hash >> 13;
2345  hash *= XXH_PRIME32_3;
2346  hash ^= hash >> 16;
2347  return hash;
2348 }
2349 
2350 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
2351 
2367 static XXH_PUREF xxh_u32
2368 XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
2369 {
2370 #define XXH_PROCESS1 do { \
2371  hash += (*ptr++) * XXH_PRIME32_5; \
2372  hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
2373 } while (0)
2374 
2375 #define XXH_PROCESS4 do { \
2376  hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
2377  ptr += 4; \
2378  hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
2379 } while (0)
2380 
2381  if (ptr==NULL) XXH_ASSERT(len == 0);
2382 
2383  /* Compact rerolled version; generally faster */
2384  if (!XXH32_ENDJMP) {
2385  len &= 15;
2386  while (len >= 4) {
2387  XXH_PROCESS4;
2388  len -= 4;
2389  }
2390  while (len > 0) {
2391  XXH_PROCESS1;
2392  --len;
2393  }
2394  return XXH32_avalanche(hash);
2395  } else {
2396  switch(len&15) /* or switch(bEnd - p) */ {
2397  case 12: XXH_PROCESS4;
2398  XXH_FALLTHROUGH; /* fallthrough */
2399  case 8: XXH_PROCESS4;
2400  XXH_FALLTHROUGH; /* fallthrough */
2401  case 4: XXH_PROCESS4;
2402  return XXH32_avalanche(hash);
2403 
2404  case 13: XXH_PROCESS4;
2405  XXH_FALLTHROUGH; /* fallthrough */
2406  case 9: XXH_PROCESS4;
2407  XXH_FALLTHROUGH; /* fallthrough */
2408  case 5: XXH_PROCESS4;
2409  XXH_PROCESS1;
2410  return XXH32_avalanche(hash);
2411 
2412  case 14: XXH_PROCESS4;
2413  XXH_FALLTHROUGH; /* fallthrough */
2414  case 10: XXH_PROCESS4;
2415  XXH_FALLTHROUGH; /* fallthrough */
2416  case 6: XXH_PROCESS4;
2417  XXH_PROCESS1;
2418  XXH_PROCESS1;
2419  return XXH32_avalanche(hash);
2420 
2421  case 15: XXH_PROCESS4;
2422  XXH_FALLTHROUGH; /* fallthrough */
2423  case 11: XXH_PROCESS4;
2424  XXH_FALLTHROUGH; /* fallthrough */
2425  case 7: XXH_PROCESS4;
2426  XXH_FALLTHROUGH; /* fallthrough */
2427  case 3: XXH_PROCESS1;
2428  XXH_FALLTHROUGH; /* fallthrough */
2429  case 2: XXH_PROCESS1;
2430  XXH_FALLTHROUGH; /* fallthrough */
2431  case 1: XXH_PROCESS1;
2432  XXH_FALLTHROUGH; /* fallthrough */
2433  case 0: return XXH32_avalanche(hash);
2434  }
2435  XXH_ASSERT(0);
2436  return hash; /* reaching this point is deemed impossible */
2437  }
2438 }
2439 
2440 #ifdef XXH_OLD_NAMES
2441 # define PROCESS1 XXH_PROCESS1
2442 # define PROCESS4 XXH_PROCESS4
2443 #else
2444 # undef XXH_PROCESS1
2445 # undef XXH_PROCESS4
2446 #endif
2447 
2456 XXH_FORCE_INLINE XXH_PUREF xxh_u32
2457 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
2458 {
2459  xxh_u32 h32;
2460 
2461  if (input==NULL) XXH_ASSERT(len == 0);
2462 
2463  if (len>=16) {
2464  const xxh_u8* const bEnd = input + len;
2465  const xxh_u8* const limit = bEnd - 15;
2466  xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2467  xxh_u32 v2 = seed + XXH_PRIME32_2;
2468  xxh_u32 v3 = seed + 0;
2469  xxh_u32 v4 = seed - XXH_PRIME32_1;
2470 
2471  do {
2472  v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
2473  v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
2474  v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
2475  v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
2476  } while (input < limit);
2477 
2478  h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
2479  + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
2480  } else {
2481  h32 = seed + XXH_PRIME32_5;
2482  }
2483 
2484  h32 += (xxh_u32)len;
2485 
2486  return XXH32_finalize(h32, input, len&15, align);
2487 }
2488 
2490 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
2491 {
2492 #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2493  /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2494  XXH32_state_t state;
2495  XXH32_reset(&state, seed);
2496  XXH32_update(&state, (const xxh_u8*)input, len);
2497  return XXH32_digest(&state);
2498 #else
2499  if (XXH_FORCE_ALIGN_CHECK) {
2500  if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
2501  return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
2502  } }
2503 
2504  return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
2505 #endif
2506 }
2507 
2508 
2509 
2510 /******* Hash streaming *******/
2511 #ifndef XXH_NO_STREAM
2512 
2513 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
2514 {
2515  return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
2516 }
2518 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
2519 {
2520  XXH_free(statePtr);
2521  return XXH_OK;
2522 }
2523 
2525 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
2526 {
2527  XXH_memcpy(dstState, srcState, sizeof(*dstState));
2528 }
2529 
2531 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
2532 {
2533  XXH_ASSERT(statePtr != NULL);
2534  memset(statePtr, 0, sizeof(*statePtr));
2535  statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2536  statePtr->v[1] = seed + XXH_PRIME32_2;
2537  statePtr->v[2] = seed + 0;
2538  statePtr->v[3] = seed - XXH_PRIME32_1;
2539  return XXH_OK;
2540 }
2541 
2542 
2544 XXH_PUBLIC_API XXH_errorcode
2545 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
2546 {
2547  if (input==NULL) {
2548  XXH_ASSERT(len == 0);
2549  return XXH_OK;
2550  }
2551 
2552  { const xxh_u8* p = (const xxh_u8*)input;
2553  const xxh_u8* const bEnd = p + len;
2554 
2555  state->total_len_32 += (XXH32_hash_t)len;
2556  state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
2557 
2558  if (state->memsize + len < 16) { /* fill in tmp buffer */
2559  XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
2560  state->memsize += (XXH32_hash_t)len;
2561  return XXH_OK;
2562  }
2563 
2564  if (state->memsize) { /* some data left from previous update */
2565  XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
2566  { const xxh_u32* p32 = state->mem32;
2567  state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
2568  state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
2569  state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
2570  state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
2571  }
2572  p += 16-state->memsize;
2573  state->memsize = 0;
2574  }
2575 
2576  if (p <= bEnd-16) {
2577  const xxh_u8* const limit = bEnd - 16;
2578 
2579  do {
2580  state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
2581  state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
2582  state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
2583  state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
2584  } while (p<=limit);
2585 
2586  }
2587 
2588  if (p < bEnd) {
2589  XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
2590  state->memsize = (unsigned)(bEnd-p);
2591  }
2592  }
2593 
2594  return XXH_OK;
2595 }
2596 
2597 
2599 XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
2600 {
2601  xxh_u32 h32;
2602 
2603  if (state->large_len) {
2604  h32 = XXH_rotl32(state->v[0], 1)
2605  + XXH_rotl32(state->v[1], 7)
2606  + XXH_rotl32(state->v[2], 12)
2607  + XXH_rotl32(state->v[3], 18);
2608  } else {
2609  h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
2610  }
2611 
2612  h32 += state->total_len_32;
2613 
2614  return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
2615 }
2616 #endif /* !XXH_NO_STREAM */
2617 
2618 /******* Canonical representation *******/
2619 
2634 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
2635 {
2636  XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
2637  if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
2638  XXH_memcpy(dst, &hash, sizeof(*dst));
2639 }
2641 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
2642 {
2643  return XXH_readBE32(src);
2644 }
2645 
2646 
2647 #ifndef XXH_NO_LONG_LONG
2648 
2649 /* *******************************************************************
2650 * 64-bit hash functions
2651 *********************************************************************/
2657 /******* Memory access *******/
2658 
2659 typedef XXH64_hash_t xxh_u64;
2660 
2661 #ifdef XXH_OLD_NAMES
2662 # define U64 xxh_u64
2663 #endif
2664 
2665 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2666 /*
2667  * Manual byteshift. Best for old compilers which don't inline memcpy.
2668  * We actually directly use XXH_readLE64 and XXH_readBE64.
2669  */
2670 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
2671 
2672 /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
2673 static xxh_u64 XXH_read64(const void* memPtr)
2674 {
2675  return *(const xxh_u64*) memPtr;
2676 }
2677 
2678 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
2679 
2680 /*
2681  * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
2682  * documentation claimed that it only increased the alignment, but actually it
2683  * can decrease it on gcc, clang, and icc:
2684  * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
2685  * https://gcc.godbolt.org/z/xYez1j67Y.
2686  */
2687 #ifdef XXH_OLD_NAMES
2688 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
2689 #endif
2690 static xxh_u64 XXH_read64(const void* ptr)
2691 {
2692  typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
2693  return *((const xxh_unalign64*)ptr);
2694 }
2695 
2696 #else
2697 
2698 /*
2699  * Portable and safe solution. Generally efficient.
2700  * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2701  */
2702 static xxh_u64 XXH_read64(const void* memPtr)
2703 {
2704  xxh_u64 val;
2705  XXH_memcpy(&val, memPtr, sizeof(val));
2706  return val;
2707 }
2708 
2709 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
2710 
2711 #if defined(_MSC_VER) /* Visual Studio */
2712 # define XXH_swap64 _byteswap_uint64
2713 #elif XXH_GCC_VERSION >= 403
2714 # define XXH_swap64 __builtin_bswap64
2715 #else
2716 static xxh_u64 XXH_swap64(xxh_u64 x)
2717 {
2718  return ((x << 56) & 0xff00000000000000ULL) |
2719  ((x << 40) & 0x00ff000000000000ULL) |
2720  ((x << 24) & 0x0000ff0000000000ULL) |
2721  ((x << 8) & 0x000000ff00000000ULL) |
2722  ((x >> 8) & 0x00000000ff000000ULL) |
2723  ((x >> 24) & 0x0000000000ff0000ULL) |
2724  ((x >> 40) & 0x000000000000ff00ULL) |
2725  ((x >> 56) & 0x00000000000000ffULL);
2726 }
2727 #endif
2728 
2729 
2730 /* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
2731 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2732 
2733 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
2734 {
2735  const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2736  return bytePtr[0]
2737  | ((xxh_u64)bytePtr[1] << 8)
2738  | ((xxh_u64)bytePtr[2] << 16)
2739  | ((xxh_u64)bytePtr[3] << 24)
2740  | ((xxh_u64)bytePtr[4] << 32)
2741  | ((xxh_u64)bytePtr[5] << 40)
2742  | ((xxh_u64)bytePtr[6] << 48)
2743  | ((xxh_u64)bytePtr[7] << 56);
2744 }
2745 
2746 XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
2747 {
2748  const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2749  return bytePtr[7]
2750  | ((xxh_u64)bytePtr[6] << 8)
2751  | ((xxh_u64)bytePtr[5] << 16)
2752  | ((xxh_u64)bytePtr[4] << 24)
2753  | ((xxh_u64)bytePtr[3] << 32)
2754  | ((xxh_u64)bytePtr[2] << 40)
2755  | ((xxh_u64)bytePtr[1] << 48)
2756  | ((xxh_u64)bytePtr[0] << 56);
2757 }
2758 
2759 #else
2760 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
2761 {
2762  return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
2763 }
2764 
2765 static xxh_u64 XXH_readBE64(const void* ptr)
2766 {
2767  return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
2768 }
2769 #endif
2770 
2771 XXH_FORCE_INLINE xxh_u64
2772 XXH_readLE64_align(const void* ptr, XXH_alignment align)
2773 {
2774  if (align==XXH_unaligned)
2775  return XXH_readLE64(ptr);
2776  else
2777  return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
2778 }
2779 
2780 
2781 /******* xxh64 *******/
2790 /* #define rather than static const, to be used as initializers */
2791 #define XXH_PRIME64_1 0x9E3779B185EBCA87ULL
2792 #define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL
2793 #define XXH_PRIME64_3 0x165667B19E3779F9ULL
2794 #define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL
2795 #define XXH_PRIME64_5 0x27D4EB2F165667C5ULL
2797 #ifdef XXH_OLD_NAMES
2798 # define PRIME64_1 XXH_PRIME64_1
2799 # define PRIME64_2 XXH_PRIME64_2
2800 # define PRIME64_3 XXH_PRIME64_3
2801 # define PRIME64_4 XXH_PRIME64_4
2802 # define PRIME64_5 XXH_PRIME64_5
2803 #endif
2804 
2806 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
2807 {
2808  acc += input * XXH_PRIME64_2;
2809  acc = XXH_rotl64(acc, 31);
2810  acc *= XXH_PRIME64_1;
2811  return acc;
2812 }
2813 
2814 static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
2815 {
2816  val = XXH64_round(0, val);
2817  acc ^= val;
2818  acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
2819  return acc;
2820 }
2821 
2823 static xxh_u64 XXH64_avalanche(xxh_u64 hash)
2824 {
2825  hash ^= hash >> 33;
2826  hash *= XXH_PRIME64_2;
2827  hash ^= hash >> 29;
2828  hash *= XXH_PRIME64_3;
2829  hash ^= hash >> 32;
2830  return hash;
2831 }
2832 
2833 
2834 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
2835 
2851 static XXH_PUREF xxh_u64
2852 XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
2853 {
2854  if (ptr==NULL) XXH_ASSERT(len == 0);
2855  len &= 31;
2856  while (len >= 8) {
2857  xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
2858  ptr += 8;
2859  hash ^= k1;
2860  hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
2861  len -= 8;
2862  }
2863  if (len >= 4) {
2864  hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
2865  ptr += 4;
2866  hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
2867  len -= 4;
2868  }
2869  while (len > 0) {
2870  hash ^= (*ptr++) * XXH_PRIME64_5;
2871  hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
2872  --len;
2873  }
2874  return XXH64_avalanche(hash);
2875 }
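/*
 * Example (illustrative): with 13 remaining bytes, XXH64_finalize consumes one
 * 8-byte lane, then one 4-byte read, then one final byte (8 + 4 + 1 = 13), so
 * every remaining length from 0 to 31 maps to a short, fixed sequence of steps.
 */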
2876 
2877 #ifdef XXH_OLD_NAMES
2878 # define PROCESS1_64 XXH_PROCESS1_64
2879 # define PROCESS4_64 XXH_PROCESS4_64
2880 # define PROCESS8_64 XXH_PROCESS8_64
2881 #else
2882 # undef XXH_PROCESS1_64
2883 # undef XXH_PROCESS4_64
2884 # undef XXH_PROCESS8_64
2885 #endif
2886 
2895 XXH_FORCE_INLINE XXH_PUREF xxh_u64
2896 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
2897 {
2898  xxh_u64 h64;
2899  if (input==NULL) XXH_ASSERT(len == 0);
2900 
2901  if (len>=32) {
2902  const xxh_u8* const bEnd = input + len;
2903  const xxh_u8* const limit = bEnd - 31;
2904  xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2905  xxh_u64 v2 = seed + XXH_PRIME64_2;
2906  xxh_u64 v3 = seed + 0;
2907  xxh_u64 v4 = seed - XXH_PRIME64_1;
2908 
2909  do {
2910  v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
2911  v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
2912  v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
2913  v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
2914  } while (input<limit);
2915 
2916  h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
2917  h64 = XXH64_mergeRound(h64, v1);
2918  h64 = XXH64_mergeRound(h64, v2);
2919  h64 = XXH64_mergeRound(h64, v3);
2920  h64 = XXH64_mergeRound(h64, v4);
2921 
2922  } else {
2923  h64 = seed + XXH_PRIME64_5;
2924  }
2925 
2926  h64 += (xxh_u64) len;
2927 
2928  return XXH64_finalize(h64, input, len, align);
2929 }
2930 
2931 
2933 XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
2934 {
2935 #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2936  /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2937  XXH64_state_t state;
2938  XXH64_reset(&state, seed);
2939  XXH64_update(&state, (const xxh_u8*)input, len);
2940  return XXH64_digest(&state);
2941 #else
2942  if (XXH_FORCE_ALIGN_CHECK) {
2943  if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
2944  return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
2945  } }
2946 
2947  return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
2948 
2949 #endif
2950 }
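/*
 * Usage sketch (illustrative) for the one-shot API above:
 *   const char msg[] = "xxhash";
 *   XXH64_hash_t const h = XXH64(msg, sizeof(msg)-1, 0);  // seed = 0
 */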
2951 
2952 /******* Hash Streaming *******/
2953 #ifndef XXH_NO_STREAM
2954 
2955 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
2956 {
2957  return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
2958 }
2960 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
2961 {
2962  XXH_free(statePtr);
2963  return XXH_OK;
2964 }
2965 
2967 XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
2968 {
2969  XXH_memcpy(dstState, srcState, sizeof(*dstState));
2970 }
2971 
2973 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
2974 {
2975  XXH_ASSERT(statePtr != NULL);
2976  memset(statePtr, 0, sizeof(*statePtr));
2977  statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2978  statePtr->v[1] = seed + XXH_PRIME64_2;
2979  statePtr->v[2] = seed + 0;
2980  statePtr->v[3] = seed - XXH_PRIME64_1;
2981  return XXH_OK;
2982 }
2983 
2985 XXH_PUBLIC_API XXH_errorcode
2986 XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
2987 {
2988  if (input==NULL) {
2989  XXH_ASSERT(len == 0);
2990  return XXH_OK;
2991  }
2992 
2993  { const xxh_u8* p = (const xxh_u8*)input;
2994  const xxh_u8* const bEnd = p + len;
2995 
2996  state->total_len += len;
2997 
2998  if (state->memsize + len < 32) { /* fill in tmp buffer */
2999  XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
3000  state->memsize += (xxh_u32)len;
3001  return XXH_OK;
3002  }
3003 
3004  if (state->memsize) { /* tmp buffer is full */
3005  XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
3006  state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
3007  state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
3008  state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
3009  state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
3010  p += 32 - state->memsize;
3011  state->memsize = 0;
3012  }
3013 
3014  if (p+32 <= bEnd) {
3015  const xxh_u8* const limit = bEnd - 32;
3016 
3017  do {
3018  state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
3019  state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
3020  state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
3021  state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
3022  } while (p<=limit);
3023 
3024  }
3025 
3026  if (p < bEnd) {
3027  XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
3028  state->memsize = (unsigned)(bEnd-p);
3029  }
3030  }
3031 
3032  return XXH_OK;
3033 }
3034 
3035 
3037 XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
3038 {
3039  xxh_u64 h64;
3040 
3041  if (state->total_len >= 32) {
3042  h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
3043  h64 = XXH64_mergeRound(h64, state->v[0]);
3044  h64 = XXH64_mergeRound(h64, state->v[1]);
3045  h64 = XXH64_mergeRound(h64, state->v[2]);
3046  h64 = XXH64_mergeRound(h64, state->v[3]);
3047  } else {
3048  h64 = state->v[2] /*seed*/ + XXH_PRIME64_5;
3049  }
3050 
3051  h64 += (xxh_u64) state->total_len;
3052 
3053  return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
3054 }
3055 #endif /* !XXH_NO_STREAM */
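/*
 * Streaming usage sketch (illustrative); the result equals hashing the
 * concatenation of the chunks with a single XXH64() call.
 * chunk1/size1 and chunk2/size2 stand for caller-provided buffers.
 *   XXH64_state_t* const st = XXH64_createState();
 *   if (st != NULL && XXH64_reset(st, 0) == XXH_OK) {
 *       XXH64_update(st, chunk1, size1);
 *       XXH64_update(st, chunk2, size2);
 *       { XXH64_hash_t const h = XXH64_digest(st); (void)h; }
 *   }
 *   XXH64_freeState(st);
 */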
3056 
3057 /******* Canonical representation *******/
3058 
3060 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
3061 {
3062  XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
3063  if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
3064  XXH_memcpy(dst, &hash, sizeof(*dst));
3065 }
3066 
3068 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
3069 {
3070  return XXH_readBE64(src);
3071 }
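/*
 * Round-trip sketch (illustrative): the canonical form is a fixed big-endian
 * byte layout, suitable for storage or transmission:
 *   XXH64_canonical_t c;
 *   XXH64_canonicalFromHash(&c, h);
 *   XXH64_hash_t const h2 = XXH64_hashFromCanonical(&c);  // h2 == h
 */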
3072 
3073 #ifndef XXH_NO_XXH3
3074 
3075 /* *********************************************************************
3076 * XXH3
3077 * New generation hash designed for speed on small keys and vectorization
3078 ************************************************************************ */
3086 /* === Compiler specifics === */
3087 
3088 #if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
3089 # define XXH_RESTRICT /* disable */
3090 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
3091 # define XXH_RESTRICT restrict
3092 #elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
3093  || (defined (__clang__)) \
3094  || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
3095  || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
3096 /*
3097  * There are a LOT more compilers that recognize __restrict but this
3098  * covers the major ones.
3099  */
3100 # define XXH_RESTRICT __restrict
3101 #else
3102 # define XXH_RESTRICT /* disable */
3103 #endif
3104 
3105 #if (defined(__GNUC__) && (__GNUC__ >= 3)) \
3106  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
3107  || defined(__clang__)
3108 # define XXH_likely(x) __builtin_expect(x, 1)
3109 # define XXH_unlikely(x) __builtin_expect(x, 0)
3110 #else
3111 # define XXH_likely(x) (x)
3112 # define XXH_unlikely(x) (x)
3113 #endif
3114 
3115 #if defined(__GNUC__) || defined(__clang__)
3116 # if defined(__ARM_FEATURE_SVE)
3117 # include <arm_sve.h>
3118 # endif
3119 # if defined(__ARM_NEON__) || defined(__ARM_NEON) \
3120  || (defined(_M_ARM) && _M_ARM >= 7) \
3121  || defined(_M_ARM64) || defined(_M_ARM64EC)
3122 # define inline __inline__ /* circumvent a clang bug */
3123 # include <arm_neon.h>
3124 # undef inline
3125 # elif defined(__AVX2__)
3126 # include <immintrin.h>
3127 # elif defined(__SSE2__)
3128 # include <emmintrin.h>
3129 # endif
3130 #endif
3131 
3132 #if defined(_MSC_VER)
3133 # include <intrin.h>
3134 #endif
3135 
3136 /*
3137  * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
3138  * remaining a true 64-bit/128-bit hash function.
3139  *
3140  * This is done by prioritizing a subset of 64-bit operations that can be
3141  * emulated without too many steps on the average 32-bit machine.
3142  *
3143  * For example, these two lines seem similar, and run equally fast on 64-bit:
3144  *
3145  * xxh_u64 x;
3146  * x ^= (x >> 47); // good
3147  * x ^= (x >> 13); // bad
3148  *
3149  * However, to a 32-bit machine, there is a major difference.
3150  *
3151  * x ^= (x >> 47) looks like this:
3152  *
3153  * x.lo ^= (x.hi >> (47 - 32));
3154  *
3155  * while x ^= (x >> 13) looks like this:
3156  *
3157  * // note: funnel shifts are not usually cheap.
3158  * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
3159  * x.hi ^= (x.hi >> 13);
3160  *
3161  * The first one is significantly faster than the second, simply because the
3162  * shift is larger than 32. This means:
3163  * - All the bits we need are in the upper 32 bits, so we can ignore the lower
3164  * 32 bits in the shift.
3165  * - The shift result will always fit in the lower 32 bits, and therefore,
3166  * we can ignore the upper 32 bits in the xor.
3167  *
3168  * Thanks to this optimization, XXH3 only requires these features to be efficient:
3169  *
3170  * - Usable unaligned access
3171  * - A 32-bit or 64-bit ALU
3172  * - If 32-bit, a decent ADC instruction
3173  * - A 32 or 64-bit multiply with a 64-bit result
3174  * - For the 128-bit variant, a decent byteswap helps short inputs.
3175  *
3176  * The first two are already required by XXH32, and almost all 32-bit and 64-bit
3177  * platforms which can run XXH32 can run XXH3 efficiently.
3178  *
3179  * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
3180  * notable exception.
3181  *
3182  * First of all, Thumb-1 lacks support for the UMULL instruction which
3183  * performs the important long multiply. This means numerous __aeabi_lmul
3184  * calls.
3185  *
3186  * Second of all, the 8 functional registers are just not enough.
3187  * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
3188  * Lo registers, and this shuffling results in thousands more MOVs than A32.
3189  *
3190  * A32 and T32 don't have this limitation. They can access all 14 registers,
3191  * do a 32->64 multiply with UMULL, and the flexible operand allowing free
3192  * shifts is helpful, too.
3193  *
3194  * Therefore, we do a quick sanity check.
3195  *
3196  * If compiling Thumb-1 for a target which supports ARM instructions, we will
3197  * emit a warning, as it is not a "sane" platform to compile for.
3198  *
3199  * Usually, if this happens, it is because of an accident and you probably need
3200  * to specify -march, as you likely meant to compile for a newer architecture.
3201  *
3202  * Credit: large sections of the vectorial and asm source code paths
3203  * have been contributed by @easyaspi314
3204  */
3205 #if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
3206 # warning "XXH3 is highly inefficient without ARM or Thumb-2."
3207 #endif
3208 
3209 /* ==========================================
3210  * Vectorization detection
3211  * ========================================== */
3212 
3213 #ifdef XXH_DOXYGEN
3214 
3224 # define XXH_VECTOR XXH_SCALAR
3225 
3234 enum XXH_VECTOR_TYPE /* fake enum */ {
3235  XXH_SCALAR = 0,
3236  XXH_SSE2 = 1,
3242  XXH_AVX2 = 2,
3243  XXH_AVX512 = 3,
3244  XXH_NEON = 4,
3245  XXH_VSX = 5,
3246  XXH_SVE = 6,
3247 };
3257 # define XXH_ACC_ALIGN 8
3258 #endif
3259 
3260 /* Actual definition */
3261 #ifndef XXH_DOXYGEN
3262 # define XXH_SCALAR 0
3263 # define XXH_SSE2 1
3264 # define XXH_AVX2 2
3265 # define XXH_AVX512 3
3266 # define XXH_NEON 4
3267 # define XXH_VSX 5
3268 # define XXH_SVE 6
3269 #endif
3270 
3271 #ifndef XXH_VECTOR /* can be defined on command line */
3272 # if defined(__ARM_FEATURE_SVE)
3273 # define XXH_VECTOR XXH_SVE
3274 # elif ( \
3275  defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
3276  || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
3277  ) && ( \
3278  defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
3279  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
3280  )
3281 # define XXH_VECTOR XXH_NEON
3282 # elif defined(__AVX512F__)
3283 # define XXH_VECTOR XXH_AVX512
3284 # elif defined(__AVX2__)
3285 # define XXH_VECTOR XXH_AVX2
3286 # elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
3287 # define XXH_VECTOR XXH_SSE2
3288 # elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
3289  || (defined(__s390x__) && defined(__VEC__)) \
3290  && defined(__GNUC__) /* TODO: IBM XL */
3291 # define XXH_VECTOR XXH_VSX
3292 # else
3293 # define XXH_VECTOR XXH_SCALAR
3294 # endif
3295 #endif
3296 
3297 /* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
3298 #if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
3299 # ifdef _MSC_VER
3300 # pragma warning(once : 4606)
3301 # else
3302 # warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
3303 # endif
3304 # undef XXH_VECTOR
3305 # define XXH_VECTOR XXH_SCALAR
3306 #endif
3307 
3308 /*
3309  * Controls the alignment of the accumulator,
3310  * for compatibility with aligned vector loads, which are usually faster.
3311  */
3312 #ifndef XXH_ACC_ALIGN
3313 # if defined(XXH_X86DISPATCH)
3314 # define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
3315 # elif XXH_VECTOR == XXH_SCALAR /* scalar */
3316 # define XXH_ACC_ALIGN 8
3317 # elif XXH_VECTOR == XXH_SSE2 /* sse2 */
3318 # define XXH_ACC_ALIGN 16
3319 # elif XXH_VECTOR == XXH_AVX2 /* avx2 */
3320 # define XXH_ACC_ALIGN 32
3321 # elif XXH_VECTOR == XXH_NEON /* neon */
3322 # define XXH_ACC_ALIGN 16
3323 # elif XXH_VECTOR == XXH_VSX /* vsx */
3324 # define XXH_ACC_ALIGN 16
3325 # elif XXH_VECTOR == XXH_AVX512 /* avx512 */
3326 # define XXH_ACC_ALIGN 64
3327 # elif XXH_VECTOR == XXH_SVE /* sve */
3328 # define XXH_ACC_ALIGN 64
3329 # endif
3330 #endif
3331 
3332 #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
3333  || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
3334 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
3335 #elif XXH_VECTOR == XXH_SVE
3336 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
3337 #else
3338 # define XXH_SEC_ALIGN 8
3339 #endif
3340 
3341 #if defined(__GNUC__) || defined(__clang__)
3342 # define XXH_ALIASING __attribute__((may_alias))
3343 #else
3344 # define XXH_ALIASING /* nothing */
3345 #endif
3346 
3347 /*
3348  * UGLY HACK:
3349  * GCC usually generates the best code with -O3 for xxHash.
3350  *
3351  * However, when targeting AVX2, it is overzealous in its unrolling resulting
3352  * in code roughly 3/4 the speed of Clang.
3353  *
3354  * There are other issues, such as GCC splitting _mm256_loadu_si256 into
3355  * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
3356  * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
3357  *
3358  * That is why when compiling the AVX2 version, it is recommended to use either
3359  * -O2 -mavx2 -march=haswell
3360  * or
3361  * -O2 -mavx2 -mno-avx256-split-unaligned-load
3362  * for decent performance, or to use Clang instead.
3363  *
3364  * Fortunately, we can control the first one with a pragma that forces GCC into
3365  * -O2, but the other one we can't control without "failed to inline always
3366  * inline function due to target mismatch" warnings.
3367  */
3368 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
3369  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
3370  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
3371 # pragma GCC push_options
3372 # pragma GCC optimize("-O2")
3373 #endif
3374 
3375 #if XXH_VECTOR == XXH_NEON
3376 
3377 /*
3378  * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
3379  * optimizes out the entire hashLong loop because of the aliasing violation.
3380  *
3381  * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
3382  * so the only option is to mark it as aliasing.
3383  */
3384 typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
3385 
3399 #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
3400 XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
3401 {
3402  return *(xxh_aliasing_uint64x2_t const *)ptr;
3403 }
3404 #else
3405 XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
3406 {
3407  return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
3408 }
3409 #endif
3410 
3419 #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
3420 XXH_FORCE_INLINE uint64x2_t
3421 XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3422 {
3423  /* Inline assembly is the only way */
3424  __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
3425  return acc;
3426 }
3427 XXH_FORCE_INLINE uint64x2_t
3428 XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3429 {
3430  /* This intrinsic works as expected */
3431  return vmlal_high_u32(acc, lhs, rhs);
3432 }
3433 #else
3434 /* Portable intrinsic versions */
3435 XXH_FORCE_INLINE uint64x2_t
3436 XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3437 {
3438  return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
3439 }
3442 XXH_FORCE_INLINE uint64x2_t
3443 XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3444 {
3445  return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
3446 }
3447 #endif
3448 
3484 # ifndef XXH3_NEON_LANES
3485 # if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
3486  && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
3487 # define XXH3_NEON_LANES 6
3488 # else
3489 # define XXH3_NEON_LANES XXH_ACC_NB
3490 # endif
3491 # endif
3492 #endif /* XXH_VECTOR == XXH_NEON */
3493 
3494 /*
3495  * VSX and Z Vector helpers.
3496  *
3497  * This is very messy, and any pull requests to clean this up are welcome.
3498  *
3499  * There are a lot of problems with supporting VSX and s390x, due to
3500  * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
3501  */
3502 #if XXH_VECTOR == XXH_VSX
3503 /* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
3504  * and `pixel`. This is a problem for obvious reasons.
3505  *
3506  * These keywords are unnecessary; the spec literally says they are
3507  * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
3508  * after including the header.
3509  *
3510  * We use pragma push_macro/pop_macro to keep the namespace clean. */
3511 # pragma push_macro("bool")
3512 # pragma push_macro("vector")
3513 # pragma push_macro("pixel")
3514 /* silence potential macro redefined warnings */
3515 # undef bool
3516 # undef vector
3517 # undef pixel
3518 
3519 # if defined(__s390x__)
3520 # include <s390intrin.h>
3521 # else
3522 # include <altivec.h>
3523 # endif
3524 
3525 /* Restore the original macro values, if applicable. */
3526 # pragma pop_macro("pixel")
3527 # pragma pop_macro("vector")
3528 # pragma pop_macro("bool")
3529 
3530 typedef __vector unsigned long long xxh_u64x2;
3531 typedef __vector unsigned char xxh_u8x16;
3532 typedef __vector unsigned xxh_u32x4;
3533 
3534 /*
3535  * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
3536  */
3537 typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
3538 
3539 # ifndef XXH_VSX_BE
3540 # if defined(__BIG_ENDIAN__) \
3541  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
3542 # define XXH_VSX_BE 1
3543 # elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
3544 # warning "-maltivec=be is not recommended. Please use native endianness."
3545 # define XXH_VSX_BE 1
3546 # else
3547 # define XXH_VSX_BE 0
3548 # endif
3549 # endif /* !defined(XXH_VSX_BE) */
3550 
3551 # if XXH_VSX_BE
3552 # if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
3553 # define XXH_vec_revb vec_revb
3554 # else
3555 
3558 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
3559 {
3560  xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
3561  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
3562  return vec_perm(val, val, vByteSwap);
3563 }
3564 # endif
3565 # endif /* XXH_VSX_BE */
3566 
3570 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
3571 {
3572  xxh_u64x2 ret;
3573  XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
3574 # if XXH_VSX_BE
3575  ret = XXH_vec_revb(ret);
3576 # endif
3577  return ret;
3578 }
3579 
3580 /*
3581  * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
3582  *
3583  * These intrinsics weren't added until GCC 8, despite existing for a while,
3584  * and they are endian dependent. Also, their meanings swap depending on the version.
3585  * */
3586 # if defined(__s390x__)
3587  /* s390x is always big endian, no issue on this platform */
3588 # define XXH_vec_mulo vec_mulo
3589 # define XXH_vec_mule vec_mule
3590 # elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
3591 /* Clang has a better way to control this: we can just use the builtin, which doesn't swap. */
3592  /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
3593 # define XXH_vec_mulo __builtin_altivec_vmulouw
3594 # define XXH_vec_mule __builtin_altivec_vmuleuw
3595 # else
3596 /* gcc needs inline assembly */
3597 /* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
3598 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
3599 {
3600  xxh_u64x2 result;
3601  __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3602  return result;
3603 }
3604 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
3605 {
3606  xxh_u64x2 result;
3607  __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3608  return result;
3609 }
3610 # endif /* XXH_vec_mulo, XXH_vec_mule */
3611 #endif /* XXH_VECTOR == XXH_VSX */
3612 
3613 #if XXH_VECTOR == XXH_SVE
3614 #define ACCRND(acc, offset) \
3615 do { \
3616  svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
3617  svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
3618  svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
3619  svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
3620  svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
3621  svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
3622  svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
3623  acc = svadd_u64_x(mask, acc, mul); \
3624 } while (0)
3625 #endif /* XXH_VECTOR == XXH_SVE */
3626 
3627 
3628 /* prefetch
3629  * can be disabled by declaring the XXH_NO_PREFETCH build macro */
3630 #if defined(XXH_NO_PREFETCH)
3631 # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
3632 #else
3633 # if XXH_SIZE_OPT >= 1
3634 # define XXH_PREFETCH(ptr) (void)(ptr)
3635 # elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
3636 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
3637 # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
3638 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
3639 # define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
3640 # else
3641 # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
3642 # endif
3643 #endif /* XXH_NO_PREFETCH */
3644 
3645 
3646 /* ==========================================
3647  * XXH3 default settings
3648  * ========================================== */
3649 
3650 #define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
3651 
3652 #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
3653 # error "default keyset is not large enough"
3654 #endif
3655 
3657 XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
3658  0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
3659  0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
3660  0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
3661  0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
3662  0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
3663  0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
3664  0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
3665  0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
3666  0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
3667  0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
3668  0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
3669  0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
3670 };
3671 
3672 static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL;
3673 static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL;
3675 #ifdef XXH_OLD_NAMES
3676 # define kSecret XXH3_kSecret
3677 #endif
3678 
3679 #ifdef XXH_DOXYGEN
3680 
3696 XXH_FORCE_INLINE xxh_u64
3697 XXH_mult32to64(xxh_u64 x, xxh_u64 y)
3698 {
3699  return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
3700 }
3701 #elif defined(_MSC_VER) && defined(_M_IX86)
3702 # define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
3703 #else
3704 /*
3705  * Downcast + upcast is usually better than masking on older compilers like
3706  * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
3707  *
3708  * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
3709  * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
3710  */
3711 # define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
3712 #endif
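/*
 * Example (illustrative): XXH_mult32to64 multiplies the low 32 bits of each
 * operand into a full 64-bit result, e.g.
 *   XXH_mult32to64(0xFFFFFFFF, 0xFFFFFFFF) == 0xFFFFFFFE00000001ULL
 */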
3713 
3723 static XXH128_hash_t
3724 XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
3725 {
3726  /*
3727  * GCC/Clang __uint128_t method.
3728  *
3729  * On most 64-bit targets, GCC and Clang define a __uint128_t type.
3730  * This is usually the best way as it usually uses a native long 64-bit
3731  * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
3732  *
3733  * Usually.
3734  *
3735  * Even on 32-bit platforms, Clang (and Emscripten) define this type
3736  * despite not having the arithmetic for it. This results in a slow
3737  * compiler-builtin call which calculates a full 128-bit multiply.
3738  * In that case it is best to use the portable one.
3739  * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
3740  */
3741 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
3742  && defined(__SIZEOF_INT128__) \
3743  || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
3744 
3745  __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
3746  XXH128_hash_t r128;
3747  r128.low64 = (xxh_u64)(product);
3748  r128.high64 = (xxh_u64)(product >> 64);
3749  return r128;
3750 
3751  /*
3752  * MSVC for x64's _umul128 method.
3753  *
3754  * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
3755  *
3756  * This compiles to single operand MUL on x64.
3757  */
3758 #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
3759 
3760 #ifndef _MSC_VER
3761 # pragma intrinsic(_umul128)
3762 #endif
3763  xxh_u64 product_high;
3764  xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
3765  XXH128_hash_t r128;
3766  r128.low64 = product_low;
3767  r128.high64 = product_high;
3768  return r128;
3769 
3770  /*
3771  * MSVC for ARM64's __umulh method.
3772  *
3773  * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
3774  */
3775 #elif defined(_M_ARM64) || defined(_M_ARM64EC)
3776 
3777 #ifndef _MSC_VER
3778 # pragma intrinsic(__umulh)
3779 #endif
3780  XXH128_hash_t r128;
3781  r128.low64 = lhs * rhs;
3782  r128.high64 = __umulh(lhs, rhs);
3783  return r128;
3784 
3785 #else
3786  /*
3787  * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
3788  *
3789  * This is a fast and simple grade school multiply, which is shown below
3790  * with base 10 arithmetic instead of base 0x100000000.
3791  *
3792  * 9 3 // D2 lhs = 93
3793  * x 7 5 // D2 rhs = 75
3794  * ----------
3795  * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
3796  * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
3797  * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
3798  * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
3799  * ---------
3800  * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
3801  * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
3802  * ---------
3803  * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
3804  *
3805  * The reasons for adding the products like this are:
3806  * 1. It avoids manual carry tracking. Just like how
3807  * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
3808  * This avoids a lot of complexity.
3809  *
3810  * 2. It hints for, and on Clang, compiles to, the powerful UMAAL
3811  * instruction available in ARM's Digital Signal Processing extension
3812  * in 32-bit ARMv6 and later, which is shown below:
3813  *
3814  * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
3815  * {
3816  * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
3817  * *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
3818  * *RdHi = (xxh_u32)(product >> 32);
3819  * }
3820  *
3821  * This instruction was designed for efficient long multiplication, and
3822  * allows this to be calculated in only 4 instructions at speeds
3823  * comparable to some 64-bit ALUs.
3824  *
3825  * 3. It isn't terrible on other platforms. Usually this will be a couple
3826  * of 32-bit ADD/ADCs.
3827  */
3828 
3829  /* First calculate all of the cross products. */
3830  xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
3831  xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF);
3832  xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
3833  xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);
3834 
3835  /* Now add the products together. These will never overflow. */
3836  xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
3837  xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
3838  xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
3839 
3840  XXH128_hash_t r128;
3841  r128.low64 = lower;
3842  r128.high64 = upper;
3843  return r128;
3844 #endif
3845 }
3846 
3857 static xxh_u64
3858 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
3859 {
3860  XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
3861  return product.low64 ^ product.high64;
3862 }
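/*
 * Example (illustrative): for lhs = rhs = 1ULL << 32, XXH_mult64to128 yields
 * low64 == 0 and high64 == 1, so XXH3_mul128_fold64 returns 1.
 */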
3863 
3865 XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
3866 {
3867  XXH_ASSERT(0 <= shift && shift < 64);
3868  return v64 ^ (v64 >> shift);
3869 }
3870 
3871 /*
3872  * This is a fast avalanche stage,
3873  * suitable when input bits are already partially mixed
3874  */
3875 static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
3876 {
3877  h64 = XXH_xorshift64(h64, 37);
3878  h64 *= PRIME_MX1;
3879  h64 = XXH_xorshift64(h64, 32);
3880  return h64;
3881 }
3882 
3883 /*
3884  * This is a stronger avalanche,
3885  * inspired by Pelle Evensen's rrmxmx
3886  * preferable when input has not been previously mixed
3887  */
3888 static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
3889 {
3890  /* this mix is inspired by Pelle Evensen's rrmxmx */
3891  h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
3892  h64 *= PRIME_MX2;
3893  h64 ^= (h64 >> 35) + len ;
3894  h64 *= PRIME_MX2;
3895  return XXH_xorshift64(h64, 28);
3896 }
3897 
3898 
3899 /* ==========================================
3900  * Short keys
3901  * ==========================================
3902  * One of the shortcomings of XXH32 and XXH64 was that their performance was
3903  * sub-optimal on short lengths. It used an iterative algorithm which strongly
3904  * favored lengths that were a multiple of 4 or 8.
3905  *
3906  * Instead of iterating over individual inputs, we use a set of single shot
3907  * functions which piece together a range of lengths and operate in constant time.
3908  *
3909  * Additionally, the number of multiplies has been significantly reduced. This
3910  * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
3911  *
3912  * Depending on the platform, this may or may not be faster than XXH32, but it
3913  * is almost guaranteed to be faster than XXH64.
3914  */
3915 
3916 /*
3917  * At very short lengths, there isn't enough input to fully hide secrets, or use
3918  * the entire secret.
3919  *
3920  * There is also only a limited amount of mixing we can do before significantly
3921  * impacting performance.
3922  *
3923  * Therefore, we use different sections of the secret and always mix two secret
3924  * samples with an XOR. This should have no effect on performance on the
3925  * seedless or withSeed variants because everything _should_ be constant folded
3926  * by modern compilers.
3927  *
3928  * The XOR mixing hides individual parts of the secret and increases entropy.
3929  *
3930  * This adds an extra layer of strength for custom secrets.
3931  */
3932 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3933 XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3934 {
3935  XXH_ASSERT(input != NULL);
3936  XXH_ASSERT(1 <= len && len <= 3);
3937  XXH_ASSERT(secret != NULL);
3938  /*
3939  * len = 1: combined = { input[0], 0x01, input[0], input[0] }
3940  * len = 2: combined = { input[1], 0x02, input[0], input[1] }
3941  * len = 3: combined = { input[2], 0x03, input[0], input[1] }
3942  */
3943  { xxh_u8 const c1 = input[0];
3944  xxh_u8 const c2 = input[len >> 1];
3945  xxh_u8 const c3 = input[len - 1];
3946  xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24)
3947  | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
3948  xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
3949  xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
3950  return XXH64_avalanche(keyed);
3951  }
3952 }
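/*
 * Worked example (illustrative): for len == 1 and input[0] == 0x41,
 * c1 == c2 == c3 == 0x41, so
 *   combined == (0x41 << 16) | (0x41 << 24) | 0x41 | (1 << 8) == 0x41410141
 * whose little-endian bytes match { input[0], 0x01, input[0], input[0] } above.
 */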
3953 
3954 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3955 XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3956 {
3957  XXH_ASSERT(input != NULL);
3958  XXH_ASSERT(secret != NULL);
3959  XXH_ASSERT(4 <= len && len <= 8);
3960  seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
3961  { xxh_u32 const input1 = XXH_readLE32(input);
3962  xxh_u32 const input2 = XXH_readLE32(input + len - 4);
3963  xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
3964  xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
3965  xxh_u64 const keyed = input64 ^ bitflip;
3966  return XXH3_rrmxmx(keyed, len);
3967  }
3968 }
3969 
3970 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3971 XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3972 {
3973  XXH_ASSERT(input != NULL);
3974  XXH_ASSERT(secret != NULL);
3975  XXH_ASSERT(9 <= len && len <= 16);
3976  { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
3977  xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
3978  xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1;
3979  xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
3980  xxh_u64 const acc = len
3981  + XXH_swap64(input_lo) + input_hi
3982  + XXH3_mul128_fold64(input_lo, input_hi);
3983  return XXH3_avalanche(acc);
3984  }
3985 }
3986 
3987 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3988 XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3989 {
3990  XXH_ASSERT(len <= 16);
3991  { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed);
3992  if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
3993  if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
3994  return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
3995  }
3996 }
3997 
3998 /*
3999  * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
4000  * multiplication by zero, affecting hashes of lengths 17 to 240.
4001  *
4002  * However, they are very unlikely.
4003  *
4004  * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
4005  * unseeded non-cryptographic hashes, it does not attempt to defend itself
4006  * against specially crafted inputs, only random inputs.
4007  *
4008  * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
4009  * cancelling out the secret is taken an arbitrary number of times (addressed
4010  * in XXH3_accumulate_512), this collision is very unlikely with random inputs
4011  * and/or proper seeding:
4012  *
4013  * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
4014  * function that is only called up to 16 times per hash with up to 240 bytes of
4015  * input.
4016  *
4017  * This is not too bad for a non-cryptographic hash function, especially with
4018  * only 64-bit outputs.
4019  *
4020  * The 128-bit variant (which trades some speed for strength) is NOT affected
4021  * by this, although it is always a good idea to use a proper seed if you care
4022  * about strength.
4023  */
4024 XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
4025  const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
4026 {
4027 #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
4028  && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \
4029  && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */
4030  /*
4031  * UGLY HACK:
4032  * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
4033  * slower code.
4034  *
4035  * By forcing seed64 into a register, we disrupt the cost model and
4036  * cause it to scalarize. See `XXH32_round()`
4037  *
4038  * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
4039  * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
4040  * GCC 9.2, despite both emitting scalar code.
4041  *
4042  * GCC generates much better scalar code than Clang for the rest of XXH3,
4043  * which is why finding a more optimal codepath is of interest.
4044  */
4045  XXH_COMPILER_GUARD(seed64);
4046 #endif
4047  { xxh_u64 const input_lo = XXH_readLE64(input);
4048  xxh_u64 const input_hi = XXH_readLE64(input+8);
4049  return XXH3_mul128_fold64(
4050  input_lo ^ (XXH_readLE64(secret) + seed64),
4051  input_hi ^ (XXH_readLE64(secret+8) - seed64)
4052  );
4053  }
4054 }
4055 
4056 /* For mid range keys, XXH3 uses a Mum-hash variant. */
4057 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
4058 XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
4059  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
4060  XXH64_hash_t seed)
4061 {
4062  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
4063  XXH_ASSERT(16 < len && len <= 128);
4064 
4065  { xxh_u64 acc = len * XXH_PRIME64_1, acc_end;
4066 #if XXH_SIZE_OPT >= 1
4067  /* Smaller and cleaner, but slightly slower. */
4068  unsigned int i = (unsigned int)(len - 1) / 32;
4069  do {
4070  acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
4071  acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
4072  } while (i-- != 0);
4073  acc_end = 0;
4074 #else
4075  acc += XXH3_mix16B(input+0, secret+0, seed);
4076  acc_end = XXH3_mix16B(input+len-16, secret+16, seed);
4077  if (len > 32) {
4078  acc += XXH3_mix16B(input+16, secret+32, seed);
4079  acc_end += XXH3_mix16B(input+len-32, secret+48, seed);
4080  if (len > 64) {
4081  acc += XXH3_mix16B(input+32, secret+64, seed);
4082  acc_end += XXH3_mix16B(input+len-48, secret+80, seed);
4083 
4084  if (len > 96) {
4085  acc += XXH3_mix16B(input+48, secret+96, seed);
4086  acc_end += XXH3_mix16B(input+len-64, secret+112, seed);
4087  }
4088  }
4089  }
4090 #endif
4091  return XXH3_avalanche(acc + acc_end);
4092  }
4093 }
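/*
 * Sketch (illustrative): for 16 < len <= 128, the branch above mixes symmetric
 * 16-byte pairs working inwards: the first 16 bytes with the last 16, the next
 * 16 with the next-to-last 16, and so on, each pair keyed with a fresh
 * 32-byte slice of the secret.
 */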
4094 
4095 #define XXH3_MIDSIZE_MAX 240
4096 
4097 XXH_NO_INLINE XXH_PUREF XXH64_hash_t
4098 XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
4099  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
4100  XXH64_hash_t seed)
4101 {
4102  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
4103  XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4104 
4105  #define XXH3_MIDSIZE_STARTOFFSET 3
4106  #define XXH3_MIDSIZE_LASTOFFSET 17
4107 
4108  { xxh_u64 acc = len * XXH_PRIME64_1;
4109  xxh_u64 acc_end;
4110  unsigned int const nbRounds = (unsigned int)len / 16;
4111  unsigned int i;
4112  XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4113  for (i=0; i<8; i++) {
4114  acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
4115  }
4116  /* last bytes */
4117  acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
4118  XXH_ASSERT(nbRounds >= 8);
4119  acc = XXH3_avalanche(acc);
4120 #if defined(__clang__) /* Clang */ \
4121  && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
4122  && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
4123  /*
4124  * UGLY HACK:
4125  * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
4126  * Everywhere else, it uses scalar code.
4127  *
4128  * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
4129  * would still be slower than UMAAL (see XXH_mult64to128).
4130  *
4131  * Unfortunately, Clang doesn't handle the long multiplies properly and
4132  * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
4133  * scalarized into an ugly mess of VMOV.32 instructions.
4134  *
4135  * This mess is difficult to avoid without turning autovectorization
4136  * off completely, but these issues are usually relatively minor and/or not
4137  * worth fixing.
4138  *
4139  * This loop is the easiest to fix, as unlike XXH32, this pragma
4140  * _actually works_ because it is a loop vectorization instead of an
4141  * SLP vectorization.
4142  */
4143  #pragma clang loop vectorize(disable)
4144 #endif
4145  for (i=8 ; i < nbRounds; i++) {
4146  /*
4147  * Prevents Clang from unrolling the acc loop and interleaving it with this one.
4148  */
4149  XXH_COMPILER_GUARD(acc);
4150  acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
4151  }
4152  return XXH3_avalanche(acc + acc_end);
4153  }
4154 }
4155 
4156 
4157 /* ======= Long Keys ======= */
4158 
4159 #define XXH_STRIPE_LEN 64
4160 #define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */
4161 #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
4162 
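/*
 * Note (illustrative): with XXH_STRIPE_LEN == 64 and sizeof(xxh_u64) == 8,
 * XXH_ACC_NB == 8, i.e. each 64-byte stripe feeds eight 64-bit accumulators.
 */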
4163 #ifdef XXH_OLD_NAMES
4164 # define STRIPE_LEN XXH_STRIPE_LEN
4165 # define ACC_NB XXH_ACC_NB
4166 #endif
4167 
4168 #ifndef XXH_PREFETCH_DIST
4169 # ifdef __clang__
4170 # define XXH_PREFETCH_DIST 320
4171 # else
4172 # if (XXH_VECTOR == XXH_AVX512)
4173 # define XXH_PREFETCH_DIST 512
4174 # else
4175 # define XXH_PREFETCH_DIST 384
4176 # endif
4177 # endif /* __clang__ */
4178 #endif /* XXH_PREFETCH_DIST */
4179 
4180 /*
4181  * These macros are to generate an XXH3_accumulate() function.
4182  * The two arguments select the name suffix and target attribute.
4183  *
4184  * The name of this symbol is XXH3_accumulate_<name>() and it calls
4185  * XXH3_accumulate_512_<name>().
4186  *
4187  * It may be useful to hand implement this function if the compiler fails to
4188  * optimize the inline function.
4189  */
4190 #define XXH3_ACCUMULATE_TEMPLATE(name) \
4191 void \
4192 XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \
4193  const xxh_u8* XXH_RESTRICT input, \
4194  const xxh_u8* XXH_RESTRICT secret, \
4195  size_t nbStripes) \
4196 { \
4197  size_t n; \
4198  for (n = 0; n < nbStripes; n++ ) { \
4199  const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \
4200  XXH_PREFETCH(in + XXH_PREFETCH_DIST); \
4201  XXH3_accumulate_512_##name( \
4202  acc, \
4203  in, \
4204  secret + n*XXH_SECRET_CONSUME_RATE); \
4205  } \
4206 }
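/*
 * Sketch (illustrative): instantiating the template as
 *   XXH3_ACCUMULATE_TEMPLATE(scalar)
 * defines XXH3_accumulate_scalar(acc, input, secret, nbStripes), which calls
 * XXH3_accumulate_512_scalar() once per 64-byte stripe, prefetching
 * XXH_PREFETCH_DIST bytes ahead and advancing the secret by
 * XXH_SECRET_CONSUME_RATE bytes per stripe.
 */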
4207 
4208 
4209 XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
4210 {
4211  if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
4212  XXH_memcpy(dst, &v64, sizeof(v64));
4213 }
4214 
4215 /* Several intrinsic functions below are supposed to accept __int64 as argument,
4216  * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
4217  * However, several environments do not define the __int64 type,
4218  * requiring a workaround.
4219  */
4220 #if !defined (__VMS) \
4221  && (defined (__cplusplus) \
4222  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
4223  typedef int64_t xxh_i64;
4224 #else
4225  /* the following type must have a width of 64-bit */
4226  typedef long long xxh_i64;
4227 #endif
4228 
4229 
4230 /*
4231  * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
4232  *
4233  * It is a hardened version of UMAC, based off of FARSH's implementation.
4234  *
4235  * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
4236  * implementations, and it is ridiculously fast.
4237  *
4238  * We harden it by mixing the original input to the accumulators as well as the product.
4239  *
4240  * This means that in the (relatively likely) case of a multiply by zero, the
4241  * original input is preserved.
4242  *
4243  * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
4244  * cross-pollination, as otherwise the upper and lower halves would be
4245  * essentially independent.
4246  *
4247  * This doesn't matter on 64-bit hashes since they all get merged together in
4248  * the end, so we skip the extra step.
4249  *
4250  * Both XXH3_64bits and XXH3_128bits use this subroutine.
4251  */
4252 
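/*
 * Per-lane sketch (illustrative) of what the SIMD variants below compute,
 * written as scalar C for one 64-bit lane:
 *   xxh_u64 const data_val = XXH_readLE64(input + 8*lane);
 *   xxh_u64 const data_key = data_val ^ XXH_readLE64(secret + 8*lane);
 *   acc[lane ^ 1] += data_val;                       // swapped add of raw input
 *   acc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
 */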
4253 #if (XXH_VECTOR == XXH_AVX512) \
4254  || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
4255 
4256 #ifndef XXH_TARGET_AVX512
4257 # define XXH_TARGET_AVX512 /* disable attribute target */
4258 #endif
4259 
4260 XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4261 XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
4262  const void* XXH_RESTRICT input,
4263  const void* XXH_RESTRICT secret)
4264 {
4265  __m512i* const xacc = (__m512i *) acc;
4266  XXH_ASSERT((((size_t)acc) & 63) == 0);
4267  XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
4268 
4269  {
4270  /* data_vec = input[0]; */
4271  __m512i const data_vec = _mm512_loadu_si512 (input);
4272  /* key_vec = secret[0]; */
4273  __m512i const key_vec = _mm512_loadu_si512 (secret);
4274  /* data_key = data_vec ^ key_vec; */
4275  __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
4276  /* data_key_lo = data_key >> 32; */
4277  __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
4278  /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4279  __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);
4280  /* xacc[0] += swap(data_vec); */
4281  __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
4282  __m512i const sum = _mm512_add_epi64(*xacc, data_swap);
4283  /* xacc[0] += product; */
4284  *xacc = _mm512_add_epi64(product, sum);
4285  }
4286 }
4287 XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
4288 
4289 /*
4290  * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
4291  *
4292  * Multiplication isn't perfect, as explained by Google in HighwayHash:
4293  *
4294  * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
4295  * // varying degrees. In descending order of goodness, bytes
4296  * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
4297  * // As expected, the upper and lower bytes are much worse.
4298  *
4299  * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
4300  *
4301  * Since our algorithm uses a pseudorandom secret to add some variance into the
4302  * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
4303  *
4304  * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
4305  * extraction.
4306  *
4307  * Both XXH3_64bits and XXH3_128bits use this subroutine.
4308  */
4309 
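/*
 * Per-lane sketch (illustrative) of the scramble step, as scalar C:
 *   acc[i] ^= acc[i] >> 47;
 *   acc[i] ^= XXH_readLE64(secret + 8*i);
 *   acc[i] *= XXH_PRIME32_1;
 */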
4310 XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4311 XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4312 {
4313  XXH_ASSERT((((size_t)acc) & 63) == 0);
4314  XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
4315  { __m512i* const xacc = (__m512i*) acc;
4316  const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
4317 
4318  /* xacc[0] ^= (xacc[0] >> 47) */
4319  __m512i const acc_vec = *xacc;
4320  __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);
4321  /* xacc[0] ^= secret; */
4322  __m512i const key_vec = _mm512_loadu_si512 (secret);
4323  __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
4324 
4325  /* xacc[0] *= XXH_PRIME32_1; */
4326  __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
4327  __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);
4328  __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
4329  *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
4330  }
4331 }
4332 
4333 XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4334 XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4335 {
4336  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
4337  XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
4338  XXH_ASSERT(((size_t)customSecret & 63) == 0);
4339  (void)(&XXH_writeLE64);
4340  { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
4341  __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
4342  __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
4343 
4344  const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret);
4345  __m512i* const dest = ( __m512i*) customSecret;
4346  int i;
4347  XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
4348  XXH_ASSERT(((size_t)dest & 63) == 0);
4349  for (i=0; i < nbRounds; ++i) {
4350  dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
4351  } }
4352 }
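/*
 * Sketch (illustrative): per 64-bit word w of the default secret, the routine
 * above computes
 *   customSecret[w] = kSecret[w] + ((w & 1) == 0 ? seed64 : (xxh_u64)0 - seed64)
 * i.e. the seed is alternately added to and subtracted from the secret words.
 */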
4353 
4354 #endif
4355 
4356 #if (XXH_VECTOR == XXH_AVX2) \
4357  || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
4358 
4359 #ifndef XXH_TARGET_AVX2
4360 # define XXH_TARGET_AVX2 /* disable attribute target */
4361 #endif
4362 
4363 XXH_FORCE_INLINE XXH_TARGET_AVX2 void
4364 XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
4365  const void* XXH_RESTRICT input,
4366  const void* XXH_RESTRICT secret)
4367 {
4368  XXH_ASSERT((((size_t)acc) & 31) == 0);
4369  { __m256i* const xacc = (__m256i *) acc;
4370  /* Unaligned. This is mainly for pointer arithmetic, and because
4371  * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4372  const __m256i* const xinput = (const __m256i *) input;
4373  /* Unaligned. This is mainly for pointer arithmetic, and because
4374  * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4375  const __m256i* const xsecret = (const __m256i *) secret;
4376 
4377  size_t i;
4378  for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
4379  /* data_vec = xinput[i]; */
4380  __m256i const data_vec = _mm256_loadu_si256 (xinput+i);
4381  /* key_vec = xsecret[i]; */
4382  __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
4383  /* data_key = data_vec ^ key_vec; */
4384  __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
4385  /* data_key_lo = data_key >> 32; */
4386  __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
4387  /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4388  __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);
4389  /* xacc[i] += swap(data_vec); */
4390  __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
4391  __m256i const sum = _mm256_add_epi64(xacc[i], data_swap);
4392  /* xacc[i] += product; */
4393  xacc[i] = _mm256_add_epi64(product, sum);
4394  } }
4395 }
4396 XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
4397 
4398 XXH_FORCE_INLINE XXH_TARGET_AVX2 void
4399 XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4400 {
4401  XXH_ASSERT((((size_t)acc) & 31) == 0);
4402  { __m256i* const xacc = (__m256i*) acc;
4403  /* Unaligned. This is mainly for pointer arithmetic, and because
4404  * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4405  const __m256i* const xsecret = (const __m256i *) secret;
4406  const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
4407 
4408  size_t i;
4409  for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
4410  /* xacc[i] ^= (xacc[i] >> 47) */
4411  __m256i const acc_vec = xacc[i];
4412  __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47);
4413  __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted);
4414  /* xacc[i] ^= xsecret; */
4415  __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
4416  __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
4417 
4418  /* xacc[i] *= XXH_PRIME32_1; */
4419  __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
4420  __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);
4421  __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
4422  xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
4423  }
4424  }
4425 }
4426 
4427 XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4428 {
4429  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
4430  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
4431  XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
4432  (void)(&XXH_writeLE64);
4433  XXH_PREFETCH(customSecret);
4434  { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
4435 
4436  const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret);
4437  __m256i* dest = ( __m256i*) customSecret;
4438 
4439 # if defined(__GNUC__) || defined(__clang__)
4440  /*
4441  * On GCC & Clang, marking 'dest' as modified causes the compiler to:
4442  * - not extract the secret from SSE registers in the internal loop
4443  * - use fewer common registers, and avoid pushing these registers onto the stack
4444  */
4445  XXH_COMPILER_GUARD(dest);
4446 # endif
4447  XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
4448  XXH_ASSERT(((size_t)dest & 31) == 0);
4449 
4450  /* GCC -O2 needs the loop unrolled manually */
4451  dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
4452  dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
4453  dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
4454  dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
4455  dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
4456  dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
4457  }
4458 }
4459 
4460 #endif
4461 
4462 /* x86dispatch always generates SSE2 */
4463 #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
4464 
4465 #ifndef XXH_TARGET_SSE2
4466 # define XXH_TARGET_SSE2 /* disable attribute target */
4467 #endif
4468 
4469 XXH_FORCE_INLINE XXH_TARGET_SSE2 void
4470 XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
4471  const void* XXH_RESTRICT input,
4472  const void* XXH_RESTRICT secret)
4473 {
4474  /* SSE2 is just a half-scale version of the AVX2 version. */
4475  XXH_ASSERT((((size_t)acc) & 15) == 0);
4476  { __m128i* const xacc = (__m128i *) acc;
4477  /* Unaligned. This is mainly for pointer arithmetic, and because
4478  * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4479  const __m128i* const xinput = (const __m128i *) input;
4480  /* Unaligned. This is mainly for pointer arithmetic, and because
4481  * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4482  const __m128i* const xsecret = (const __m128i *) secret;
4483 
4484  size_t i;
4485  for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
4486  /* data_vec = xinput[i]; */
4487  __m128i const data_vec = _mm_loadu_si128 (xinput+i);
4488  /* key_vec = xsecret[i]; */
4489  __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
4490  /* data_key = data_vec ^ key_vec; */
4491  __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
4492  /* data_key_lo = data_key >> 32; */
4493  __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4494  /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4495  __m128i const product = _mm_mul_epu32 (data_key, data_key_lo);
4496  /* xacc[i] += swap(data_vec); */
4497  __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
4498  __m128i const sum = _mm_add_epi64(xacc[i], data_swap);
4499  /* xacc[i] += product; */
4500  xacc[i] = _mm_add_epi64(product, sum);
4501  } }
4502 }
4503 XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
4504 
4505 XXH_FORCE_INLINE XXH_TARGET_SSE2 void
4506 XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4507 {
4508  XXH_ASSERT((((size_t)acc) & 15) == 0);
4509  { __m128i* const xacc = (__m128i*) acc;
4510  /* Unaligned. This is mainly for pointer arithmetic, and because
4511  * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4512  const __m128i* const xsecret = (const __m128i *) secret;
4513  const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
4514 
4515  size_t i;
4516  for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
4517  /* xacc[i] ^= (xacc[i] >> 47) */
4518  __m128i const acc_vec = xacc[i];
4519  __m128i const shifted = _mm_srli_epi64 (acc_vec, 47);
4520  __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted);
4521  /* xacc[i] ^= xsecret[i]; */
4522  __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
4523  __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
4524 
4525  /* xacc[i] *= XXH_PRIME32_1; */
4526  __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4527  __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32);
4528  __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32);
4529  xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
4530  }
4531  }
4532 }
4533 
4534 XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4535 {
4536  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
4537  (void)(&XXH_writeLE64);
4538  { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
4539 
4540 # if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
4541  /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
4542  XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
4543  __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
4544 # else
4545  __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
4546 # endif
4547  int i;
4548 
4549  const void* const src16 = XXH3_kSecret;
4550  __m128i* dst16 = (__m128i*) customSecret;
4551 # if defined(__GNUC__) || defined(__clang__)
4552  /*
4553  * On GCC & Clang, marking 'dst16' as modified causes the compiler to:
4554  * - not extract the secret from SSE registers in the internal loop
4555  * - use fewer common registers, and avoid spilling these registers to the stack
4556  */
4557  XXH_COMPILER_GUARD(dst16);
4558 # endif
4559  XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
4560  XXH_ASSERT(((size_t)dst16 & 15) == 0);
4561 
4562  for (i=0; i < nbRounds; ++i) {
4563  dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
4564  } }
4565 }
4566 
4567 #endif
4568 
4569 #if (XXH_VECTOR == XXH_NEON)
4570 
4571 /* forward declarations for the scalar routines */
4572 XXH_FORCE_INLINE void
4573 XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
4574  void const* XXH_RESTRICT secret, size_t lane);
4575 
4576 XXH_FORCE_INLINE void
4577 XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
4578  void const* XXH_RESTRICT secret, size_t lane);
4579 
4600 XXH_FORCE_INLINE void
4601 XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
4602  const void* XXH_RESTRICT input,
4603  const void* XXH_RESTRICT secret)
4604 {
4605  XXH_ASSERT((((size_t)acc) & 15) == 0);
4606  XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
4607  { /* GCC for darwin arm64 does not like aliasing here */
4608  xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
4609  /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
4610  uint8_t const* const xinput = (const uint8_t *) input;
4611  uint8_t const* const xsecret = (const uint8_t *) secret;
4612 
4613  size_t i;
4614  /* Scalar lanes use the normal scalarRound routine */
4615  for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
4616  XXH3_scalarRound(acc, input, secret, i);
4617  }
4618  i = 0;
4619  /* 4 NEON lanes at a time. */
4620  for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
4621  /* data_vec = xinput[i]; */
4622  uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16));
4623  uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16));
4624  /* key_vec = xsecret[i]; */
4625  uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16));
4626  uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16));
4627  /* data_swap = swap(data_vec) */
4628  uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
4629  uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
4630  /* data_key = data_vec ^ key_vec; */
4631  uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
4632  uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
4633 
4634  /*
4635  * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
4636  * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
4637  * get one vector with the low 32 bits of each lane, and one vector
4638  * with the high 32 bits of each lane.
4639  *
4640  * On AArch64 this compiles to two instructions. The paired vector
4641  * result is an artifact of the ARMv7-A version, which modified both
4642  * vectors in place.
4643  *
4644  * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
4645  * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
4646  */
4647  uint32x4x2_t unzipped = vuzpq_u32(
4648  vreinterpretq_u32_u64(data_key_1),
4649  vreinterpretq_u32_u64(data_key_2)
4650  );
4651  /* data_key_lo = data_key & 0xFFFFFFFF */
4652  uint32x4_t data_key_lo = unzipped.val[0];
4653  /* data_key_hi = data_key >> 32 */
4654  uint32x4_t data_key_hi = unzipped.val[1];
4655  /*
4656  * Then we can split the vectors horizontally and multiply. As with most
4657  * widening intrinsics, there is a variant that works on the high half
4658  * vectors for free on AArch64.
4659  *
4660  * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
4661  */
4662  uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
4663  uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
4664  /*
4665  * Clang reorders
4666  * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s
4667  * c += a; // add acc.2d, acc.2d, swap.2d
4668  * to
4669  * c += a; // add acc.2d, acc.2d, swap.2d
4670  * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s
4671  *
4672  * While this would make sense in theory since the addition is faster,
4673  * it is worse in practice, likely because UMLAL is limited to certain
4674  * NEON pipelines. A compiler guard prevents this reordering.
4675  */
4676  XXH_COMPILER_GUARD_W(sum_1);
4677  XXH_COMPILER_GUARD_W(sum_2);
4678  /* xacc[i] = acc_vec + sum; */
4679  xacc[i] = vaddq_u64(xacc[i], sum_1);
4680  xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
4681  }
4682  /* Operate on the remaining NEON lanes 2 at a time. */
4683  for (; i < XXH3_NEON_LANES / 2; i++) {
4684  /* data_vec = xinput[i]; */
4685  uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
4686  /* key_vec = xsecret[i]; */
4687  uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
4688  /* acc_vec_2 = swap(data_vec) */
4689  uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
4690  /* data_key = data_vec ^ key_vec; */
4691  uint64x2_t data_key = veorq_u64(data_vec, key_vec);
4692  /* For two lanes, just use VMOVN and VSHRN. */
4693  /* data_key_lo = data_key & 0xFFFFFFFF; */
4694  uint32x2_t data_key_lo = vmovn_u64(data_key);
4695  /* data_key_hi = data_key >> 32; */
4696  uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
4697  /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
4698  uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
4699  /* Same Clang workaround as before */
4700  XXH_COMPILER_GUARD_W(sum);
4701  /* xacc[i] = acc_vec + sum; */
4702  xacc[i] = vaddq_u64 (xacc[i], sum);
4703  }
4704  }
4705 }
4706 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
4707 
4708 XXH_FORCE_INLINE void
4709 XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4710 {
4711  XXH_ASSERT((((size_t)acc) & 15) == 0);
4712 
4713  { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;
4714  uint8_t const* xsecret = (uint8_t const*) secret;
4715  uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);
4716 
4717  size_t i;
4718  /* AArch64 uses both scalar and neon at the same time */
4719  for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
4720  XXH3_scalarScrambleRound(acc, secret, i);
4721  }
4722  for (i=0; i < XXH3_NEON_LANES / 2; i++) {
4723  /* xacc[i] ^= (xacc[i] >> 47); */
4724  uint64x2_t acc_vec = xacc[i];
4725  uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
4726  uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
4727 
4728  /* xacc[i] ^= xsecret[i]; */
4729  uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
4730  uint64x2_t data_key = veorq_u64(data_vec, key_vec);
4731 
4732  /* xacc[i] *= XXH_PRIME32_1 */
4733  uint32x2_t data_key_lo = vmovn_u64(data_key);
4734  uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
4735  /*
4736  * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
4737  *
4738  * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
4739  * incorrectly "optimize" this:
4740  * tmp = vmul_u32(vmovn_u64(a), vmovn_u64(b));
4741  * shifted = vshll_n_u32(tmp, 32);
4742  * to this:
4743  * tmp = "vmulq_u64"(a, b); // no such thing!
4744  * shifted = vshlq_n_u64(tmp, 32);
4745  *
4746  * However, unlike SSE, Clang lacks a 64-bit multiply routine
4747  * for NEON, and it scalarizes two 64-bit multiplies instead.
4748  *
4749  * vmull_u32 has the same timing as vmul_u32, and it avoids
4750  * this bug completely.
4751  * See https://bugs.llvm.org/show_bug.cgi?id=39967
4752  */
4753  uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
4754  /* xacc[i] = prod_hi << 32; */
4755  prod_hi = vshlq_n_u64(prod_hi, 32);
4756  /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
4757  xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
4758  }
4759  }
4760 }
4761 #endif
4762 
4763 #if (XXH_VECTOR == XXH_VSX)
4764 
4765 XXH_FORCE_INLINE void
4766 XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
4767  const void* XXH_RESTRICT input,
4768  const void* XXH_RESTRICT secret)
4769 {
4770  /* presumed aligned */
4771  xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
4772  xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */
4773  xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */
4774  xxh_u64x2 const v32 = { 32, 32 };
4775  size_t i;
4776  for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
4777  /* data_vec = xinput[i]; */
4778  xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
4779  /* key_vec = xsecret[i]; */
4780  xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
4781  xxh_u64x2 const data_key = data_vec ^ key_vec;
4782  /* shuffled = (data_key << 32) | (data_key >> 32); */
4783  xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
4784  /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
4785  xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
4786  /* acc_vec = xacc[i]; */
4787  xxh_u64x2 acc_vec = xacc[i];
4788  acc_vec += product;
4789 
4790  /* swap high and low halves */
4791 #ifdef __s390x__
4792  acc_vec += vec_permi(data_vec, data_vec, 2);
4793 #else
4794  acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
4795 #endif
4796  xacc[i] = acc_vec;
4797  }
4798 }
4799 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
4800 
4801 XXH_FORCE_INLINE void
4802 XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4803 {
4804  XXH_ASSERT((((size_t)acc) & 15) == 0);
4805 
4806  { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
4807  const xxh_u8* const xsecret = (const xxh_u8*) secret;
4808  /* constants */
4809  xxh_u64x2 const v32 = { 32, 32 };
4810  xxh_u64x2 const v47 = { 47, 47 };
4811  xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
4812  size_t i;
4813  for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
4814  /* xacc[i] ^= (xacc[i] >> 47); */
4815  xxh_u64x2 const acc_vec = xacc[i];
4816  xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
4817 
4818  /* xacc[i] ^= xsecret[i]; */
4819  xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
4820  xxh_u64x2 const data_key = data_vec ^ key_vec;
4821 
4822  /* xacc[i] *= XXH_PRIME32_1 */
4823  /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */
4824  xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
4825  /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */
4826  xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime);
4827  xacc[i] = prod_odd + (prod_even << v32);
4828  } }
4829 }
4830 
4831 #endif
4832 
4833 #if (XXH_VECTOR == XXH_SVE)
4834 
4835 XXH_FORCE_INLINE void
4836 XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
4837  const void* XXH_RESTRICT input,
4838  const void* XXH_RESTRICT secret)
4839 {
4840  uint64_t *xacc = (uint64_t *)acc;
4841  const uint64_t *xinput = (const uint64_t *)(const void *)input;
4842  const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
4843  svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
4844  uint64_t element_count = svcntd();
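 /*
  * Note: svcntd() returns the number of 64-bit lanes per SVE vector
  * (2 for 128-bit SVE, 4 for 256-bit, 8 for 512-bit or wider), so the
  * branches below cover the 8 accumulator lanes with one vector
  * (element_count >= 8), four vectors (element_count == 2), or two
  * vectors (otherwise) respectively.
  */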
4845  if (element_count >= 8) {
4846  svbool_t mask = svptrue_pat_b64(SV_VL8);
4847  svuint64_t vacc = svld1_u64(mask, xacc);
4848  ACCRND(vacc, 0);
4849  svst1_u64(mask, xacc, vacc);
4850  } else if (element_count == 2) { /* sve128 */
4851  svbool_t mask = svptrue_pat_b64(SV_VL2);
4852  svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4853  svuint64_t acc1 = svld1_u64(mask, xacc + 2);
4854  svuint64_t acc2 = svld1_u64(mask, xacc + 4);
4855  svuint64_t acc3 = svld1_u64(mask, xacc + 6);
4856  ACCRND(acc0, 0);
4857  ACCRND(acc1, 2);
4858  ACCRND(acc2, 4);
4859  ACCRND(acc3, 6);
4860  svst1_u64(mask, xacc + 0, acc0);
4861  svst1_u64(mask, xacc + 2, acc1);
4862  svst1_u64(mask, xacc + 4, acc2);
4863  svst1_u64(mask, xacc + 6, acc3);
4864  } else {
4865  svbool_t mask = svptrue_pat_b64(SV_VL4);
4866  svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4867  svuint64_t acc1 = svld1_u64(mask, xacc + 4);
4868  ACCRND(acc0, 0);
4869  ACCRND(acc1, 4);
4870  svst1_u64(mask, xacc + 0, acc0);
4871  svst1_u64(mask, xacc + 4, acc1);
4872  }
4873 }
4874 
4875 XXH_FORCE_INLINE void
4876 XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
4877  const xxh_u8* XXH_RESTRICT input,
4878  const xxh_u8* XXH_RESTRICT secret,
4879  size_t nbStripes)
4880 {
4881  if (nbStripes != 0) {
4882  uint64_t *xacc = (uint64_t *)acc;
4883  const uint64_t *xinput = (const uint64_t *)(const void *)input;
4884  const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
4885  svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
4886  uint64_t element_count = svcntd();
4887  if (element_count >= 8) {
4888  svbool_t mask = svptrue_pat_b64(SV_VL8);
4889  svuint64_t vacc = svld1_u64(mask, xacc + 0);
4890  do {
4891  /* svprfd(svbool_t, void *, enum svprfop); */
4892  svprfd(mask, xinput + 128, SV_PLDL1STRM);
4893  ACCRND(vacc, 0);
4894  xinput += 8;
4895  xsecret += 1;
4896  nbStripes--;
4897  } while (nbStripes != 0);
4898 
4899  svst1_u64(mask, xacc + 0, vacc);
4900  } else if (element_count == 2) { /* sve128 */
4901  svbool_t mask = svptrue_pat_b64(SV_VL2);
4902  svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4903  svuint64_t acc1 = svld1_u64(mask, xacc + 2);
4904  svuint64_t acc2 = svld1_u64(mask, xacc + 4);
4905  svuint64_t acc3 = svld1_u64(mask, xacc + 6);
4906  do {
4907  svprfd(mask, xinput + 128, SV_PLDL1STRM);
4908  ACCRND(acc0, 0);
4909  ACCRND(acc1, 2);
4910  ACCRND(acc2, 4);
4911  ACCRND(acc3, 6);
4912  xinput += 8;
4913  xsecret += 1;
4914  nbStripes--;
4915  } while (nbStripes != 0);
4916 
4917  svst1_u64(mask, xacc + 0, acc0);
4918  svst1_u64(mask, xacc + 2, acc1);
4919  svst1_u64(mask, xacc + 4, acc2);
4920  svst1_u64(mask, xacc + 6, acc3);
4921  } else {
4922  svbool_t mask = svptrue_pat_b64(SV_VL4);
4923  svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4924  svuint64_t acc1 = svld1_u64(mask, xacc + 4);
4925  do {
4926  svprfd(mask, xinput + 128, SV_PLDL1STRM);
4927  ACCRND(acc0, 0);
4928  ACCRND(acc1, 4);
4929  xinput += 8;
4930  xsecret += 1;
4931  nbStripes--;
4932  } while (nbStripes != 0);
4933 
4934  svst1_u64(mask, xacc + 0, acc0);
4935  svst1_u64(mask, xacc + 4, acc1);
4936  }
4937  }
4938 }
4939 
4940 #endif
4941 
4942 /* scalar variants - universal */
4943 
4944 #if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
4945 /*
4946  * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
4947  * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
4948  *
4949  * While this might not seem like much, as AArch64 is a 64-bit architecture, only
4950  * big Cortex designs have a full 64-bit multiplier.
4951  *
4952  * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
4953  * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
4954  * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
4955  *
4956  * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
4957  * not have this penalty and does the mask automatically.
4958  */
4959 XXH_FORCE_INLINE xxh_u64
4960 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
4961 {
4962  xxh_u64 ret;
4963  /* note: %x = 64-bit register, %w = 32-bit register */
4964  __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
4965  return ret;
4966 }
4967 #else
4968 XXH_FORCE_INLINE xxh_u64
4969 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
4970 {
4971  return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
4972 }
4973 #endif
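Both definitions of XXH_mult32to64_add64() above compute the same thing: a 32x32->64 widening multiply of the operands' low halves, added to a 64-bit accumulator. A minimal standalone sketch of that contract follows; the reference helper and check below are hypothetical and not part of xxHash:

#include <assert.h>
#include <stdint.h>

/* Reference semantics of XXH_mult32to64_add64 (illustrative only):
 * multiply the low 32 bits of each operand into a 64-bit product,
 * then add the 64-bit accumulator. */
static uint64_t mult32to64_add64_ref(uint64_t lhs, uint64_t rhs, uint64_t acc)
{
    return (uint64_t)(uint32_t)lhs * (uint64_t)(uint32_t)rhs + acc;
}

static void check_mult32to64_add64(void)
{
    /* the high 32 bits of lhs and rhs must be ignored */
    assert(mult32to64_add64_ref(0xFFFFFFFF00000002ULL, 0x1100000000000003ULL, 10)
           == 2ULL * 3ULL + 10ULL);
}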
4974 
4982 XXH_FORCE_INLINE void
4983 XXH3_scalarRound(void* XXH_RESTRICT acc,
4984  void const* XXH_RESTRICT input,
4985  void const* XXH_RESTRICT secret,
4986  size_t lane)
4987 {
4988  xxh_u64* xacc = (xxh_u64*) acc;
4989  xxh_u8 const* xinput = (xxh_u8 const*) input;
4990  xxh_u8 const* xsecret = (xxh_u8 const*) secret;
4991  XXH_ASSERT(lane < XXH_ACC_NB);
4992  XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
4993  {
4994  xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
4995  xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
4996  xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
4997  xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
4998  }
4999 }
5000 
5005 XXH_FORCE_INLINE void
5006 XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
5007  const void* XXH_RESTRICT input,
5008  const void* XXH_RESTRICT secret)
5009 {
5010  size_t i;
5011  /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
5012 #if defined(__GNUC__) && !defined(__clang__) \
5013  && (defined(__arm__) || defined(__thumb2__)) \
5014  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
5015  && XXH_SIZE_OPT <= 0
5016 # pragma GCC unroll 8
5017 #endif
5018  for (i=0; i < XXH_ACC_NB; i++) {
5019  XXH3_scalarRound(acc, input, secret, i);
5020  }
5021 }
5022 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
5023 
5024 
5031 XXH_FORCE_INLINE void
5032 XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
5033  void const* XXH_RESTRICT secret,
5034  size_t lane)
5035 {
5036  xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
5037  const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
5038  XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
5039  XXH_ASSERT(lane < XXH_ACC_NB);
5040  {
5041  xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
5042  xxh_u64 acc64 = xacc[lane];
5043  acc64 = XXH_xorshift64(acc64, 47);
5044  acc64 ^= key64;
5045  acc64 *= XXH_PRIME32_1;
5046  xacc[lane] = acc64;
5047  }
5048 }
5049 
5054 XXH_FORCE_INLINE void
5055 XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
5056 {
5057  size_t i;
5058  for (i=0; i < XXH_ACC_NB; i++) {
5059  XXH3_scalarScrambleRound(acc, secret, i);
5060  }
5061 }
5062 
5063 XXH_FORCE_INLINE void
5064 XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
5065 {
5066  /*
5067  * We need a separate pointer for the hack below,
5068  * which requires a non-const pointer.
5069  * Any decent compiler will optimize this out otherwise.
5070  */
5071  const xxh_u8* kSecretPtr = XXH3_kSecret;
5072  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
5073 
5074 #if defined(__GNUC__) && defined(__aarch64__)
5075  /*
5076  * UGLY HACK:
5077  * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
5078  * placed sequentially, in order, at the top of the unrolled loop.
5079  *
5080  * While MOVK is great for generating constants (2 cycles for a 64-bit
5081  * constant compared to 4 cycles for LDR), it fights for bandwidth with
5082  * the arithmetic instructions.
5083  *
5084  * I L S
5085  * MOVK
5086  * MOVK
5087  * MOVK
5088  * MOVK
5089  * ADD
5090  * SUB STR
5091  * STR
5092  * By forcing loads from memory (as the asm line causes the compiler to assume
5093  * that kSecretPtr has been changed), the pipelines are used more
5094  * efficiently:
5095  * I L S
5096  * LDR
5097  * ADD LDR
5098  * SUB STR
5099  * STR
5100  *
5101  * See XXH3_NEON_LANES for details on the pipeline.
5102  *
5103  * XXH3_64bits_withSeed, len == 256, Snapdragon 835
5104  * without hack: 2654.4 MB/s
5105  * with hack: 3202.9 MB/s
5106  */
5107  XXH_COMPILER_GUARD(kSecretPtr);
5108 #endif
5109  { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
5110  int i;
5111  for (i=0; i < nbRounds; i++) {
5112  /*
5113  * The asm hack causes the compiler to assume that kSecretPtr aliases with
5114  * customSecret, and on aarch64, this prevented LDP from merging two
5115  * loads together for free. Putting the loads together before the stores
5116  * properly generates LDP.
5117  */
5118  xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64;
5119  xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
5120  XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo);
5121  XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
5122  } }
5123 }
5124 
5125 
5126 typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
5127 typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
5128 typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
5129 
5130 
5131 #if (XXH_VECTOR == XXH_AVX512)
5132 
5133 #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
5134 #define XXH3_accumulate XXH3_accumulate_avx512
5135 #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
5136 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
5137 
5138 #elif (XXH_VECTOR == XXH_AVX2)
5139 
5140 #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
5141 #define XXH3_accumulate XXH3_accumulate_avx2
5142 #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
5143 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
5144 
5145 #elif (XXH_VECTOR == XXH_SSE2)
5146 
5147 #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
5148 #define XXH3_accumulate XXH3_accumulate_sse2
5149 #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
5150 #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
5151 
5152 #elif (XXH_VECTOR == XXH_NEON)
5153 
5154 #define XXH3_accumulate_512 XXH3_accumulate_512_neon
5155 #define XXH3_accumulate XXH3_accumulate_neon
5156 #define XXH3_scrambleAcc XXH3_scrambleAcc_neon
5157 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5158 
5159 #elif (XXH_VECTOR == XXH_VSX)
5160 
5161 #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
5162 #define XXH3_accumulate XXH3_accumulate_vsx
5163 #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
5164 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5165 
5166 #elif (XXH_VECTOR == XXH_SVE)
5167 #define XXH3_accumulate_512 XXH3_accumulate_512_sve
5168 #define XXH3_accumulate XXH3_accumulate_sve
5169 #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
5170 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5171 
5172 #else /* scalar */
5173 
5174 #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
5175 #define XXH3_accumulate XXH3_accumulate_scalar
5176 #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
5177 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5178 
5179 #endif
5180 
5181 #if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
5182 # undef XXH3_initCustomSecret
5183 # define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5184 #endif
5185 
5186 XXH_FORCE_INLINE void
5187 XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
5188  const xxh_u8* XXH_RESTRICT input, size_t len,
5189  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5190  XXH3_f_accumulate f_acc,
5191  XXH3_f_scrambleAcc f_scramble)
5192 {
5193  size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
5194  size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
5195  size_t const nb_blocks = (len - 1) / block_len;
5196 
5197  size_t n;
5198 
5199  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
5200 
5201  for (n = 0; n < nb_blocks; n++) {
5202  f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
5203  f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
5204  }
5205 
5206  /* last partial block */
5207  XXH_ASSERT(len > XXH_STRIPE_LEN);
5208  { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
5209  XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
5210  f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
5211 
5212  /* last stripe */
5213  { const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
5214 #define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */
5215  XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
5216  } }
5217 }
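To make the block geometry above concrete, here is a small sketch assuming xxHash's usual constants (64-byte stripes, an 8-byte secret consume rate, and a 192-byte default secret); the macro and helper names are hypothetical:

#include <stdio.h>
#include <stddef.h>

/* Assumed values mirroring XXH_STRIPE_LEN, XXH_SECRET_CONSUME_RATE
 * and XXH_SECRET_DEFAULT_SIZE in this header. */
#define EX_STRIPE_LEN           64
#define EX_SECRET_CONSUME_RATE   8
#define EX_SECRET_DEFAULT_SIZE 192

static void print_block_geometry(size_t secretSize, size_t len)
{
    size_t const nbStripesPerBlock = (secretSize - EX_STRIPE_LEN) / EX_SECRET_CONSUME_RATE;
    size_t const block_len = EX_STRIPE_LEN * nbStripesPerBlock;
    size_t const nb_blocks = (len - 1) / block_len;
    /* with the default secret: (192-64)/8 = 16 stripes per block, i.e.
     * 1024 bytes hashed before each scramble of the accumulators */
    printf("stripes/block=%zu block_len=%zu full_blocks=%zu\n",
           nbStripesPerBlock, block_len, nb_blocks);
}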
5218 
5219 XXH_FORCE_INLINE xxh_u64
5220 XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
5221 {
5222  return XXH3_mul128_fold64(
5223  acc[0] ^ XXH_readLE64(secret),
5224  acc[1] ^ XXH_readLE64(secret+8) );
5225 }
5226 
5227 static XXH64_hash_t
5228 XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
5229 {
5230  xxh_u64 result64 = start;
5231  size_t i = 0;
5232 
5233  for (i = 0; i < 4; i++) {
5234  result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
5235 #if defined(__clang__) /* Clang */ \
5236  && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \
5237  && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
5238  && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
5239  /*
5240  * UGLY HACK:
5241  * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
5242  * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
5243  * XXH3_64bits, len == 256, Snapdragon 835:
5244  * without hack: 2063.7 MB/s
5245  * with hack: 2560.7 MB/s
5246  */
5247  XXH_COMPILER_GUARD(result64);
5248 #endif
5249  }
5250 
5251  return XXH3_avalanche(result64);
5252 }
5253 
5254 #define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
5255  XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
5256 
5257 XXH_FORCE_INLINE XXH64_hash_t
5258 XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
5259  const void* XXH_RESTRICT secret, size_t secretSize,
5260  XXH3_f_accumulate f_acc,
5261  XXH3_f_scrambleAcc f_scramble)
5262 {
5263  XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
5264 
5265  XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
5266 
5267  /* converge into final hash */
5268  XXH_STATIC_ASSERT(sizeof(acc) == 64);
5269  /* do not align on 8, so that the secret is different from the accumulator */
5270 #define XXH_SECRET_MERGEACCS_START 11
5271  XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
5272  return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
5273 }
5274 
5275 /*
5276  * It's important for performance to transmit the secret's size (when it's static)
5277  * so that the compiler can properly optimize the vectorized loop.
5278  * This makes a big performance difference for "medium" keys (<1 KB) when using the AVX instruction set.
5279  * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
5280  * breaks -Og, this is XXH_NO_INLINE.
5281  */
5282 XXH3_WITH_SECRET_INLINE XXH64_hash_t
5283 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
5284  XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5285 {
5286  (void)seed64;
5287  return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
5288 }
5289 
5290 /*
5291  * It's preferable for performance that XXH3_hashLong is not inlined,
5292  * as it results in a smaller function for small data, which is easier on the instruction cache.
5293  * Note that inside this no_inline function, we do inline the internal loop,
5294  * and provide a statically defined secret size to allow optimization of the vectorized loop.
5295  */
5296 XXH_NO_INLINE XXH_PUREF XXH64_hash_t
5297 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
5298  XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5299 {
5300  (void)seed64; (void)secret; (void)secretLen;
5301  return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
5302 }
5303 
5304 /*
5305  * XXH3_hashLong_64b_withSeed():
5306  * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
5307  * and then use this key for long mode hashing.
5308  *
5309  * This operation is decently fast but nonetheless costs a little bit of time.
5310  * Try to avoid it whenever possible (typically when seed==0).
5311  *
5312  * It's important for performance that XXH3_hashLong is not inlined. Not sure
5313  * why (uop cache maybe?), but the difference is large and easily measurable.
5314  */
5315 XXH_FORCE_INLINE XXH64_hash_t
5316 XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
5317  XXH64_hash_t seed,
5318  XXH3_f_accumulate f_acc,
5319  XXH3_f_scrambleAcc f_scramble,
5320  XXH3_f_initCustomSecret f_initSec)
5321 {
5322 #if XXH_SIZE_OPT <= 0
5323  if (seed == 0)
5324  return XXH3_hashLong_64b_internal(input, len,
5325  XXH3_kSecret, sizeof(XXH3_kSecret),
5326  f_acc, f_scramble);
5327 #endif
5328  { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
5329  f_initSec(secret, seed);
5330  return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
5331  f_acc, f_scramble);
5332  }
5333 }
5334 
5335 /*
5336  * It's important for performance that XXH3_hashLong is not inlined.
5337  */
5338 XXH_NO_INLINE XXH64_hash_t
5339 XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
5340  XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5341 {
5342  (void)secret; (void)secretLen;
5343  return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
5344  XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
5345 }
5346 
5347 
5348 typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
5349  XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
5350 
5351 XXH_FORCE_INLINE XXH64_hash_t
5352 XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
5353  XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
5354  XXH3_hashLong64_f f_hashLong)
5355 {
5356  XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
5357  /*
5358  * If an action is to be taken when the `secretLen` condition is not respected,
5359  * it should be done here.
5360  * For now, it's a contract pre-condition.
5361  * Adding a check and a branch here would cost performance at every hash.
5362  * Also, note that the function signature doesn't offer room to return an error.
5363  */
5364  if (len <= 16)
5365  return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
5366  if (len <= 128)
5367  return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5368  if (len <= XXH3_MIDSIZE_MAX)
5369  return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5370  return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
5371 }
5372 
5373 
5374 /* === Public entry point === */
5375 
5377 XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
5378 {
5379  return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
5380 }
5381 
5383 XXH_PUBLIC_API XXH64_hash_t
5384 XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
5385 {
5386  return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
5387 }
5388 
5390 XXH_PUBLIC_API XXH64_hash_t
5391 XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
5392 {
5393  return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
5394 }
5395 
5396 XXH_PUBLIC_API XXH64_hash_t
5397 XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
5398 {
5399  if (length <= XXH3_MIDSIZE_MAX)
5400  return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
5401  return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
5402 }
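A minimal usage sketch of the one-shot 64-bit entry points above, assuming the caller includes this header; the payload and seed are arbitrary example values and the wrapper name is hypothetical:

#include <stdio.h>
#include <string.h>
/* #include "xxhash.h"  -- provides the XXH3_64bits* entry points above */

static void oneshot_example(void)
{
    const char data[] = "sample payload";
    size_t const len = strlen(data);

    /* default secret, no seed */
    XXH64_hash_t const h0 = XXH3_64bits(data, len);
    /* default secret altered by a seed */
    XXH64_hash_t const h1 = XXH3_64bits_withSeed(data, len, (XXH64_hash_t)42);

    printf("%016llx %016llx\n", (unsigned long long)h0, (unsigned long long)h1);
}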
5403 
5404 
5405 /* === XXH3 streaming === */
5406 #ifndef XXH_NO_STREAM
5407 /*
5408  * Allocates a block of memory that is always aligned to align.
5409  *
5410  * This must be freed with `XXH_alignedFree()`.
5411  *
5412  * malloc typically guarantees 16-byte alignment on 64-bit systems and 8-byte
5413  * alignment on 32-bit. This isn't enough for the 32-byte aligned loads in AVX2,
5414  * nor, on 32-bit, for the 16-byte aligned loads in SSE2 and NEON.
5415  *
5416  * This underalignment previously caused a rather obvious crash which went
5417  * completely unnoticed due to XXH3_createState() not actually being tested.
5418  * Credit to RedSpah for noticing this bug.
5419  *
5420  * The alignment is done manually: functions like posix_memalign or _mm_malloc
5421  * are avoided. To maintain portability, we would have to write a fallback
5422  * like this anyway, and besides, testing for the existence of library
5423  * functions without relying on external build tools is impossible.
5424  *
5425  * The method is simple: Overallocate, manually align, and store the offset
5426  * to the original behind the returned pointer.
5427  *
5428  * Align must be a power of 2 and 8 <= align <= 128.
5429  */
5430 static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
5431 {
5432  XXH_ASSERT(align <= 128 && align >= 8); /* range check */
5433  XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
5434  XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */
5435  { /* Overallocate to make room for manual realignment and an offset byte */
5436  xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
5437  if (base != NULL) {
5438  /*
5439  * Get the offset needed to align this pointer.
5440  *
5441  * Even if the returned pointer is aligned, there will always be
5442  * at least one byte to store the offset to the original pointer.
5443  */
5444  size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
5445  /* Add the offset for the now-aligned pointer */
5446  xxh_u8* ptr = base + offset;
5447 
5448  XXH_ASSERT((size_t)ptr % align == 0);
5449 
5450  /* Store the offset immediately before the returned pointer. */
5451  ptr[-1] = (xxh_u8)offset;
5452  return ptr;
5453  }
5454  return NULL;
5455  }
5456 }
5457 /*
5458  * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
5459  * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
5460  */
5461 static void XXH_alignedFree(void* p)
5462 {
5463  if (p != NULL) {
5464  xxh_u8* ptr = (xxh_u8*)p;
5465  /* Get the offset byte we added in XXH_alignedMalloc. */
5466  xxh_u8 offset = ptr[-1];
5467  /* Free the original malloc'd pointer */
5468  xxh_u8* base = ptr - offset;
5469  XXH_free(base);
5470  }
5471 }
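As a hedged illustration of the pairing between XXH_alignedMalloc() and XXH_alignedFree() above (both are internal static helpers, so this sketch only illustrates their contract; the wrapper name is hypothetical):

#include <stddef.h>

static void example_aligned_roundtrip(size_t size)
{
    /* request a 64-byte aligned block; ptr[-1] holds the offset byte
     * that lets XXH_alignedFree() recover the original allocation */
    void* const p = XXH_alignedMalloc(size, 64);
    if (p != NULL) {
        /* ... use p ... */
        XXH_alignedFree(p);   /* never pass p to plain free() */
    }
}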
5473 XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
5474 {
5475  XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
5476  if (state==NULL) return NULL;
5477  XXH3_INITSTATE(state);
5478  return state;
5479 }
5480 
5482 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
5483 {
5484  XXH_alignedFree(statePtr);
5485  return XXH_OK;
5486 }
5487 
5489 XXH_PUBLIC_API void
5490 XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
5491 {
5492  XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
5493 }
5494 
5495 static void
5496 XXH3_reset_internal(XXH3_state_t* statePtr,
5497  XXH64_hash_t seed,
5498  const void* secret, size_t secretSize)
5499 {
5500  size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
5501  size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
5502  XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
5503  XXH_ASSERT(statePtr != NULL);
5504  /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
5505  memset((char*)statePtr + initStart, 0, initLength);
5506  statePtr->acc[0] = XXH_PRIME32_3;
5507  statePtr->acc[1] = XXH_PRIME64_1;
5508  statePtr->acc[2] = XXH_PRIME64_2;
5509  statePtr->acc[3] = XXH_PRIME64_3;
5510  statePtr->acc[4] = XXH_PRIME64_4;
5511  statePtr->acc[5] = XXH_PRIME32_2;
5512  statePtr->acc[6] = XXH_PRIME64_5;
5513  statePtr->acc[7] = XXH_PRIME32_1;
5514  statePtr->seed = seed;
5515  statePtr->useSeed = (seed != 0);
5516  statePtr->extSecret = (const unsigned char*)secret;
5517  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
5518  statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
5519  statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
5520 }
5521 
5524 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
5525 {
5526  if (statePtr == NULL) return XXH_ERROR;
5527  XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
5528  return XXH_OK;
5529 }
5530 
5532 XXH_PUBLIC_API XXH_errorcode
5533 XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
5534 {
5535  if (statePtr == NULL) return XXH_ERROR;
5536  XXH3_reset_internal(statePtr, 0, secret, secretSize);
5537  if (secret == NULL) return XXH_ERROR;
5538  if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
5539  return XXH_OK;
5540 }
5541 
5544 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
5545 {
5546  if (statePtr == NULL) return XXH_ERROR;
5547  if (seed==0) return XXH3_64bits_reset(statePtr);
5548  if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
5549  XXH3_initCustomSecret(statePtr->customSecret, seed);
5550  XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
5551  return XXH_OK;
5552 }
5553 
5555 XXH_PUBLIC_API XXH_errorcode
5556 XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
5557 {
5558  if (statePtr == NULL) return XXH_ERROR;
5559  if (secret == NULL) return XXH_ERROR;
5560  if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
5561  XXH3_reset_internal(statePtr, seed64, secret, secretSize);
5562  statePtr->useSeed = 1; /* always, even if seed64==0 */
5563  return XXH_OK;
5564 }
5565 
5583 XXH_FORCE_INLINE const xxh_u8 *
5584 XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
5585  size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
5586  const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
5587  const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
5588  XXH3_f_accumulate f_acc,
5589  XXH3_f_scrambleAcc f_scramble)
5590 {
5591  const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
5592  /* Process full blocks */
5593  if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
5594  /* Process the initial partial block... */
5595  size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
5596 
5597  do {
5598  /* Accumulate and scramble */
5599  f_acc(acc, input, initialSecret, nbStripesThisIter);
5600  f_scramble(acc, secret + secretLimit);
5601  input += nbStripesThisIter * XXH_STRIPE_LEN;
5602  nbStripes -= nbStripesThisIter;
5603  /* Then continue the loop with the full block size */
5604  nbStripesThisIter = nbStripesPerBlock;
5605  initialSecret = secret;
5606  } while (nbStripes >= nbStripesPerBlock);
5607  *nbStripesSoFarPtr = 0;
5608  }
5609  /* Process a partial block */
5610  if (nbStripes > 0) {
5611  f_acc(acc, input, initialSecret, nbStripes);
5612  input += nbStripes * XXH_STRIPE_LEN;
5613  *nbStripesSoFarPtr += nbStripes;
5614  }
5615  /* Return end pointer */
5616  return input;
5617 }
5618 
5619 #ifndef XXH3_STREAM_USE_STACK
5620 # if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
5621 # define XXH3_STREAM_USE_STACK 1
5622 # endif
5623 #endif
5624 /*
5625  * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
5626  */
5627 XXH_FORCE_INLINE XXH_errorcode
5628 XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
5629  const xxh_u8* XXH_RESTRICT input, size_t len,
5630  XXH3_f_accumulate f_acc,
5631  XXH3_f_scrambleAcc f_scramble)
5632 {
5633  if (input==NULL) {
5634  XXH_ASSERT(len == 0);
5635  return XXH_OK;
5636  }
5637 
5638  XXH_ASSERT(state != NULL);
5639  { const xxh_u8* const bEnd = input + len;
5640  const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5641 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
5642  /* For some reason, gcc and MSVC seem to suffer greatly
5643  * when operating on accumulators directly in the state.
5644  * Operating on stack space instead seems to enable proper optimization.
5645  * clang, on the other hand, doesn't seem to need this trick */
5646  XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
5647  XXH_memcpy(acc, state->acc, sizeof(acc));
5648 #else
5649  xxh_u64* XXH_RESTRICT const acc = state->acc;
5650 #endif
5651  state->totalLen += len;
5652  XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
5653 
5654  /* small input : just fill in tmp buffer */
5655  if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
5656  XXH_memcpy(state->buffer + state->bufferedSize, input, len);
5657  state->bufferedSize += (XXH32_hash_t)len;
5658  return XXH_OK;
5659  }
5660 
5661  /* total input is now > XXH3_INTERNALBUFFER_SIZE */
5662  #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
5663  XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
5664 
5665  /*
5666  * Internal buffer is partially filled (always, except at beginning)
5667  * Complete it, then consume it.
5668  */
5669  if (state->bufferedSize) {
5670  size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
5671  XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
5672  input += loadSize;
5673  XXH3_consumeStripes(acc,
5674  &state->nbStripesSoFar, state->nbStripesPerBlock,
5675  state->buffer, XXH3_INTERNALBUFFER_STRIPES,
5676  secret, state->secretLimit,
5677  f_acc, f_scramble);
5678  state->bufferedSize = 0;
5679  }
5680  XXH_ASSERT(input < bEnd);
5681  if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
5682  size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
5683  input = XXH3_consumeStripes(acc,
5684  &state->nbStripesSoFar, state->nbStripesPerBlock,
5685  input, nbStripes,
5686  secret, state->secretLimit,
5687  f_acc, f_scramble);
5688  XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
5689 
5690  }
5691  /* Some remaining input (always) : buffer it */
5692  XXH_ASSERT(input < bEnd);
5693  XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
5694  XXH_ASSERT(state->bufferedSize == 0);
5695  XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
5696  state->bufferedSize = (XXH32_hash_t)(bEnd-input);
5697 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
5698  /* save stack accumulators into state */
5699  XXH_memcpy(state->acc, acc, sizeof(acc));
5700 #endif
5701  }
5702 
5703  return XXH_OK;
5704 }
5705 
5707 XXH_PUBLIC_API XXH_errorcode
5708 XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
5709 {
5710  return XXH3_update(state, (const xxh_u8*)input, len,
5711  XXH3_accumulate, XXH3_scrambleAcc);
5712 }
5713 
5714 
5715 XXH_FORCE_INLINE void
5716 XXH3_digest_long (XXH64_hash_t* acc,
5717  const XXH3_state_t* state,
5718  const unsigned char* secret)
5719 {
5720  xxh_u8 lastStripe[XXH_STRIPE_LEN];
5721  const xxh_u8* lastStripePtr;
5722 
5723  /*
5724  * Digest on a local copy. This way, the state remains unaltered, and it can
5725  * continue ingesting more input afterwards.
5726  */
5727  XXH_memcpy(acc, state->acc, sizeof(state->acc));
5728  if (state->bufferedSize >= XXH_STRIPE_LEN) {
5729  /* Consume remaining stripes then point to remaining data in buffer */
5730  size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
5731  size_t nbStripesSoFar = state->nbStripesSoFar;
5732  XXH3_consumeStripes(acc,
5733  &nbStripesSoFar, state->nbStripesPerBlock,
5734  state->buffer, nbStripes,
5735  secret, state->secretLimit,
5736  XXH3_accumulate, XXH3_scrambleAcc);
5737  lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
5738  } else { /* bufferedSize < XXH_STRIPE_LEN */
5739  /* Copy to temp buffer */
5740  size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
5741  XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
5742  XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
5743  XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
5744  lastStripePtr = lastStripe;
5745  }
5746  /* Last stripe */
5747  XXH3_accumulate_512(acc,
5748  lastStripePtr,
5749  secret + state->secretLimit - XXH_SECRET_LASTACC_START);
5750 }
5751 
5753 XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(XXH_NOESCAPE const XXH3_state_t* state)
5754 {
5755  const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5756  if (state->totalLen > XXH3_MIDSIZE_MAX) {
5757  XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
5758  XXH3_digest_long(acc, state, secret);
5759  return XXH3_mergeAccs(acc,
5760  secret + XXH_SECRET_MERGEACCS_START,
5761  (xxh_u64)state->totalLen * XXH_PRIME64_1);
5762  }
5763  /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
5764  if (state->useSeed)
5765  return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
5766  return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
5767  secret, state->secretLimit + XXH_STRIPE_LEN);
5768 }
5769 #endif /* !XXH_NO_STREAM */
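A minimal sketch of the streaming flow implemented above (create, reset, update, digest, free), assuming the caller includes this header and XXH_NO_STREAM is not defined; chunk handling and error checking are abbreviated, and the wrapper name is hypothetical:

#include <stdio.h>
#include <string.h>
/* #include "xxhash.h"  -- streaming API defined above */

static void streaming_example(const char* const* chunks, size_t nbChunks)
{
    XXH3_state_t* const state = XXH3_createState();
    if (state == NULL) return;

    if (XXH3_64bits_reset_withSeed(state, (XXH64_hash_t)7) == XXH_OK) {
        size_t i;
        for (i = 0; i < nbChunks; i++) {
            /* feed input incrementally; small chunks are buffered internally */
            (void)XXH3_64bits_update(state, chunks[i], strlen(chunks[i]));
        }
        /* digest works on a local copy of the state, so more updates may follow */
        printf("%016llx\n", (unsigned long long)XXH3_64bits_digest(state));
    }
    XXH3_freeState(state);
}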
5770 
5771 
5772 /* ==========================================
5773  * XXH3 128 bits (a.k.a XXH128)
5774  * ==========================================
5775  * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
5776  * even without counting the significantly larger output size.
5777  *
5778  * For example, extra steps are taken to avoid the seed-dependent collisions
5779  * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
5780  *
5781  * This strength naturally comes at the cost of some speed, especially on short
5782  * lengths. Note that long inputs are hashed about as fast as with the 64-bit version,
5783  * since this variant uses only a slight modification of the 64-bit loop.
5784  *
5785  * XXH128 is also more oriented towards 64-bit machines. It is still extremely
5786  * fast for a _128-bit_ hash on 32-bit (it usually outperforms XXH64).
5787  */
5788 
5789 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5790 XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5791 {
5792  /* A doubled version of 1to3_64b with different constants. */
5793  XXH_ASSERT(input != NULL);
5794  XXH_ASSERT(1 <= len && len <= 3);
5795  XXH_ASSERT(secret != NULL);
5796  /*
5797  * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
5798  * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
5799  * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
5800  */
5801  { xxh_u8 const c1 = input[0];
5802  xxh_u8 const c2 = input[len >> 1];
5803  xxh_u8 const c3 = input[len - 1];
5804  xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
5805  | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
5806  xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
5807  xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
5808  xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
5809  xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
5810  xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
5811  XXH128_hash_t h128;
5812  h128.low64 = XXH64_avalanche(keyed_lo);
5813  h128.high64 = XXH64_avalanche(keyed_hi);
5814  return h128;
5815  }
5816 }
5817 
5818 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5819 XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5820 {
5821  XXH_ASSERT(input != NULL);
5822  XXH_ASSERT(secret != NULL);
5823  XXH_ASSERT(4 <= len && len <= 8);
5824  seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
5825  { xxh_u32 const input_lo = XXH_readLE32(input);
5826  xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
5827  xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
5828  xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
5829  xxh_u64 const keyed = input_64 ^ bitflip;
5830 
5831  /* Shift len to the left to ensure it is even, this avoids even multiplies. */
5832  XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
5833 
5834  m128.high64 += (m128.low64 << 1);
5835  m128.low64 ^= (m128.high64 >> 3);
5836 
5837  m128.low64 = XXH_xorshift64(m128.low64, 35);
5838  m128.low64 *= PRIME_MX2;
5839  m128.low64 = XXH_xorshift64(m128.low64, 28);
5840  m128.high64 = XXH3_avalanche(m128.high64);
5841  return m128;
5842  }
5843 }
5844 
5845 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5846 XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5847 {
5848  XXH_ASSERT(input != NULL);
5849  XXH_ASSERT(secret != NULL);
5850  XXH_ASSERT(9 <= len && len <= 16);
5851  { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
5852  xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
5853  xxh_u64 const input_lo = XXH_readLE64(input);
5854  xxh_u64 input_hi = XXH_readLE64(input + len - 8);
5855  XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
5856  /*
5857  * Put len in the middle of m128 to ensure that the length gets mixed to
5858  * both the low and high bits in the 128x64 multiply below.
5859  */
5860  m128.low64 += (xxh_u64)(len - 1) << 54;
5861  input_hi ^= bitfliph;
5862  /*
5863  * Add the high 32 bits of input_hi to the high 32 bits of m128, then
5864  * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
5865  * the high 64 bits of m128.
5866  *
5867  * The best approach to this operation is different on 32-bit and 64-bit.
5868  */
5869  if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
5870  /*
5871  * 32-bit optimized version, which is more readable.
5872  *
5873  * On 32-bit, it removes an ADC and delays a dependency between the two
5874  * halves of m128.high64, but it generates an extra mask on 64-bit.
5875  */
5876  m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
5877  } else {
5878  /*
5879  * 64-bit optimized (albeit more confusing) version.
5880  *
5881  * Uses some properties of addition and multiplication to remove the mask:
5882  *
5883  * Let:
5884  * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
5885  * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
5886  * c = XXH_PRIME32_2
5887  *
5888  * a + (b * c)
5889  * Inverse Property: x + y - x == y
5890  * a + (b * (1 + c - 1))
5891  * Distributive Property: x * (y + z) == (x * y) + (x * z)
5892  * a + (b * 1) + (b * (c - 1))
5893  * Identity Property: x * 1 == x
5894  * a + b + (b * (c - 1))
5895  *
5896  * Substitute a, b, and c:
5897  * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
5898  *
5899  * Since input_hi.hi + input_hi.lo == input_hi, we get this:
5900  * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
5901  */
5902  m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
5903  }
5904  /* m128 ^= XXH_swap64(m128 >> 64); */
5905  m128.low64 ^= XXH_swap64(m128.high64);
5906 
5907  { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
5908  XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
5909  h128.high64 += m128.high64 * XXH_PRIME64_2;
5910 
5911  h128.low64 = XXH3_avalanche(h128.low64);
5912  h128.high64 = XXH3_avalanche(h128.high64);
5913  return h128;
5914  } }
5915 }
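The 64-bit branch of XXH3_len_9to16_128b() above relies on the identity derived in its comment: b + a*c == (a + b) + a*(c - 1), where a is the low 32 bits and b the high 32-bit part of input_hi. A small hedged check of that identity; the helper below is hypothetical:

#include <assert.h>
#include <stdint.h>

/* Checks the mask-removal identity, with all arithmetic modulo 2^64:
 * (input_hi & 0xFFFFFFFF00000000) + lo*c == input_hi + lo*(c - 1),
 * where lo is the low 32 bits of input_hi. */
static void check_mask_removal(uint64_t input_hi, uint32_t c)
{
    uint64_t const lo       = (uint32_t)input_hi;
    uint64_t const masked   = (input_hi & 0xFFFFFFFF00000000ULL) + lo * (uint64_t)c;
    uint64_t const maskless = input_hi + lo * ((uint64_t)c - 1);
    assert(masked == maskless);   /* holds for every input_hi and c */
}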
5916 
5917 /*
5918  * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
5919  */
5920 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5921 XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5922 {
5923  XXH_ASSERT(len <= 16);
5924  { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
5925  if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
5926  if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
5927  { XXH128_hash_t h128;
5928  xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
5929  xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
5930  h128.low64 = XXH64_avalanche(seed ^ bitflipl);
5931  h128.high64 = XXH64_avalanche( seed ^ bitfliph);
5932  return h128;
5933  } }
5934 }
5935 
5936 /*
5937  * A bit slower than XXH3_mix16B, but handles multiply by zero better.
5938  */
5939 XXH_FORCE_INLINE XXH128_hash_t
5940 XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
5941  const xxh_u8* secret, XXH64_hash_t seed)
5942 {
5943  acc.low64 += XXH3_mix16B (input_1, secret+0, seed);
5944  acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
5945  acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
5946  acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
5947  return acc;
5948 }
5949 
5950 
5951 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5952 XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
5953  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5954  XXH64_hash_t seed)
5955 {
5956  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
5957  XXH_ASSERT(16 < len && len <= 128);
5958 
5959  { XXH128_hash_t acc;
5960  acc.low64 = len * XXH_PRIME64_1;
5961  acc.high64 = 0;
5962 
5963 #if XXH_SIZE_OPT >= 1
5964  {
5965  /* Smaller, but slightly slower. */
5966  unsigned int i = (unsigned int)(len - 1) / 32;
5967  do {
5968  acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
5969  } while (i-- != 0);
5970  }
5971 #else
5972  if (len > 32) {
5973  if (len > 64) {
5974  if (len > 96) {
5975  acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
5976  }
5977  acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
5978  }
5979  acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
5980  }
5981  acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
5982 #endif
5983  { XXH128_hash_t h128;
5984  h128.low64 = acc.low64 + acc.high64;
5985  h128.high64 = (acc.low64 * XXH_PRIME64_1)
5986  + (acc.high64 * XXH_PRIME64_4)
5987  + ((len - seed) * XXH_PRIME64_2);
5988  h128.low64 = XXH3_avalanche(h128.low64);
5989  h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
5990  return h128;
5991  }
5992  }
5993 }
5994 
5995 XXH_NO_INLINE XXH_PUREF XXH128_hash_t
5996 XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
5997  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5998  XXH64_hash_t seed)
5999 {
6000  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
6001  XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
6002 
6003  { XXH128_hash_t acc;
6004  unsigned i;
6005  acc.low64 = len * XXH_PRIME64_1;
6006  acc.high64 = 0;
6007  /*
6008  * We set `i` to offset + 32. We do this so that the unchanged
6009  * `len` can be used as the upper bound. This reaches a sweet spot
6010  * where both x86 and aarch64 get simple address generation and good codegen
6011  * for the loop.
6012  */
6013  for (i = 32; i < 160; i += 32) {
6014  acc = XXH128_mix32B(acc,
6015  input + i - 32,
6016  input + i - 16,
6017  secret + i - 32,
6018  seed);
6019  }
6020  acc.low64 = XXH3_avalanche(acc.low64);
6021  acc.high64 = XXH3_avalanche(acc.high64);
6022  /*
6023  * NB: `i <= len` will duplicate the last 32 bytes if
6024  * len % 32 is zero. This is an unfortunate necessity to keep
6025  * the hash result stable.
6026  */
6027  for (i=160; i <= len; i += 32) {
6028  acc = XXH128_mix32B(acc,
6029  input + i - 32,
6030  input + i - 16,
6031  secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
6032  seed);
6033  }
6034  /* last bytes */
6035  acc = XXH128_mix32B(acc,
6036  input + len - 16,
6037  input + len - 32,
6038  secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
6039  (XXH64_hash_t)0 - seed);
6040 
6041  { XXH128_hash_t h128;
6042  h128.low64 = acc.low64 + acc.high64;
6043  h128.high64 = (acc.low64 * XXH_PRIME64_1)
6044  + (acc.high64 * XXH_PRIME64_4)
6045  + ((len - seed) * XXH_PRIME64_2);
6046  h128.low64 = XXH3_avalanche(h128.low64);
6047  h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
6048  return h128;
6049  }
6050  }
6051 }
6052 
6053 XXH_FORCE_INLINE XXH128_hash_t
6054 XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
6055  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
6056  XXH3_f_accumulate f_acc,
6057  XXH3_f_scrambleAcc f_scramble)
6058 {
6059  XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
6060 
6061  XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
6062 
6063  /* converge into final hash */
6064  XXH_STATIC_ASSERT(sizeof(acc) == 64);
6065  XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
6066  { XXH128_hash_t h128;
6067  h128.low64 = XXH3_mergeAccs(acc,
6068  secret + XXH_SECRET_MERGEACCS_START,
6069  (xxh_u64)len * XXH_PRIME64_1);
6070  h128.high64 = XXH3_mergeAccs(acc,
6071  secret + secretSize
6072  - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
6073  ~((xxh_u64)len * XXH_PRIME64_2));
6074  return h128;
6075  }
6076 }
6077 
6078 /*
6079  * It's important for performance that XXH3_hashLong() is not inlined.
6080  */
6081 XXH_NO_INLINE XXH_PUREF XXH128_hash_t
6082 XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
6083  XXH64_hash_t seed64,
6084  const void* XXH_RESTRICT secret, size_t secretLen)
6085 {
6086  (void)seed64; (void)secret; (void)secretLen;
6087  return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
6088  XXH3_accumulate, XXH3_scrambleAcc);
6089 }
6090 
6091 /*
6092  * It's important for performance to pass @p secretLen (when it is known at compile time)
6093  * to the compiler, so that it can properly optimize the vectorized loop.
6094  *
6095  * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
6096  * breaks -Og, this is XXH_NO_INLINE.
6097  */
6098 XXH3_WITH_SECRET_INLINE XXH128_hash_t
6099 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
6100  XXH64_hash_t seed64,
6101  const void* XXH_RESTRICT secret, size_t secretLen)
6102 {
6103  (void)seed64;
6104  return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
6105  XXH3_accumulate, XXH3_scrambleAcc);
6106 }
6107 
6108 XXH_FORCE_INLINE XXH128_hash_t
6109 XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
6110  XXH64_hash_t seed64,
6111  XXH3_f_accumulate f_acc,
6112  XXH3_f_scrambleAcc f_scramble,
6113  XXH3_f_initCustomSecret f_initSec)
6114 {
6115  if (seed64 == 0)
6116  return XXH3_hashLong_128b_internal(input, len,
6117  XXH3_kSecret, sizeof(XXH3_kSecret),
6118  f_acc, f_scramble);
6119  { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6120  f_initSec(secret, seed64);
6121  return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
6122  f_acc, f_scramble);
6123  }
6124 }
6125 
6126 /*
6127  * It's important for performance that XXH3_hashLong is not inlined.
6128  */
6129 XXH_NO_INLINE XXH128_hash_t
6130 XXH3_hashLong_128b_withSeed(const void* input, size_t len,
6131  XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
6132 {
6133  (void)secret; (void)secretLen;
6134  return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
6135  XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
6136 }
6137 
6138 typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
6139  XXH64_hash_t, const void* XXH_RESTRICT, size_t);
6140 
6141 XXH_FORCE_INLINE XXH128_hash_t
6142 XXH3_128bits_internal(const void* input, size_t len,
6143  XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
6144  XXH3_hashLong128_f f_hl128)
6145 {
6146  XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
6147  /*
6148  * If any action should be taken when the `secret` conditions are not respected,
6149  * it would have to be done here.
6150  * For now, it's a contract pre-condition.
6151  * Adding a check and a branch here would cost performance at every hash.
6152  */
6153  if (len <= 16)
6154  return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
6155  if (len <= 128)
6156  return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
6157  if (len <= XXH3_MIDSIZE_MAX)
6158  return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
6159  return f_hl128(input, len, seed64, secret, secretLen);
6160 }
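/*
 * Illustrative sketch (not part of xxHash): the dispatcher above treats
 * `secretLen >= XXH3_SECRET_SIZE_MIN` as a contract pre-condition and only
 * asserts it. A caller that cannot guarantee the pre-condition could validate
 * explicitly before hashing. The helper name XXH_example_checkedHash128 is
 * hypothetical.
 */
static int XXH_example_checkedHash128(const void* input, size_t len,
                                      const void* secret, size_t secretSize,
                                      XXH128_hash_t* result)
{
    if (input == NULL && len != 0) return 0;
    if (secret == NULL || secretSize < XXH3_SECRET_SIZE_MIN) return 0;
    *result = XXH3_128bits_withSecret(input, len, secret, secretSize);
    return 1;
}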
6161 
6162 
6163 /* === Public XXH128 API === */
6164 
6166 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
6167 {
6168  return XXH3_128bits_internal(input, len, 0,
6169  XXH3_kSecret, sizeof(XXH3_kSecret),
6170  XXH3_hashLong_128b_default);
6171 }
6172 
6174 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
6175 XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
6176 {
6177  return XXH3_128bits_internal(input, len, 0,
6178  (const xxh_u8*)secret, secretSize,
6179  XXH3_hashLong_128b_withSecret);
6180 }
6181 
6183 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
6184 XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
6185 {
6186  return XXH3_128bits_internal(input, len, seed,
6187  XXH3_kSecret, sizeof(XXH3_kSecret),
6188  XXH3_hashLong_128b_withSeed);
6189 }
6190 
6192 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
6193 XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
6194 {
6195  if (len <= XXH3_MIDSIZE_MAX)
6196  return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
6197  return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
6198 }
6199 
6201 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
6202 XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
6203 {
6204  return XXH3_128bits_withSeed(input, len, seed);
6205 }
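/*
 * Usage sketch (illustrative, not part of xxHash): one-shot 128-bit hashing
 * with the public API defined above. The helper name XXH_example_oneShot128
 * is hypothetical.
 */
static XXH128_hash_t XXH_example_oneShot128(const void* data, size_t size, XXH64_hash_t seed)
{
    /* Unseeded variant: equivalent to XXH3_128bits_withSeed(data, size, 0) */
    XXH128_hash_t const unseeded = XXH3_128bits(data, size);
    /* Seeded variant: XXH128() simply forwards to XXH3_128bits_withSeed() */
    XXH128_hash_t const seeded   = XXH128(data, size, seed);
    /* XOR-combine both results so the sketch has a single return value */
    XXH128_hash_t combined;
    combined.low64  = unseeded.low64  ^ seeded.low64;
    combined.high64 = unseeded.high64 ^ seeded.high64;
    return combined;
}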
6206 
6207 
6208 /* === XXH3 128-bit streaming === */
6209 #ifndef XXH_NO_STREAM
6210 /*
6211  * All initialization and update functions are identical to the 64-bit streaming variant.
6212  * The only difference is the finalization routine.
6213  */
6214 
6217 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
6218 {
6219  return XXH3_64bits_reset(statePtr);
6220 }
6221 
6223 XXH_PUBLIC_API XXH_errorcode
6224 XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
6225 {
6226  return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
6227 }
6228 
6231 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
6232 {
6233  return XXH3_64bits_reset_withSeed(statePtr, seed);
6234 }
6235 
6237 XXH_PUBLIC_API XXH_errorcode
6238 XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
6239 {
6240  return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
6241 }
6242 
6244 XXH_PUBLIC_API XXH_errorcode
6245 XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
6246 {
6247  return XXH3_64bits_update(state, input, len);
6248 }
6249 
6251 XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(XXH_NOESCAPE const XXH3_state_t* state)
6252 {
6253  const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
6254  if (state->totalLen > XXH3_MIDSIZE_MAX) {
6255  XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
6256  XXH3_digest_long(acc, state, secret);
6257  XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
6258  { XXH128_hash_t h128;
6259  h128.low64 = XXH3_mergeAccs(acc,
6260  secret + XXH_SECRET_MERGEACCS_START,
6261  (xxh_u64)state->totalLen * XXH_PRIME64_1);
6262  h128.high64 = XXH3_mergeAccs(acc,
6263  secret + state->secretLimit + XXH_STRIPE_LEN
6264  - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
6265  ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
6266  return h128;
6267  }
6268  }
6269  /* len <= XXH3_MIDSIZE_MAX : short code */
6270  if (state->seed)
6271  return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
6272  return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
6273  secret, state->secretLimit + XXH_STRIPE_LEN);
6274 }
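/*
 * Streaming usage sketch (illustrative, not part of xxHash): hash a buffer in
 * fixed-size chunks with the 128-bit streaming API above. Returns 1 on
 * success, 0 on error. The helper name XXH_example_stream128 is hypothetical.
 */
static int XXH_example_stream128(const void* data, size_t size, XXH128_hash_t* result)
{
    XXH3_state_t* const state = XXH3_createState();
    size_t pos = 0;
    if (state == NULL) return 0;
    if (XXH3_128bits_reset(state) != XXH_OK) { XXH3_freeState(state); return 0; }
    while (pos < size) {
        /* feed at most 64 KB per update call */
        size_t const chunk = (size - pos < 65536) ? (size - pos) : 65536;
        if (XXH3_128bits_update(state, (const char*)data + pos, chunk) != XXH_OK) {
            XXH3_freeState(state);
            return 0;
        }
        pos += chunk;
    }
    *result = XXH3_128bits_digest(state);
    XXH3_freeState(state);
    return 1;
}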
6275 #endif /* !XXH_NO_STREAM */
6276 /* 128-bit utility functions */
6277 
6278 #include <string.h> /* memcmp, memcpy */
6279 
6280 /* return: 1 if equal, 0 if different */
6282 XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
6283 {
6284  /* note : XXH128_hash_t is compact, it has no padding byte */
6285  return !(memcmp(&h1, &h2, sizeof(h1)));
6286 }
6287 
6288 /* This prototype is compatible with stdlib's qsort().
6289  * @return : >0 if *h128_1 > *h128_2
6290  * <0 if *h128_1 < *h128_2
6291  * =0 if *h128_1 == *h128_2 */
6293 XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
6294 {
6295  XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
6296  XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
6297  int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
6298  /* note : bets that, in most cases, hash values are different */
6299  if (hcmp) return hcmp;
6300  return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
6301 }
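/*
 * Sorting sketch (illustrative, not part of xxHash): as noted above,
 * XXH128_cmp() matches the comparator contract of stdlib's qsort(), so an
 * array of XXH128_hash_t can be sorted directly (by high64 first, then low64).
 * The helper name XXH_example_sortHashes is hypothetical.
 */
#include <stdlib.h>   /* qsort; harmless if already included */
static void XXH_example_sortHashes(XXH128_hash_t* hashes, size_t count)
{
    qsort(hashes, count, sizeof(XXH128_hash_t), XXH128_cmp);
}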
6302 
6303 
6304 /*====== Canonical representation ======*/
6306 XXH_PUBLIC_API void
6307 XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
6308 {
6309  XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
6310  if (XXH_CPU_LITTLE_ENDIAN) {
6311  hash.high64 = XXH_swap64(hash.high64);
6312  hash.low64 = XXH_swap64(hash.low64);
6313  }
6314  XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
6315  XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
6316 }
6317 
6320 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
6321 {
6322  XXH128_hash_t h;
6323  h.high64 = XXH_readBE64(src);
6324  h.low64 = XXH_readBE64(src->digest + 8);
6325  return h;
6326 }
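/*
 * Canonical-form sketch (illustrative, not part of xxHash): round-trip a hash
 * through its big-endian canonical byte representation, e.g. for storage or
 * network transmission. The helper name XXH_example_canonicalRoundTrip is
 * hypothetical.
 */
static int XXH_example_canonicalRoundTrip(XXH128_hash_t h)
{
    XXH128_canonical_t canonical;   /* 16 bytes, big endian, endian-independent */
    XXH128_hash_t decoded;
    XXH128_canonicalFromHash(&canonical, h);          /* native -> canonical */
    decoded = XXH128_hashFromCanonical(&canonical);   /* canonical -> native */
    return XXH128_isEqual(h, decoded);                /* expected: 1 */
}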
6327 
6328 
6329 
6330 /* ==========================================
6331  * Secret generators
6332  * ==========================================
6333  */
6334 #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
6335 
6336 XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
6337 {
6338  XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
6339  XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
6340 }
6341 
6343 XXH_PUBLIC_API XXH_errorcode
6344 XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
6345 {
6346 #if (XXH_DEBUGLEVEL >= 1)
6347  XXH_ASSERT(secretBuffer != NULL);
6348  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
6349 #else
6350  /* production mode, assert() are disabled */
6351  if (secretBuffer == NULL) return XXH_ERROR;
6352  if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
6353 #endif
6354 
6355  if (customSeedSize == 0) {
6356  customSeed = XXH3_kSecret;
6357  customSeedSize = XXH_SECRET_DEFAULT_SIZE;
6358  }
6359 #if (XXH_DEBUGLEVEL >= 1)
6360  XXH_ASSERT(customSeed != NULL);
6361 #else
6362  if (customSeed == NULL) return XXH_ERROR;
6363 #endif
6364 
6365  /* Fill secretBuffer with a copy of customSeed - repeat as needed */
6366  { size_t pos = 0;
6367  while (pos < secretSize) {
6368  size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
6369  memcpy((char*)secretBuffer + pos, customSeed, toCopy);
6370  pos += toCopy;
6371  } }
6372 
6373  { size_t const nbSeg16 = secretSize / 16;
6374  size_t n;
6375  XXH128_canonical_t scrambler;
6376  XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
6377  for (n=0; n<nbSeg16; n++) {
6378  XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
6379  XXH3_combine16((char*)secretBuffer + n*16, h128);
6380  }
6381  /* last segment */
6382  XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
6383  }
6384  return XXH_OK;
6385 }
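/*
 * Secret-generation sketch (illustrative, not part of xxHash): derive a full
 * custom secret from arbitrary (possibly low-entropy) seed material, then use
 * it with the custom-secret hash variant. The buffer size and the helper name
 * XXH_example_hashWithDerivedSecret are assumptions of this sketch.
 */
static XXH128_hash_t XXH_example_hashWithDerivedSecret(const void* data, size_t size,
                                                       const void* seedMaterial, size_t seedSize)
{
    unsigned char secret[XXH3_SECRET_SIZE_MIN];   /* any size >= XXH3_SECRET_SIZE_MIN is valid */
    XXH128_hash_t zero;
    zero.low64 = 0; zero.high64 = 0;
    if (XXH3_generateSecret(secret, sizeof(secret), seedMaterial, seedSize) != XXH_OK)
        return zero;   /* all-zero hash signals failure in this sketch */
    return XXH3_128bits_withSecret(data, size, secret, sizeof(secret));
}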
6386 
6388 XXH_PUBLIC_API void
6389 XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
6390 {
6391  XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6392  XXH3_initCustomSecret(secret, seed);
6393  XXH_ASSERT(secretBuffer != NULL);
6394  memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
6395 }
6396 
6397 
6398 
6399 /* Pop our optimization override from above */
6400 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
6401  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
6402  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
6403 # pragma GCC pop_options
6404 #endif
6405 
6406 #endif /* XXH_NO_LONG_LONG */
6407 
6408 #endif /* XXH_NO_XXH3 */
6409 
6413 #endif /* XXH_IMPLEMENTATION */
6414 
6415 
6416 #if defined (__cplusplus)
6417 } /* extern "C" */
6418 #endif