ATLAS Offline Software
xxhash.h
Go to the documentation of this file.
1 /*
2  * xxHash - Extremely Fast Hash algorithm
3  * Header File
4  * Copyright (C) 2012-2023 Yann Collet
5  *
6  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are
10  * met:
11  *
12  * * Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * * Redistributions in binary form must reproduce the above
15  * copyright notice, this list of conditions and the following disclaimer
16  * in the documentation and/or other materials provided with the
17  * distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * You can contact the author at:
32  * - xxHash homepage: https://www.xxhash.com
33  * - xxHash source repository: https://github.com/Cyan4973/xxHash
34  */
35 
172 #if defined (__cplusplus)
173 extern "C" {
174 #endif
175 
176 /* ****************************
177  * INLINE mode
178  ******************************/
184 #ifdef XXH_DOXYGEN
185 
203 # define XXH_INLINE_ALL
204 # undef XXH_INLINE_ALL
205 
208 # define XXH_PRIVATE_API
209 # undef XXH_PRIVATE_API
210 
223 # define XXH_NAMESPACE /* YOUR NAME HERE */
224 # undef XXH_NAMESPACE
225 #endif
226 
227 #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
228  && !defined(XXH_INLINE_ALL_31684351384)
229  /* this section should be traversed only once */
230 # define XXH_INLINE_ALL_31684351384
231  /* give access to the advanced API, required to compile implementations */
232 # undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */
233 # define XXH_STATIC_LINKING_ONLY
234  /* make all functions private */
235 # undef XXH_PUBLIC_API
236 # if defined(__GNUC__)
237 # define XXH_PUBLIC_API static __inline __attribute__((unused))
238 # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
239 # define XXH_PUBLIC_API static inline
240 # elif defined(_MSC_VER)
241 # define XXH_PUBLIC_API static __inline
242 # else
243  /* note: this version may generate warnings for unused static functions */
244 # define XXH_PUBLIC_API static
245 # endif
246 
247  /*
248  * This part deals with the special case where a unit wants to inline xxHash,
249  * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
250  * such as part of some previously included *.h header file.
251  * Without further action, the new include would just be ignored,
252  * and functions would effectively _not_ be inlined (silent failure).
253  * The following macros solve this situation by prefixing all inlined names,
254  * avoiding naming collision with previous inclusions.
255  */
256  /* Before that, we unconditionally #undef all symbols,
257  * in case they were already defined with XXH_NAMESPACE.
258  * They will then be redefined for XXH_INLINE_ALL
259  */
260 # undef XXH_versionNumber
261  /* XXH32 */
262 # undef XXH32
263 # undef XXH32_createState
264 # undef XXH32_freeState
265 # undef XXH32_reset
266 # undef XXH32_update
267 # undef XXH32_digest
268 # undef XXH32_copyState
269 # undef XXH32_canonicalFromHash
270 # undef XXH32_hashFromCanonical
271  /* XXH64 */
272 # undef XXH64
273 # undef XXH64_createState
274 # undef XXH64_freeState
275 # undef XXH64_reset
276 # undef XXH64_update
277 # undef XXH64_digest
278 # undef XXH64_copyState
279 # undef XXH64_canonicalFromHash
280 # undef XXH64_hashFromCanonical
281  /* XXH3_64bits */
282 # undef XXH3_64bits
283 # undef XXH3_64bits_withSecret
284 # undef XXH3_64bits_withSeed
285 # undef XXH3_64bits_withSecretandSeed
286 # undef XXH3_createState
287 # undef XXH3_freeState
288 # undef XXH3_copyState
289 # undef XXH3_64bits_reset
290 # undef XXH3_64bits_reset_withSeed
291 # undef XXH3_64bits_reset_withSecret
292 # undef XXH3_64bits_update
293 # undef XXH3_64bits_digest
294 # undef XXH3_generateSecret
295  /* XXH3_128bits */
296 # undef XXH128
297 # undef XXH3_128bits
298 # undef XXH3_128bits_withSeed
299 # undef XXH3_128bits_withSecret
300 # undef XXH3_128bits_reset
301 # undef XXH3_128bits_reset_withSeed
302 # undef XXH3_128bits_reset_withSecret
303 # undef XXH3_128bits_reset_withSecretandSeed
304 # undef XXH3_128bits_update
305 # undef XXH3_128bits_digest
306 # undef XXH128_isEqual
307 # undef XXH128_cmp
308 # undef XXH128_canonicalFromHash
309 # undef XXH128_hashFromCanonical
310  /* Finally, free the namespace itself */
311 # undef XXH_NAMESPACE
312 
313  /* employ the namespace for XXH_INLINE_ALL */
314 # define XXH_NAMESPACE XXH_INLINE_
315  /*
316  * Some identifiers (enums, type names) are not symbols,
317  * but they must nonetheless be renamed to avoid redeclaration.
318  * Alternative solution: do not redeclare them.
319  * However, this requires some #ifdefs, and has a more dispersed impact.
320  * Meanwhile, renaming can be achieved in a single place.
321  */
322 # define XXH_IPREF(Id) XXH_NAMESPACE ## Id
323 # define XXH_OK XXH_IPREF(XXH_OK)
324 # define XXH_ERROR XXH_IPREF(XXH_ERROR)
325 # define XXH_errorcode XXH_IPREF(XXH_errorcode)
326 # define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
327 # define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
328 # define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
329 # define XXH32_state_s XXH_IPREF(XXH32_state_s)
330 # define XXH32_state_t XXH_IPREF(XXH32_state_t)
331 # define XXH64_state_s XXH_IPREF(XXH64_state_s)
332 # define XXH64_state_t XXH_IPREF(XXH64_state_t)
333 # define XXH3_state_s XXH_IPREF(XXH3_state_s)
334 # define XXH3_state_t XXH_IPREF(XXH3_state_t)
335 # define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
336  /* Ensure the header is parsed again, even if it was previously included */
337 # undef XXHASH_H_5627135585666179
338 # undef XXHASH_H_STATIC_13879238742
339 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
340 
341 /* ****************************************************************
342  * Stable API
343  *****************************************************************/
344 #ifndef XXHASH_H_5627135585666179
345 #define XXHASH_H_5627135585666179 1
346 
348 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
349 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
350 # ifdef XXH_EXPORT
351 # define XXH_PUBLIC_API __declspec(dllexport)
352 # elif XXH_IMPORT
353 # define XXH_PUBLIC_API __declspec(dllimport)
354 # endif
355 # else
356 # define XXH_PUBLIC_API /* do nothing */
357 # endif
358 #endif
359 
360 #ifdef XXH_NAMESPACE
361 # define XXH_CAT(A,B) A##B
362 # define XXH_NAME2(A,B) XXH_CAT(A,B)
363 # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
364 /* XXH32 */
365 # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
366 # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
367 # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
368 # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
369 # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
370 # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
371 # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
372 # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
373 # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
374 /* XXH64 */
375 # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
376 # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
377 # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
378 # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
379 # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
380 # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
381 # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
382 # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
383 # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
384 /* XXH3_64bits */
385 # define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
386 # define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
387 # define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
388 # define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
389 # define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
390 # define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
391 # define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
392 # define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
393 # define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
394 # define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
395 # define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
396 # define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
397 # define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
398 # define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
399 # define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
400 /* XXH3_128bits */
401 # define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
402 # define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
403 # define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
404 # define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
405 # define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
406 # define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
407 # define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
408 # define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
409 # define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
410 # define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
411 # define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
412 # define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
413 # define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
414 # define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
415 # define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
416 #endif
417 
418 
419 /* *************************************
420 * Compiler specifics
421 ***************************************/
422 
423 /* specific declaration modes for Windows */
424 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
425 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
426 # ifdef XXH_EXPORT
427 # define XXH_PUBLIC_API __declspec(dllexport)
428 # elif XXH_IMPORT
429 # define XXH_PUBLIC_API __declspec(dllimport)
430 # endif
431 # else
432 # define XXH_PUBLIC_API /* do nothing */
433 # endif
434 #endif
435 
436 #if defined (__GNUC__)
437 # define XXH_CONSTF __attribute__((const))
438 # define XXH_PUREF __attribute__((pure))
439 # define XXH_MALLOCF __attribute__((malloc))
440 #else
441 # define XXH_CONSTF /* disable */
442 # define XXH_PUREF
443 # define XXH_MALLOCF
444 #endif
445 
446 /* *************************************
447 * Version
448 ***************************************/
449 #define XXH_VERSION_MAJOR 0
450 #define XXH_VERSION_MINOR 8
451 #define XXH_VERSION_RELEASE 2
452 
453 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
454 
464 
465 
466 /* ****************************
467 * Common basic types
468 ******************************/
469 #include <stddef.h> /* size_t */
473 typedef enum {
474  XXH_OK = 0,
475  XXH_ERROR
477 
478 
479 /*-**********************************************************************
480 * 32-bit hash
481 ************************************************************************/
482 #if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
483 
488 typedef uint32_t XXH32_hash_t;
489 
490 #elif !defined (__VMS) \
491  && (defined (__cplusplus) \
492  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
493 # include <stdint.h>
494  typedef uint32_t XXH32_hash_t;
495 
496 #else
497 # include <limits.h>
498 # if UINT_MAX == 0xFFFFFFFFUL
499  typedef unsigned int XXH32_hash_t;
500 # elif ULONG_MAX == 0xFFFFFFFFUL
501  typedef unsigned long XXH32_hash_t;
502 # else
503 # error "unsupported platform: need a 32-bit type"
504 # endif
505 #endif
506 
548 XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
549 
550 #ifndef XXH_NO_STREAM
551 
582 typedef struct XXH32_state_s XXH32_state_t;
583 
607 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
608 
622 XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
623 
643 
658 XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
659 #endif /* !XXH_NO_STREAM */
660 
661 /******* Canonical representation *******/
662 
663 /*
664  * The default return values from XXH functions are unsigned 32 and 64 bit
665  * integers.
666  * This the simplest and fastest format for further post-processing.
667  *
668  * However, this leaves open the question of what is the order on the byte level,
669  * since little and big endian conventions will store the same number differently.
670  *
671  * The canonical representation settles this issue by mandating big-endian
672  * convention, the same convention as human-readable numbers (large digits first).
673  *
674  * When writing hash values to storage, sending them over a network, or printing
675  * them, it's highly recommended to use the canonical representation to ensure
676  * portability across a wider range of systems, present and future.
677  *
678  * The following functions allow transformation of hash values to and from
679  * canonical format.
680  */
681 
685 typedef struct {
686  unsigned char digest[4];
688 
699 
711 
712 
713 #ifdef __has_attribute
714 # define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
715 #else
716 # define XXH_HAS_ATTRIBUTE(x) 0
717 #endif
718 
719 /*
720  * C23 __STDC_VERSION__ number hasn't been specified yet. For now
721  * leave as `201711L` (C17 + 1).
722  * TODO: Update to correct value when its been specified.
723  */
724 #define XXH_C23_VN 201711L
725 
726 /* C-language Attributes are added in C23. */
727 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
728 # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
729 #else
730 # define XXH_HAS_C_ATTRIBUTE(x) 0
731 #endif
732 
733 #if defined(__cplusplus) && defined(__has_cpp_attribute)
734 # define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
735 #else
736 # define XXH_HAS_CPP_ATTRIBUTE(x) 0
737 #endif
738 
739 /*
740  * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
741  * introduced in CPP17 and C23.
742  * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
743  * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
744  */
745 #if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
746 # define XXH_FALLTHROUGH [[fallthrough]]
747 #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
748 # define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
749 #else
750 # define XXH_FALLTHROUGH /* fallthrough */
751 #endif
752 
753 /*
754  * Define XXH_NOESCAPE for annotated pointers in public API.
755  * https://clang.llvm.org/docs/AttributeReference.html#noescape
756  * As of writing this, only supported by clang.
757  */
758 #if XXH_HAS_ATTRIBUTE(noescape)
759 # define XXH_NOESCAPE __attribute__((noescape))
760 #else
761 # define XXH_NOESCAPE
762 #endif
763 
764 
771 #ifndef XXH_NO_LONG_LONG
772 /*-**********************************************************************
773 * 64-bit hash
774 ************************************************************************/
775 #if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
776 
781 typedef uint64_t XXH64_hash_t;
782 #elif !defined (__VMS) \
783  && (defined (__cplusplus) \
784  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
785 # include <stdint.h>
786  typedef uint64_t XXH64_hash_t;
787 #else
788 # include <limits.h>
789 # if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
790  /* LP64 ABI says uint64_t is unsigned long */
791  typedef unsigned long XXH64_hash_t;
792 # else
793  /* the following type must have a width of 64-bit */
794  typedef unsigned long long XXH64_hash_t;
795 # endif
796 #endif
797 
836 
837 /******* Streaming *******/
838 #ifndef XXH_NO_STREAM
839 
844 typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
848 
852 #endif /* !XXH_NO_STREAM */
853 /******* Canonical representation *******/
854 typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
857 
858 #ifndef XXH_NO_XXH3
859 
906 /*-**********************************************************************
907 * XXH3 64-bit variant
908 ************************************************************************/
909 
925 
942 
950 #define XXH3_SECRET_SIZE_MIN 136
951 
970 XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
971 
972 
973 /******* Streaming *******/
974 #ifndef XXH_NO_STREAM
975 /*
976  * Streaming requires state maintenance.
977  * This operation costs memory and CPU.
978  * As a consequence, streaming is slower than one-shot hashing.
979  * For better performance, prefer one-shot functions whenever applicable.
980  */
981 
987 typedef struct XXH3_state_s XXH3_state_t;
991 
992 /*
993  * XXH3_64bits_reset():
994  * Initialize with default parameters.
995  * digest will be equivalent to `XXH3_64bits()`.
996  */
998 /*
999  * XXH3_64bits_reset_withSeed():
1000  * Generate a custom secret from `seed`, and store it into `statePtr`.
1001  * digest will be equivalent to `XXH3_64bits_withSeed()`.
1002  */
1014 
1017 #endif /* !XXH_NO_STREAM */
1018 
1019 /* note : canonical representation of XXH3 is the same as XXH64
1020  * since they both produce XXH64_hash_t values */
1021 
1022 
1023 /*-**********************************************************************
1024 * XXH3 128-bit variant
1025 ************************************************************************/
1026 
1033 typedef struct {
1036 } XXH128_hash_t;
1037 
1059 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
1060 
1061 /******* Streaming *******/
1062 #ifndef XXH_NO_STREAM
1063 /*
1064  * Streaming requires state maintenance.
1065  * This operation costs memory and CPU.
1066  * As a consequence, streaming is slower than one-shot hashing.
1067  * For better performance, prefer one-shot functions whenever applicable.
1068  *
1069  * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
1070  * Use already declared XXH3_createState() and XXH3_freeState().
1071  *
1072  * All reset and streaming functions have same meaning as their 64-bit counterpart.
1073  */
1074 
1079 
1082 #endif /* !XXH_NO_STREAM */
1083 
1084 /* Following helper functions make it possible to compare XXH128_hast_t values.
1085  * Since XXH128_hash_t is a structure, this capability is not offered by the language.
1086  * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
1087 
1093 
1102 XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
1103 
1104 
1105 /******* Canonical representation *******/
1106 typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
1109 
1110 
1111 #endif /* !XXH_NO_XXH3 */
1112 #endif /* XXH_NO_LONG_LONG */
1113 
1117 #endif /* XXHASH_H_5627135585666179 */
1118 
1119 
1120 
1121 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
1122 #define XXHASH_H_STATIC_13879238742
1123 /* ****************************************************************************
1124  * This section contains declarations which are not guaranteed to remain stable.
1125  * They may change in future versions, becoming incompatible with a different
1126  * version of the library.
1127  * These declarations should only be used with static linking.
1128  * Never use them in association with dynamic linking!
1129  ***************************************************************************** */
1130 
1131 /*
1132  * These definitions are only present to allow static allocation
1133  * of XXH states, on stack or in a struct, for example.
1134  * Never **ever** access their members directly.
1135  */
1136 
1149 struct XXH32_state_s {
1150  XXH32_hash_t total_len_32;
1151  XXH32_hash_t large_len;
1152  XXH32_hash_t v[4];
1153  XXH32_hash_t mem32[4];
1154  XXH32_hash_t memsize;
1155  XXH32_hash_t reserved;
1156 }; /* typedef'd to XXH32_state_t */
1157 
1158 
1159 #ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
1160 
1173 struct XXH64_state_s {
1174  XXH64_hash_t total_len;
1175  XXH64_hash_t v[4];
1176  XXH64_hash_t mem64[4];
1177  XXH32_hash_t memsize;
1178  XXH32_hash_t reserved32;
1179  XXH64_hash_t reserved64;
1180 }; /* typedef'd to XXH64_state_t */
1181 
1182 #ifndef XXH_NO_XXH3
1183 
1184 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
1185 # include <stdalign.h>
1186 # define XXH_ALIGN(n) alignas(n)
1187 #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
1188 /* In C++ alignas() is a keyword */
1189 # define XXH_ALIGN(n) alignas(n)
1190 #elif defined(__GNUC__)
1191 # define XXH_ALIGN(n) __attribute__ ((aligned(n)))
1192 #elif defined(_MSC_VER)
1193 # define XXH_ALIGN(n) __declspec(align(n))
1194 #else
1195 # define XXH_ALIGN(n) /* disabled */
1196 #endif
1197 
1198 /* Old GCC versions only accept the attribute after the type in structures. */
1199 #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
1200  && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
1201  && defined(__GNUC__)
1202 # define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
1203 #else
1204 # define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
1205 #endif
1206 
1214 #define XXH3_INTERNALBUFFER_SIZE 256
1215 
1223 #define XXH3_SECRET_DEFAULT_SIZE 192
1224 
1247 struct XXH3_state_s {
1248  XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
1250  XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
1252  XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
1254  XXH32_hash_t bufferedSize;
1256  XXH32_hash_t useSeed;
1258  size_t nbStripesSoFar;
1260  XXH64_hash_t totalLen;
1262  size_t nbStripesPerBlock;
1264  size_t secretLimit;
1268  XXH64_hash_t reserved64;
1270  const unsigned char* extSecret;
1273  /* note: there may be some padding at the end due to alignment on 64 bytes */
1274 }; /* typedef'd to XXH3_state_t */
1275 
1276 #undef XXH_ALIGN_MEMBER
1277 
1289 #define XXH3_INITSTATE(XXH3_state_ptr) \
1290  do { \
1291  XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
1292  tmp_xxh3_state_ptr->seed = 0; \
1293  tmp_xxh3_state_ptr->extSecret = NULL; \
1294  } while(0)
1295 
1296 
1300 XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
1301 
1302 
1303 /* === Experimental API === */
1304 /* Symbols defined below must be considered tied to a specific library version. */
1305 
1357 XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
1358 
1396 XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
1397 
1425 XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
1426  XXH_NOESCAPE const void* secret, size_t secretSize,
1427  XXH64_hash_t seed);
1430 XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
1431  XXH_NOESCAPE const void* secret, size_t secretSize,
1432  XXH64_hash_t seed64);
1433 #ifndef XXH_NO_STREAM
1434 
1436 XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1437  XXH_NOESCAPE const void* secret, size_t secretSize,
1438  XXH64_hash_t seed64);
1441 XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1442  XXH_NOESCAPE const void* secret, size_t secretSize,
1443  XXH64_hash_t seed64);
1444 #endif /* !XXH_NO_STREAM */
1445 
1446 #endif /* !XXH_NO_XXH3 */
1447 #endif /* XXH_NO_LONG_LONG */
1448 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
1449 # define XXH_IMPLEMENTATION
1450 #endif
1451 
1452 #endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
1453 
1454 
1455 /* ======================================================================== */
1456 /* ======================================================================== */
1457 /* ======================================================================== */
1458 
1459 
1460 /*-**********************************************************************
1461  * xxHash implementation
1462  *-**********************************************************************
1463  * xxHash's implementation used to be hosted inside xxhash.c.
1464  *
1465  * However, inlining requires implementation to be visible to the compiler,
1466  * hence be included alongside the header.
1467  * Previously, implementation was hosted inside xxhash.c,
1468  * which was then #included when inlining was activated.
1469  * This construction created issues with a few build and install systems,
1470  * as it required xxhash.c to be stored in /include directory.
1471  *
1472  * xxHash implementation is now directly integrated within xxhash.h.
1473  * As a consequence, xxhash.c is no longer needed in /include.
1474  *
1475  * xxhash.c is still available and is still useful.
1476  * In a "normal" setup, when xxhash is not inlined,
1477  * xxhash.h only exposes the prototypes and public symbols,
1478  * while xxhash.c can be built into an object file xxhash.o
1479  * which can then be linked into the final binary.
1480  ************************************************************************/
1481 
1482 #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
1483  || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
1484 # define XXH_IMPLEM_13a8737387
1485 
1486 /* *************************************
1487 * Tuning parameters
1488 ***************************************/
1489 
1496 #ifdef XXH_DOXYGEN
1497 
1502 # define XXH_NO_LONG_LONG
1503 # undef XXH_NO_LONG_LONG /* don't actually */
1504 
1554 # define XXH_FORCE_MEMORY_ACCESS 0
1555 
1582 # define XXH_SIZE_OPT 0
1583 
1612 # define XXH_FORCE_ALIGN_CHECK 0
1613 
1634 # define XXH_NO_INLINE_HINTS 0
1635 
1651 # define XXH3_INLINE_SECRET 0
1652 
1663 # define XXH32_ENDJMP 0
1664 
1672 # define XXH_OLD_NAMES
1673 # undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
1674 
1683 # define XXH_NO_STREAM
1684 # undef XXH_NO_STREAM /* don't actually */
1685 #endif /* XXH_DOXYGEN */
1686 
1690 #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
1691  /* prefer __packed__ structures (method 1) for GCC
1692  * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
1693  * which for some reason does unaligned loads. */
1694 # if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
1695 # define XXH_FORCE_MEMORY_ACCESS 1
1696 # endif
1697 #endif
1698 
1699 #ifndef XXH_SIZE_OPT
1700  /* default to 1 for -Os or -Oz */
1701 # if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
1702 # define XXH_SIZE_OPT 1
1703 # else
1704 # define XXH_SIZE_OPT 0
1705 # endif
1706 #endif
1707 
1708 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
1709  /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
1710 # if XXH_SIZE_OPT >= 1 || \
1711  defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
1712  || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
1713 # define XXH_FORCE_ALIGN_CHECK 0
1714 # else
1715 # define XXH_FORCE_ALIGN_CHECK 1
1716 # endif
1717 #endif
1718 
1719 #ifndef XXH_NO_INLINE_HINTS
1720 # if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
1721 # define XXH_NO_INLINE_HINTS 1
1722 # else
1723 # define XXH_NO_INLINE_HINTS 0
1724 # endif
1725 #endif
1726 
1727 #ifndef XXH3_INLINE_SECRET
1728 # if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
1729  || !defined(XXH_INLINE_ALL)
1730 # define XXH3_INLINE_SECRET 0
1731 # else
1732 # define XXH3_INLINE_SECRET 1
1733 # endif
1734 #endif
1735 
1736 #ifndef XXH32_ENDJMP
1737 /* generally preferable for performance */
1738 # define XXH32_ENDJMP 0
1739 #endif
1740 
1747 /* *************************************
1748 * Includes & Memory related functions
1749 ***************************************/
1750 #if defined(XXH_NO_STREAM)
1751 /* nothing */
1752 #elif defined(XXH_NO_STDLIB)
1753 
1754 /* When requesting to disable any mention of stdlib,
1755  * the library loses the ability to invoked malloc / free.
1756  * In practice, it means that functions like `XXH*_createState()`
1757  * will always fail, and return NULL.
1758  * This flag is useful in situations where
1759  * xxhash.h is integrated into some kernel, embedded or limited environment
1760  * without access to dynamic allocation.
1761  */
1762 
1763 static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
1764 static void XXH_free(void* p) { (void)p; }
1765 
1766 #else
1767 
1768 /*
1769  * Modify the local functions below should you wish to use
1770  * different memory routines for malloc() and free()
1771  */
1772 #include <stdlib.h>
1773 
1778 static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
1779 
1784 static void XXH_free(void* p) { free(p); }
1785 
1786 #endif /* XXH_NO_STDLIB */
1787 
1788 #include <string.h>
1789 
1794 static void* XXH_memcpy(void* dest, const void* src, size_t size)
1795 {
1796  return memcpy(dest,src,size);
1797 }
1798 
1799 #include <limits.h> /* ULLONG_MAX */
1800 
1801 
1802 /* *************************************
1803 * Compiler Specific Options
1804 ***************************************/
1805 #ifdef _MSC_VER /* Visual Studio warning fix */
1806 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1807 #endif
1808 
1809 #if XXH_NO_INLINE_HINTS /* disable inlining hints */
1810 # if defined(__GNUC__) || defined(__clang__)
1811 # define XXH_FORCE_INLINE static __attribute__((unused))
1812 # else
1813 # define XXH_FORCE_INLINE static
1814 # endif
1815 # define XXH_NO_INLINE static
1816 /* enable inlining hints */
1817 #elif defined(__GNUC__) || defined(__clang__)
1818 # define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
1819 # define XXH_NO_INLINE static __attribute__((noinline))
1820 #elif defined(_MSC_VER) /* Visual Studio */
1821 # define XXH_FORCE_INLINE static __forceinline
1822 # define XXH_NO_INLINE static __declspec(noinline)
1823 #elif defined (__cplusplus) \
1824  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
1825 # define XXH_FORCE_INLINE static inline
1826 # define XXH_NO_INLINE static
1827 #else
1828 # define XXH_FORCE_INLINE static
1829 # define XXH_NO_INLINE static
1830 #endif
1831 
1832 #if XXH3_INLINE_SECRET
1833 # define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
1834 #else
1835 # define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
1836 #endif
1837 
1838 
1839 /* *************************************
1840 * Debug
1841 ***************************************/
1850 #ifndef XXH_DEBUGLEVEL
1851 # ifdef DEBUGLEVEL /* backwards compat */
1852 # define XXH_DEBUGLEVEL DEBUGLEVEL
1853 # else
1854 # define XXH_DEBUGLEVEL 0
1855 # endif
1856 #endif
1857 
1858 #if (XXH_DEBUGLEVEL>=1) || __CPPCHECK__
1859 # include <assert.h> /* note: can still be disabled with NDEBUG */
1860 # define XXH_ASSERT(c) assert(c)
1861 #else
1862 # define XXH_ASSERT(c) XXH_ASSUME(c)
1863 #endif
1864 
1865 /* note: use after variable declarations */
1866 #ifndef XXH_STATIC_ASSERT
1867 # if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
1868 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
1869 # elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
1870 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
1871 # else
1872 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
1873 # endif
1874 # define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
1875 #endif
1876 
1893 #if defined(__GNUC__) || defined(__clang__)
1894 # define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
1895 #else
1896 # define XXH_COMPILER_GUARD(var) ((void)0)
1897 #endif
1898 
1899 #if defined(__clang__)
1900 # define XXH_COMPILER_GUARD_W(var) __asm__("" : "+w" (var))
1901 #else
1902 # define XXH_COMPILER_GUARD_W(var) ((void)0)
1903 #endif
1904 
1905 /* *************************************
1906 * Basic Types
1907 ***************************************/
1908 #if !defined (__VMS) \
1909  && (defined (__cplusplus) \
1910  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
1911 # include <stdint.h>
1912  typedef uint8_t xxh_u8;
1913 #else
1914  typedef unsigned char xxh_u8;
1915 #endif
1916 typedef XXH32_hash_t xxh_u32;
1917 
1918 #ifdef XXH_OLD_NAMES
1919 # define BYTE xxh_u8
1920 # define U8 xxh_u8
1921 # define U32 xxh_u32
1922 #endif
1923 
1924 /* *** Memory access *** */
1925 
1976 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
1977 /*
1978  * Manual byteshift. Best for old compilers which don't inline memcpy.
1979  * We actually directly use XXH_readLE32 and XXH_readBE32.
1980  */
1981 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
1982 
1983 /*
1984  * Force direct memory access. Only works on CPU which support unaligned memory
1985  * access in hardware.
1986  */
1987 static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
1988 
1989 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1990 
1991 /*
1992  * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
1993  * documentation claimed that it only increased the alignment, but actually it
1994  * can decrease it on gcc, clang, and icc:
1995  * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
1996  * https://gcc.godbolt.org/z/xYez1j67Y.
1997  */
1998 #ifdef XXH_OLD_NAMES
1999 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
2000 #endif
2001 static xxh_u32 XXH_read32(const void* ptr)
2002 {
2003  typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
2004  return *((const xxh_unalign32*)ptr);
2005 }
2006 
2007 #else
2008 
2009 /*
2010  * Portable and safe solution. Generally efficient.
2011  * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2012  */
2013 static xxh_u32 XXH_read32(const void* memPtr)
2014 {
2015  xxh_u32 val;
2016  XXH_memcpy(&val, memPtr, sizeof(val));
2017  return val;
2018 }
2019 
2020 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
2021 
2022 
2023 /* *** Endianness *** */
2024 
2041 #ifndef XXH_CPU_LITTLE_ENDIAN
2042 /*
2043  * Try to detect endianness automatically, to avoid the nonstandard behavior
2044  * in `XXH_isLittleEndian()`
2045  */
2046 # if defined(_WIN32) /* Windows is always little endian */ \
2047  || defined(__LITTLE_ENDIAN__) \
2048  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
2049 # define XXH_CPU_LITTLE_ENDIAN 1
2050 # elif defined(__BIG_ENDIAN__) \
2051  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2052 # define XXH_CPU_LITTLE_ENDIAN 0
2053 # else
2054 
2060 static int XXH_isLittleEndian(void)
2061 {
2062  /*
2063  * Portable and well-defined behavior.
2064  * Don't use static: it is detrimental to performance.
2065  */
2066  const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
2067  return one.c[0];
2068 }
2069 # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
2070 # endif
2071 #endif
2072 
2073 
2074 
2075 
2076 /* ****************************************
2077 * Compiler-specific Functions and Macros
2078 ******************************************/
2079 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
2080 
2081 #ifdef __has_builtin
2082 # define XXH_HAS_BUILTIN(x) __has_builtin(x)
2083 #else
2084 # define XXH_HAS_BUILTIN(x) 0
2085 #endif
2086 
2087 
2088 
2089 /*
2090  * C23 and future versions have standard "unreachable()".
2091  * Once it has been implemented reliably we can add it as an
2092  * additional case:
2093  *
2094  * ```
2095  * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
2096  * # include <stddef.h>
2097  * # ifdef unreachable
2098  * # define XXH_UNREACHABLE() unreachable()
2099  * # endif
2100  * #endif
2101  * ```
2102  *
2103  * Note C++23 also has std::unreachable() which can be detected
2104  * as follows:
2105  * ```
2106  * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
2107  * # include <utility>
2108  * # define XXH_UNREACHABLE() std::unreachable()
2109  * #endif
2110  * ```
2111  * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
2112  * We don't use that as including `<utility>` in `extern "C"` blocks
2113  * doesn't work on GCC12
2114  */
2115 
2116 #if XXH_HAS_BUILTIN(__builtin_unreachable)
2117 # define XXH_UNREACHABLE() __builtin_unreachable()
2118 
2119 #elif defined(_MSC_VER)
2120 # define XXH_UNREACHABLE() __assume(0)
2121 
2122 #else
2123 # define XXH_UNREACHABLE()
2124 #endif
2125 
2126 #if XXH_HAS_BUILTIN(__builtin_assume)
2127 # define XXH_ASSUME(c) __builtin_assume(c)
2128 #else
2129 # define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
2130 #endif
2131 
2145 #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
2146  && XXH_HAS_BUILTIN(__builtin_rotateleft64)
2147 # define XXH_rotl32 __builtin_rotateleft32
2148 # define XXH_rotl64 __builtin_rotateleft64
2149 /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
2150 #elif defined(_MSC_VER)
2151 # define XXH_rotl32(x,r) _rotl(x,r)
2152 # define XXH_rotl64(x,r) _rotl64(x,r)
2153 #else
2154 # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
2155 # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
2156 #endif
2157 
2166 #if defined(_MSC_VER) /* Visual Studio */
2167 # define XXH_swap32 _byteswap_ulong
2168 #elif XXH_GCC_VERSION >= 403
2169 # define XXH_swap32 __builtin_bswap32
2170 #else
2171 static xxh_u32 XXH_swap32 (xxh_u32 x)
2172 {
2173  return ((x << 24) & 0xff000000 ) |
2174  ((x << 8) & 0x00ff0000 ) |
2175  ((x >> 8) & 0x0000ff00 ) |
2176  ((x >> 24) & 0x000000ff );
2177 }
2178 #endif
2179 
2180 
2181 /* ***************************
2182 * Memory reads
2183 *****************************/
2184 
2189 typedef enum {
2190  XXH_aligned,
2191  XXH_unaligned
2192 } XXH_alignment;
2193 
2194 /*
2195  * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
2196  *
2197  * This is ideal for older compilers which don't inline memcpy.
2198  */
2199 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2200 
2201 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
2202 {
2203  const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2204  return bytePtr[0]
2205  | ((xxh_u32)bytePtr[1] << 8)
2206  | ((xxh_u32)bytePtr[2] << 16)
2207  | ((xxh_u32)bytePtr[3] << 24);
2208 }
2209 
2210 XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
2211 {
2212  const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2213  return bytePtr[3]
2214  | ((xxh_u32)bytePtr[2] << 8)
2215  | ((xxh_u32)bytePtr[1] << 16)
2216  | ((xxh_u32)bytePtr[0] << 24);
2217 }
2218 
2219 #else
2220 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
2221 {
2222  return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
2223 }
2224 
2225 static xxh_u32 XXH_readBE32(const void* ptr)
2226 {
2227  return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
2228 }
2229 #endif
2230 
2231 XXH_FORCE_INLINE xxh_u32
2232 XXH_readLE32_align(const void* ptr, XXH_alignment align)
2233 {
2234  if (align==XXH_unaligned) {
2235  return XXH_readLE32(ptr);
2236  } else {
2237  return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
2238  }
2239 }
2240 
2241 
2242 /* *************************************
2243 * Misc
2244 ***************************************/
2246 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
2247 
2248 
2249 /* *******************************************************************
2250 * 32-bit hash functions
2251 *********************************************************************/
2260  /* #define instead of static const, to be used as initializers */
2261 #define XXH_PRIME32_1 0x9E3779B1U
2262 #define XXH_PRIME32_2 0x85EBCA77U
2263 #define XXH_PRIME32_3 0xC2B2AE3DU
2264 #define XXH_PRIME32_4 0x27D4EB2FU
2265 #define XXH_PRIME32_5 0x165667B1U
2267 #ifdef XXH_OLD_NAMES
2268 # define PRIME32_1 XXH_PRIME32_1
2269 # define PRIME32_2 XXH_PRIME32_2
2270 # define PRIME32_3 XXH_PRIME32_3
2271 # define PRIME32_4 XXH_PRIME32_4
2272 # define PRIME32_5 XXH_PRIME32_5
2273 #endif
2274 
2286 static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
2287 {
2288  acc += input * XXH_PRIME32_2;
2289  acc = XXH_rotl32(acc, 13);
2290  acc *= XXH_PRIME32_1;
2291 #if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
2292  /*
2293  * UGLY HACK:
2294  * A compiler fence is the only thing that prevents GCC and Clang from
2295  * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
2296  * reason) without globally disabling SSE4.1.
2297  *
2298  * The reason we want to avoid vectorization is because despite working on
2299  * 4 integers at a time, there are multiple factors slowing XXH32 down on
2300  * SSE4:
2301  * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
2302  * newer chips!) making it slightly slower to multiply four integers at
2303  * once compared to four integers independently. Even when pmulld was
2304  * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
2305  * just to multiply unless doing a long operation.
2306  *
2307  * - Four instructions are required to rotate,
2308  * movqda tmp, v // not required with VEX encoding
2309  * pslld tmp, 13 // tmp <<= 13
2310  * psrld v, 19 // x >>= 19
2311  * por v, tmp // x |= tmp
2312  * compared to one for scalar:
2313  * roll v, 13 // reliably fast across the board
2314  * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
2315  *
2316  * - Instruction level parallelism is actually more beneficial here because
2317  * the SIMD actually serializes this operation: While v1 is rotating, v2
2318  * can load data, while v3 can multiply. SSE forces them to operate
2319  * together.
2320  *
2321  * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
2322  * the loop. NEON is only faster on the A53, and with the newer cores, it is less
2323  * than half the speed.
2324  */
2325  XXH_COMPILER_GUARD(acc);
2326 #endif
2327  return acc;
2328 }
2329 
2340 static xxh_u32 XXH32_avalanche(xxh_u32 hash)
2341 {
2342  hash ^= hash >> 15;
2343  hash *= XXH_PRIME32_2;
2344  hash ^= hash >> 13;
2345  hash *= XXH_PRIME32_3;
2346  hash ^= hash >> 16;
2347  return hash;
2348 }
2349 
2350 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
2351 
2367 static XXH_PUREF xxh_u32
2368 XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
2369 {
2370 #define XXH_PROCESS1 do { \
2371  hash += (*ptr++) * XXH_PRIME32_5; \
2372  hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
2373 } while (0)
2374 
2375 #define XXH_PROCESS4 do { \
2376  hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
2377  ptr += 4; \
2378  hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
2379 } while (0)
2380 
2381  if (ptr==NULL) XXH_ASSERT(len == 0);
2382 
2383  /* Compact rerolled version; generally faster */
2384  if (!XXH32_ENDJMP) {
2385  len &= 15;
2386  while (len >= 4) {
2387  XXH_PROCESS4;
2388  len -= 4;
2389  }
2390  while (len > 0) {
2391  XXH_PROCESS1;
2392  --len;
2393  }
2394  return XXH32_avalanche(hash);
2395  } else {
2396  switch(len&15) /* or switch(bEnd - p) */ {
2397  case 12: XXH_PROCESS4;
2398  XXH_FALLTHROUGH; /* fallthrough */
2399  case 8: XXH_PROCESS4;
2400  XXH_FALLTHROUGH; /* fallthrough */
2401  case 4: XXH_PROCESS4;
2402  return XXH32_avalanche(hash);
2403 
2404  case 13: XXH_PROCESS4;
2405  XXH_FALLTHROUGH; /* fallthrough */
2406  case 9: XXH_PROCESS4;
2407  XXH_FALLTHROUGH; /* fallthrough */
2408  case 5: XXH_PROCESS4;
2409  XXH_PROCESS1;
2410  return XXH32_avalanche(hash);
2411 
2412  case 14: XXH_PROCESS4;
2413  XXH_FALLTHROUGH; /* fallthrough */
2414  case 10: XXH_PROCESS4;
2415  XXH_FALLTHROUGH; /* fallthrough */
2416  case 6: XXH_PROCESS4;
2417  XXH_PROCESS1;
2418  XXH_PROCESS1;
2419  return XXH32_avalanche(hash);
2420 
2421  case 15: XXH_PROCESS4;
2422  XXH_FALLTHROUGH; /* fallthrough */
2423  case 11: XXH_PROCESS4;
2424  XXH_FALLTHROUGH; /* fallthrough */
2425  case 7: XXH_PROCESS4;
2426  XXH_FALLTHROUGH; /* fallthrough */
2427  case 3: XXH_PROCESS1;
2428  XXH_FALLTHROUGH; /* fallthrough */
2429  case 2: XXH_PROCESS1;
2430  XXH_FALLTHROUGH; /* fallthrough */
2431  case 1: XXH_PROCESS1;
2432  XXH_FALLTHROUGH; /* fallthrough */
2433  case 0: return XXH32_avalanche(hash);
2434  }
2435  XXH_ASSERT(0);
2436  return hash; /* reaching this point is deemed impossible */
2437  }
2438 }
2439 
2440 #ifdef XXH_OLD_NAMES
2441 # define PROCESS1 XXH_PROCESS1
2442 # define PROCESS4 XXH_PROCESS4
2443 #else
2444 # undef XXH_PROCESS1
2445 # undef XXH_PROCESS4
2446 #endif
2447 
2456 XXH_FORCE_INLINE XXH_PUREF xxh_u32
2457 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
2458 {
2459  xxh_u32 h32;
2460 
2461  if (input==NULL) XXH_ASSERT(len == 0);
2462 
2463  if (len>=16) {
2464  const xxh_u8* const bEnd = input + len;
2465  const xxh_u8* const limit = bEnd - 15;
2466  xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2467  xxh_u32 v2 = seed + XXH_PRIME32_2;
2468  xxh_u32 v3 = seed + 0;
2469  xxh_u32 v4 = seed - XXH_PRIME32_1;
2470 
2471  do {
2472  v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
2473  v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
2474  v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
2475  v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
2476  } while (input < limit);
2477 
2478  h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
2479  + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
2480  } else {
2481  h32 = seed + XXH_PRIME32_5;
2482  }
2483 
2484  h32 += (xxh_u32)len;
2485 
2486  return XXH32_finalize(h32, input, len&15, align);
2487 }
2488 
2490 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
2491 {
2492 #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2493  /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2494  XXH32_state_t state;
2495  XXH32_reset(&state, seed);
2496  XXH32_update(&state, (const xxh_u8*)input, len);
2497  return XXH32_digest(&state);
2498 #else
2499  if (XXH_FORCE_ALIGN_CHECK) {
2500  if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
2501  return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
2502  } }
2503 
2504  return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
2505 #endif
2506 }
2507 
2508 
2509 
2510 /******* Hash streaming *******/
2511 #ifndef XXH_NO_STREAM
2512 
2514 {
2515  return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
2516 }
2519 {
2520  XXH_free(statePtr);
2521  return XXH_OK;
2522 }
2523 
2525 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
2526 {
2527  XXH_memcpy(dstState, srcState, sizeof(*dstState));
2528 }
2529 
2531 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
2532 {
2533  XXH_ASSERT(statePtr != NULL);
2534  // cppcheck-suppress nullPointerRedundantCheck; false positive
2535  memset(statePtr, 0, sizeof(*statePtr));
2536  // cppcheck-suppress nullPointerRedundantCheck; false positive
2537  statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2538  // cppcheck-suppress nullPointerRedundantCheck; false positive
2539  statePtr->v[1] = seed + XXH_PRIME32_2;
2540  // cppcheck-suppress nullPointerRedundantCheck; false positive
2541  statePtr->v[2] = seed + 0;
2542  // cppcheck-suppress nullPointerRedundantCheck; false positive
2543  statePtr->v[3] = seed - XXH_PRIME32_1;
2544  return XXH_OK;
2545 }
2546 
2547 
2550 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
2551 {
2552  if (input==NULL) {
2553  XXH_ASSERT(len == 0);
2554  return XXH_OK;
2555  }
2556 
2557  { const xxh_u8* p = (const xxh_u8*)input;
2558  const xxh_u8* const bEnd = p + len;
2559 
2560  state->total_len_32 += (XXH32_hash_t)len;
2561  state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
2562 
2563  if (state->memsize + len < 16) { /* fill in tmp buffer */
2564  XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
2565  state->memsize += (XXH32_hash_t)len;
2566  return XXH_OK;
2567  }
2568 
2569  if (state->memsize) { /* some data left from previous update */
2570  XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
2571  { const xxh_u32* p32 = state->mem32;
2572  state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
2573  state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
2574  state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
2575  state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
2576  }
2577  p += 16-state->memsize;
2578  state->memsize = 0;
2579  }
2580 
2581  if (p <= bEnd-16) {
2582  const xxh_u8* const limit = bEnd - 16;
2583 
2584  do {
2585  state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
2586  state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
2587  state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
2588  state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
2589  } while (p<=limit);
2590 
2591  }
2592 
2593  if (p < bEnd) {
2594  XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
2595  state->memsize = (unsigned)(bEnd-p);
2596  }
2597  }
2598 
2599  return XXH_OK;
2600 }
2601 
2602 
2604 XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
2605 {
2606  xxh_u32 h32;
2607 
2608  if (state->large_len) {
2609  h32 = XXH_rotl32(state->v[0], 1)
2610  + XXH_rotl32(state->v[1], 7)
2611  + XXH_rotl32(state->v[2], 12)
2612  + XXH_rotl32(state->v[3], 18);
2613  } else {
2614  h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
2615  }
2616 
2617  h32 += state->total_len_32;
2618 
2619  return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
2620 }
2621 #endif /* !XXH_NO_STREAM */
2622 
2623 /******* Canonical representation *******/
2624 
2640 {
2641  XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
2642  if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
2643  XXH_memcpy(dst, &hash, sizeof(*dst));
2644 }
2647 {
2648  return XXH_readBE32(src);
2649 }
2650 
2651 
2652 #ifndef XXH_NO_LONG_LONG
2653 
2654 /* *******************************************************************
2655 * 64-bit hash functions
2656 *********************************************************************/
2662 /******* Memory access *******/
2663 
2664 typedef XXH64_hash_t xxh_u64;
2665 
2666 #ifdef XXH_OLD_NAMES
2667 # define U64 xxh_u64
2668 #endif
2669 
2670 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2671 /*
2672  * Manual byteshift. Best for old compilers which don't inline memcpy.
2673  * We actually directly use XXH_readLE64 and XXH_readBE64.
2674  */
2675 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
2676 
2677 /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
2678 static xxh_u64 XXH_read64(const void* memPtr)
2679 {
2680  return *(const xxh_u64*) memPtr;
2681 }
2682 
2683 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
2684 
2685 /*
2686  * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
2687  * documentation claimed that it only increased the alignment, but actually it
2688  * can decrease it on gcc, clang, and icc:
2689  * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
2690  * https://gcc.godbolt.org/z/xYez1j67Y.
2691  */
2692 #ifdef XXH_OLD_NAMES
2693 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
2694 #endif
2695 static xxh_u64 XXH_read64(const void* ptr)
2696 {
2697  typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
2698  return *((const xxh_unalign64*)ptr);
2699 }
2700 
2701 #else
2702 
2703 /*
2704  * Portable and safe solution. Generally efficient.
2705  * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2706  */
2707 static xxh_u64 XXH_read64(const void* memPtr)
2708 {
2709  xxh_u64 val;
2710  XXH_memcpy(&val, memPtr, sizeof(val));
2711  return val;
2712 }
2713 
2714 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
2715 
2716 #if defined(_MSC_VER) /* Visual Studio */
2717 # define XXH_swap64 _byteswap_uint64
2718 #elif XXH_GCC_VERSION >= 403
2719 # define XXH_swap64 __builtin_bswap64
2720 #else
2721 static xxh_u64 XXH_swap64(xxh_u64 x)
2722 {
2723  return ((x << 56) & 0xff00000000000000ULL) |
2724  ((x << 40) & 0x00ff000000000000ULL) |
2725  ((x << 24) & 0x0000ff0000000000ULL) |
2726  ((x << 8) & 0x000000ff00000000ULL) |
2727  ((x >> 8) & 0x00000000ff000000ULL) |
2728  ((x >> 24) & 0x0000000000ff0000ULL) |
2729  ((x >> 40) & 0x000000000000ff00ULL) |
2730  ((x >> 56) & 0x00000000000000ffULL);
2731 }
2732 #endif
2733 
2734 
2735 /* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
2736 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2737 
2738 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
2739 {
2740  const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2741  return bytePtr[0]
2742  | ((xxh_u64)bytePtr[1] << 8)
2743  | ((xxh_u64)bytePtr[2] << 16)
2744  | ((xxh_u64)bytePtr[3] << 24)
2745  | ((xxh_u64)bytePtr[4] << 32)
2746  | ((xxh_u64)bytePtr[5] << 40)
2747  | ((xxh_u64)bytePtr[6] << 48)
2748  | ((xxh_u64)bytePtr[7] << 56);
2749 }
2750 
2751 XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
2752 {
2753  const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2754  return bytePtr[7]
2755  | ((xxh_u64)bytePtr[6] << 8)
2756  | ((xxh_u64)bytePtr[5] << 16)
2757  | ((xxh_u64)bytePtr[4] << 24)
2758  | ((xxh_u64)bytePtr[3] << 32)
2759  | ((xxh_u64)bytePtr[2] << 40)
2760  | ((xxh_u64)bytePtr[1] << 48)
2761  | ((xxh_u64)bytePtr[0] << 56);
2762 }
2763 
2764 #else
2765 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
2766 {
2767  return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
2768 }
2769 
2770 static xxh_u64 XXH_readBE64(const void* ptr)
2771 {
2772  return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
2773 }
2774 #endif
2775 
2776 XXH_FORCE_INLINE xxh_u64
2777 XXH_readLE64_align(const void* ptr, XXH_alignment align)
2778 {
2779  if (align==XXH_unaligned)
2780  return XXH_readLE64(ptr);
2781  else
2782  return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
2783 }
2784 
2785 
2786 /******* xxh64 *******/
2795 /* #define rather that static const, to be used as initializers */
2796 #define XXH_PRIME64_1 0x9E3779B185EBCA87ULL
2797 #define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL
2798 #define XXH_PRIME64_3 0x165667B19E3779F9ULL
2799 #define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL
2800 #define XXH_PRIME64_5 0x27D4EB2F165667C5ULL
2802 #ifdef XXH_OLD_NAMES
2803 # define PRIME64_1 XXH_PRIME64_1
2804 # define PRIME64_2 XXH_PRIME64_2
2805 # define PRIME64_3 XXH_PRIME64_3
2806 # define PRIME64_4 XXH_PRIME64_4
2807 # define PRIME64_5 XXH_PRIME64_5
2808 #endif
2809 
2811 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
2812 {
2813  acc += input * XXH_PRIME64_2;
2814  acc = XXH_rotl64(acc, 31);
2815  acc *= XXH_PRIME64_1;
2816  return acc;
2817 }
2818 
2819 static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
2820 {
2821  val = XXH64_round(0, val);
2822  acc ^= val;
2823  acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
2824  return acc;
2825 }
2826 
2828 static xxh_u64 XXH64_avalanche(xxh_u64 hash)
2829 {
2830  hash ^= hash >> 33;
2831  hash *= XXH_PRIME64_2;
2832  hash ^= hash >> 29;
2833  hash *= XXH_PRIME64_3;
2834  hash ^= hash >> 32;
2835  return hash;
2836 }
2837 
2838 
2839 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
2840 
2856 static XXH_PUREF xxh_u64
2857 XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
2858 {
2859  if (ptr==NULL) XXH_ASSERT(len == 0);
2860  len &= 31;
2861  while (len >= 8) {
2862  xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
2863  ptr += 8;
2864  hash ^= k1;
2865  hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
2866  len -= 8;
2867  }
2868  if (len >= 4) {
2869  hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
2870  ptr += 4;
2871  hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
2872  len -= 4;
2873  }
2874  while (len > 0) {
2875  hash ^= (*ptr++) * XXH_PRIME64_5;
2876  hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
2877  --len;
2878  }
2879  return XXH64_avalanche(hash);
2880 }
2881 
2882 #ifdef XXH_OLD_NAMES
2883 # define PROCESS1_64 XXH_PROCESS1_64
2884 # define PROCESS4_64 XXH_PROCESS4_64
2885 # define PROCESS8_64 XXH_PROCESS8_64
2886 #else
2887 # undef XXH_PROCESS1_64
2888 # undef XXH_PROCESS4_64
2889 # undef XXH_PROCESS8_64
2890 #endif
2891 
2900 XXH_FORCE_INLINE XXH_PUREF xxh_u64
2901 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
2902 {
2903  xxh_u64 h64;
2904  if (input==NULL) XXH_ASSERT(len == 0);
2905 
2906  if (len>=32) {
2907  const xxh_u8* const bEnd = input + len;
2908  const xxh_u8* const limit = bEnd - 31;
2909  xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2910  xxh_u64 v2 = seed + XXH_PRIME64_2;
2911  xxh_u64 v3 = seed + 0;
2912  xxh_u64 v4 = seed - XXH_PRIME64_1;
2913 
2914  do {
2915  v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
2916  v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
2917  v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
2918  v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
2919  } while (input<limit);
2920 
2921  h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
2922  h64 = XXH64_mergeRound(h64, v1);
2923  h64 = XXH64_mergeRound(h64, v2);
2924  h64 = XXH64_mergeRound(h64, v3);
2925  h64 = XXH64_mergeRound(h64, v4);
2926 
2927  } else {
2928  h64 = seed + XXH_PRIME64_5;
2929  }
2930 
2931  h64 += (xxh_u64) len;
2932 
2933  return XXH64_finalize(h64, input, len, align);
2934 }
2935 
2936 
2938 XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
2939 {
2940 #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2941  /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2942  XXH64_state_t state;
2943  XXH64_reset(&state, seed);
2944  XXH64_update(&state, (const xxh_u8*)input, len);
2945  return XXH64_digest(&state);
2946 #else
2947  if (XXH_FORCE_ALIGN_CHECK) {
2948  if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
2949  return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
2950  } }
2951 
2952  return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
2953 
2954 #endif
2955 }
2956 
2957 /******* Hash Streaming *******/
2958 #ifndef XXH_NO_STREAM
2959 
2961 {
2962  return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
2963 }
2966 {
2967  XXH_free(statePtr);
2968  return XXH_OK;
2969 }
2970 
2973 {
2974  XXH_memcpy(dstState, srcState, sizeof(*dstState));
2975 }
2976 
2979 {
2980  XXH_ASSERT(statePtr != NULL);
2981  // cppcheck-suppress nullPointerRedundantCheck; false positive
2982  memset(statePtr, 0, sizeof(*statePtr));
2983  // cppcheck-suppress nullPointerRedundantCheck; false positive
2984  statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2985  // cppcheck-suppress nullPointerRedundantCheck; false positive
2986  statePtr->v[1] = seed + XXH_PRIME64_2;
2987  // cppcheck-suppress nullPointerRedundantCheck; false positive
2988  statePtr->v[2] = seed + 0;
2989  // cppcheck-suppress nullPointerRedundantCheck; false positive
2990  statePtr->v[3] = seed - XXH_PRIME64_1;
2991  return XXH_OK;
2992 }
2993 
2996 XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
2997 {
2998  if (input==NULL) {
2999  XXH_ASSERT(len == 0);
3000  return XXH_OK;
3001  }
3002 
3003  { const xxh_u8* p = (const xxh_u8*)input;
3004  const xxh_u8* const bEnd = p + len;
3005 
3006  state->total_len += len;
3007 
3008  if (state->memsize + len < 32) { /* fill in tmp buffer */
3009  XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
3010  state->memsize += (xxh_u32)len;
3011  return XXH_OK;
3012  }
3013 
3014  if (state->memsize) { /* tmp buffer is full */
3015  XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
3016  state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
3017  state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
3018  state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
3019  state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
3020  p += 32 - state->memsize;
3021  state->memsize = 0;
3022  }
3023 
3024  if (p+32 <= bEnd) {
3025  const xxh_u8* const limit = bEnd - 32;
3026 
3027  do {
3028  state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
3029  state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
3030  state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
3031  state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
3032  } while (p<=limit);
3033 
3034  }
3035 
3036  if (p < bEnd) {
3037  XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
3038  state->memsize = (unsigned)(bEnd-p);
3039  }
3040  }
3041 
3042  return XXH_OK;
3043 }
3044 
3045 
3048 {
3049  xxh_u64 h64;
3050 
3051  if (state->total_len >= 32) {
3052  h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
3053  h64 = XXH64_mergeRound(h64, state->v[0]);
3054  h64 = XXH64_mergeRound(h64, state->v[1]);
3055  h64 = XXH64_mergeRound(h64, state->v[2]);
3056  h64 = XXH64_mergeRound(h64, state->v[3]);
3057  } else {
3058  h64 = state->v[2] /*seed*/ + XXH_PRIME64_5;
3059  }
3060 
3061  h64 += (xxh_u64) state->total_len;
3062 
3063  return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
3064 }
3065 #endif /* !XXH_NO_STREAM */
3066 
3067 /******* Canonical representation *******/
3068 
3071 {
3072  XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
3073  if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
3074  XXH_memcpy(dst, &hash, sizeof(*dst));
3075 }
3076 
3079 {
3080  return XXH_readBE64(src);
3081 }
3082 
3083 #ifndef XXH_NO_XXH3
3084 
3085 /* *********************************************************************
3086 * XXH3
3087 * New generation hash designed for speed on small keys and vectorization
3088 ************************************************************************ */
3096 /* === Compiler specifics === */
3097 
3098 #if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
3099 # define XXH_RESTRICT /* disable */
3100 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
3101 # define XXH_RESTRICT restrict
3102 #elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
3103  || (defined (__clang__)) \
3104  || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
3105  || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
3106 /*
3107  * There are a LOT more compilers that recognize __restrict but this
3108  * covers the major ones.
3109  */
3110 # define XXH_RESTRICT __restrict
3111 #else
3112 # define XXH_RESTRICT /* disable */
3113 #endif
3114 
3115 #if (defined(__GNUC__) && (__GNUC__ >= 3)) \
3116  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
3117  || defined(__clang__)
3118 # define XXH_likely(x) __builtin_expect(x, 1)
3119 # define XXH_unlikely(x) __builtin_expect(x, 0)
3120 #else
3121 # define XXH_likely(x) (x)
3122 # define XXH_unlikely(x) (x)
3123 #endif
3124 
3125 #if defined(__GNUC__) || defined(__clang__)
3126 # if defined(__ARM_FEATURE_SVE)
3127 # include <arm_sve.h>
3128 # endif
3129 # if defined(__ARM_NEON__) || defined(__ARM_NEON) \
3130  || (defined(_M_ARM) && _M_ARM >= 7) \
3131  || defined(_M_ARM64) || defined(_M_ARM64EC)
3132 # define inline __inline__ /* circumvent a clang bug */
3133 # include <arm_neon.h>
3134 # undef inline
3135 # elif defined(__AVX2__)
3136 # include <immintrin.h>
3137 # elif defined(__SSE2__)
3138 # include <emmintrin.h>
3139 # endif
3140 #endif
3141 
3142 #if defined(_MSC_VER)
3143 # include <intrin.h>
3144 #endif
3145 
3146 /*
3147  * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
3148  * remaining a true 64-bit/128-bit hash function.
3149  *
3150  * This is done by prioritizing a subset of 64-bit operations that can be
3151  * emulated without too many steps on the average 32-bit machine.
3152  *
3153  * For example, these two lines seem similar, and run equally fast on 64-bit:
3154  *
3155  * xxh_u64 x;
3156  * x ^= (x >> 47); // good
3157  * x ^= (x >> 13); // bad
3158  *
3159  * However, to a 32-bit machine, there is a major difference.
3160  *
3161  * x ^= (x >> 47) looks like this:
3162  *
3163  * x.lo ^= (x.hi >> (47 - 32));
3164  *
3165  * while x ^= (x >> 13) looks like this:
3166  *
3167  * // note: funnel shifts are not usually cheap.
3168  * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
3169  * x.hi ^= (x.hi >> 13);
3170  *
3171  * The first one is significantly faster than the second, simply because the
3172  * shift is larger than 32. This means:
3173  * - All the bits we need are in the upper 32 bits, so we can ignore the lower
3174  * 32 bits in the shift.
3175  * - The shift result will always fit in the lower 32 bits, and therefore,
3176  * we can ignore the upper 32 bits in the xor.
3177  *
3178  * Thanks to this optimization, XXH3 only requires these features to be efficient:
3179  *
3180  * - Usable unaligned access
3181  * - A 32-bit or 64-bit ALU
3182  * - If 32-bit, a decent ADC instruction
3183  * - A 32 or 64-bit multiply with a 64-bit result
3184  * - For the 128-bit variant, a decent byteswap helps short inputs.
3185  *
3186  * The first two are already required by XXH32, and almost all 32-bit and 64-bit
3187  * platforms which can run XXH32 can run XXH3 efficiently.
3188  *
3189  * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
3190  * notable exception.
3191  *
3192  * First of all, Thumb-1 lacks support for the UMULL instruction which
3193  * performs the important long multiply. This means numerous __aeabi_lmul
3194  * calls.
3195  *
3196  * Second of all, the 8 functional registers are just not enough.
3197  * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
3198  * Lo registers, and this shuffling results in thousands more MOVs than A32.
3199  *
3200  * A32 and T32 don't have this limitation. They can access all 14 registers,
3201  * do a 32->64 multiply with UMULL, and the flexible operand allowing free
3202  * shifts is helpful, too.
3203  *
3204  * Therefore, we do a quick sanity check.
3205  *
3206  * If compiling Thumb-1 for a target which supports ARM instructions, we will
3207  * emit a warning, as it is not a "sane" platform to compile for.
3208  *
3209  * Usually, if this happens, it is because of an accident and you probably need
3210  * to specify -march, as you likely meant to compile for a newer architecture.
3211  *
3212  * Credit: large sections of the vectorial and asm source code paths
3213  * have been contributed by @easyaspi314
3214  */
3215 #if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
3216 # warning "XXH3 is highly inefficient without ARM or Thumb-2."
3217 #endif
3218 
3219 /* ==========================================
3220  * Vectorization detection
3221  * ========================================== */
3222 
3223 #ifdef XXH_DOXYGEN
3224 
3234 # define XXH_VECTOR XXH_SCALAR
3235 
3244 enum XXH_VECTOR_TYPE /* fake enum */ {
3245  XXH_SCALAR = 0,
3246  XXH_SSE2 = 1,
3252  XXH_AVX2 = 2,
3253  XXH_AVX512 = 3,
3254  XXH_NEON = 4,
3255  XXH_VSX = 5,
3256  XXH_SVE = 6,
3257 };
3267 # define XXH_ACC_ALIGN 8
3268 #endif
3269 
3270 /* Actual definition */
3271 #ifndef XXH_DOXYGEN
3272 # define XXH_SCALAR 0
3273 # define XXH_SSE2 1
3274 # define XXH_AVX2 2
3275 # define XXH_AVX512 3
3276 # define XXH_NEON 4
3277 # define XXH_VSX 5
3278 # define XXH_SVE 6
3279 #endif
3280 
3281 #ifndef XXH_VECTOR /* can be defined on command line */
3282 # if defined(__ARM_FEATURE_SVE)
3283 # define XXH_VECTOR XXH_SVE
3284 # elif ( \
3285  defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
3286  || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
3287  ) && ( \
3288  defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
3289  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
3290  )
3291 # define XXH_VECTOR XXH_NEON
3292 # elif defined(__AVX512F__)
3293 # define XXH_VECTOR XXH_AVX512
3294 # elif defined(__AVX2__)
3295 # define XXH_VECTOR XXH_AVX2
3296 # elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
3297 # define XXH_VECTOR XXH_SSE2
3298 # elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
3299  || (defined(__s390x__) && defined(__VEC__)) \
3300  && defined(__GNUC__) /* TODO: IBM XL */
3301 # define XXH_VECTOR XXH_VSX
3302 # else
3303 # define XXH_VECTOR XXH_SCALAR
3304 # endif
3305 #endif
3306 
3307 /* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
3308 #if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
3309 # ifdef _MSC_VER
3310 # pragma warning(once : 4606)
3311 # else
3312 # warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
3313 # endif
3314 # undef XXH_VECTOR
3315 # define XXH_VECTOR XXH_SCALAR
3316 #endif
3317 
3318 /*
3319  * Controls the alignment of the accumulator,
3320  * for compatibility with aligned vector loads, which are usually faster.
3321  */
3322 #ifndef XXH_ACC_ALIGN
3323 # if defined(XXH_X86DISPATCH)
3324 # define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
3325 # elif XXH_VECTOR == XXH_SCALAR /* scalar */
3326 # define XXH_ACC_ALIGN 8
3327 # elif XXH_VECTOR == XXH_SSE2 /* sse2 */
3328 # define XXH_ACC_ALIGN 16
3329 # elif XXH_VECTOR == XXH_AVX2 /* avx2 */
3330 # define XXH_ACC_ALIGN 32
3331 # elif XXH_VECTOR == XXH_NEON /* neon */
3332 # define XXH_ACC_ALIGN 16
3333 # elif XXH_VECTOR == XXH_VSX /* vsx */
3334 # define XXH_ACC_ALIGN 16
3335 # elif XXH_VECTOR == XXH_AVX512 /* avx512 */
3336 # define XXH_ACC_ALIGN 64
3337 # elif XXH_VECTOR == XXH_SVE /* sve */
3338 # define XXH_ACC_ALIGN 64
3339 # endif
3340 #endif
3341 
3342 #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
3343  || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
3344 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
3345 #elif XXH_VECTOR == XXH_SVE
3346 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
3347 #else
3348 # define XXH_SEC_ALIGN 8
3349 #endif
3350 
3351 #if defined(__GNUC__) || defined(__clang__)
3352 # define XXH_ALIASING __attribute__((may_alias))
3353 #else
3354 # define XXH_ALIASING /* nothing */
3355 #endif
3356 
3357 /*
3358  * UGLY HACK:
3359  * GCC usually generates the best code with -O3 for xxHash.
3360  *
3361  * However, when targeting AVX2, it is overzealous in its unrolling resulting
3362  * in code roughly 3/4 the speed of Clang.
3363  *
3364  * There are other issues, such as GCC splitting _mm256_loadu_si256 into
3365  * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
3366  * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
3367  *
3368  * That is why when compiling the AVX2 version, it is recommended to use either
3369  * -O2 -mavx2 -march=haswell
3370  * or
3371  * -O2 -mavx2 -mno-avx256-split-unaligned-load
3372  * for decent performance, or to use Clang instead.
3373  *
3374  * Fortunately, we can control the first one with a pragma that forces GCC into
3375  * -O2, but the other one we can't control without "failed to inline always
3376  * inline function due to target mismatch" warnings.
3377  */
3378 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
3379  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
3380  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
3381 # pragma GCC push_options
3382 # pragma GCC optimize("-O2")
3383 #endif
3384 
3385 #if XXH_VECTOR == XXH_NEON
3386 
3387 /*
3388  * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
3389  * optimizes out the entire hashLong loop because of the aliasing violation.
3390  *
3391  * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
3392  * so the only option is to mark it as aliasing.
3393  */
3394 typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
3395 
3409 #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
3410 XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
3411 {
3412  return *(xxh_aliasing_uint64x2_t const *)ptr;
3413 }
3414 #else
3415 XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
3416 {
3417  return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
3418 }
3419 #endif
3420 
3429 #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
3430 XXH_FORCE_INLINE uint64x2_t
3431 XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3432 {
3433  /* Inline assembly is the only way */
3434  __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
3435  return acc;
3436 }
3437 XXH_FORCE_INLINE uint64x2_t
3438 XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3439 {
3440  /* This intrinsic works as expected */
3441  return vmlal_high_u32(acc, lhs, rhs);
3442 }
3443 #else
3444 /* Portable intrinsic versions */
3445 XXH_FORCE_INLINE uint64x2_t
3446 XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3447 {
3448  return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
3449 }
3452 XXH_FORCE_INLINE uint64x2_t
3453 XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3454 {
3455  return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
3456 }
3457 #endif
3458 
3494 # ifndef XXH3_NEON_LANES
3495 # if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
3496  && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
3497 # define XXH3_NEON_LANES 6
3498 # else
3499 # define XXH3_NEON_LANES XXH_ACC_NB
3500 # endif
3501 # endif
3502 #endif /* XXH_VECTOR == XXH_NEON */
3503 
3504 /*
3505  * VSX and Z Vector helpers.
3506  *
3507  * This is very messy, and any pull requests to clean this up are welcome.
3508  *
3509  * There are a lot of problems with supporting VSX and s390x, due to
3510  * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
3511  */
3512 #if XXH_VECTOR == XXH_VSX
3513 /* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
3514  * and `pixel`. This is a problem for obvious reasons.
3515  *
3516  * These keywords are unnecessary; the spec literally says they are
3517  * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
3518  * after including the header.
3519  *
3520  * We use pragma push_macro/pop_macro to keep the namespace clean. */
3521 # pragma push_macro("bool")
3522 # pragma push_macro("vector")
3523 # pragma push_macro("pixel")
3524 /* silence potential macro redefined warnings */
3525 # undef bool
3526 # undef vector
3527 # undef pixel
3528 
3529 # if defined(__s390x__)
3530 # include <s390intrin.h>
3531 # else
3532 # include <altivec.h>
3533 # endif
3534 
3535 /* Restore the original macro values, if applicable. */
3536 # pragma pop_macro("pixel")
3537 # pragma pop_macro("vector")
3538 # pragma pop_macro("bool")
3539 
3540 typedef __vector unsigned long long xxh_u64x2;
3541 typedef __vector unsigned char xxh_u8x16;
3542 typedef __vector unsigned xxh_u32x4;
3543 
3544 /*
3545  * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
3546  */
3547 typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
3548 
3549 # ifndef XXH_VSX_BE
3550 # if defined(__BIG_ENDIAN__) \
3551  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
3552 # define XXH_VSX_BE 1
3553 # elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
3554 # warning "-maltivec=be is not recommended. Please use native endianness."
3555 # define XXH_VSX_BE 1
3556 # else
3557 # define XXH_VSX_BE 0
3558 # endif
3559 # endif /* !defined(XXH_VSX_BE) */
3560 
3561 # if XXH_VSX_BE
3562 # if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
3563 # define XXH_vec_revb vec_revb
3564 # else
3565 
3568 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
3569 {
3570  xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
3571  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
3572  return vec_perm(val, val, vByteSwap);
3573 }
3574 # endif
3575 # endif /* XXH_VSX_BE */
3576 
3580 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
3581 {
3582  xxh_u64x2 ret;
3583  XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
3584 # if XXH_VSX_BE
3585  ret = XXH_vec_revb(ret);
3586 # endif
3587  return ret;
3588 }
3589 
3590 /*
3591  * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
3592  *
3593  * These intrinsics weren't added until GCC 8, despite existing for a while,
3594  * and they are endian dependent. Also, their meaning swap depending on version.
3595  * */
3596 # if defined(__s390x__)
3597  /* s390x is always big endian, no issue on this platform */
3598 # define XXH_vec_mulo vec_mulo
3599 # define XXH_vec_mule vec_mule
3600 # elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
3601 /* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
3602  /* The IBM XL Compiler (which defined __clang__) only implements the vec_* operations */
3603 # define XXH_vec_mulo __builtin_altivec_vmulouw
3604 # define XXH_vec_mule __builtin_altivec_vmuleuw
3605 # else
3606 /* gcc needs inline assembly */
3607 /* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
3608 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
3609 {
3610  xxh_u64x2 result;
3611  __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3612  return result;
3613 }
3614 XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
3615 {
3616  xxh_u64x2 result;
3617  __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3618  return result;
3619 }
3620 # endif /* XXH_vec_mulo, XXH_vec_mule */
3621 #endif /* XXH_VECTOR == XXH_VSX */
3622 
3623 #if XXH_VECTOR == XXH_SVE
3624 #define ACCRND(acc, offset) \
3625 do { \
3626  svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
3627  svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
3628  svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
3629  svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
3630  svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
3631  svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
3632  svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
3633  acc = svadd_u64_x(mask, acc, mul); \
3634 } while (0)
3635 #endif /* XXH_VECTOR == XXH_SVE */
3636 
3637 
3638 /* prefetch
3639  * can be disabled, by declaring XXH_NO_PREFETCH build macro */
3640 #if defined(XXH_NO_PREFETCH)
3641 # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
3642 #else
3643 # if XXH_SIZE_OPT >= 1
3644 # define XXH_PREFETCH(ptr) (void)(ptr)
3645 # elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
3646 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
3647 # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
3648 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
3649 # define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
3650 # else
3651 # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
3652 # endif
3653 #endif /* XXH_NO_PREFETCH */
3654 
3655 
3656 /* ==========================================
3657  * XXH3 default settings
3658  * ========================================== */
3659 
3660 #define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
3661 
3662 #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
3663 # error "default keyset is not large enough"
3664 #endif
3665 
3667 XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
3668  0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
3669  0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
3670  0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
3671  0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
3672  0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
3673  0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
3674  0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
3675  0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
3676  0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
3677  0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
3678  0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
3679  0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
3680 };
3681 
3682 static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL;
3683 static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL;
3685 #ifdef XXH_OLD_NAMES
3686 # define kSecret XXH3_kSecret
3687 #endif
3688 
3689 #ifdef XXH_DOXYGEN
3690 
3706 XXH_FORCE_INLINE xxh_u64
3707 XXH_mult32to64(xxh_u64 x, xxh_u64 y)
3708 {
3709  return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
3710 }
3711 #elif defined(_MSC_VER) && defined(_M_IX86)
3712 # define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
3713 #else
3714 /*
3715  * Downcast + upcast is usually better than masking on older compilers like
3716  * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
3717  *
3718  * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
3719  * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
3720  */
3721 # define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
3722 #endif
3723 
3733 static XXH128_hash_t
3734 XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
3735 {
3736  /*
3737  * GCC/Clang __uint128_t method.
3738  *
3739  * On most 64-bit targets, GCC and Clang define a __uint128_t type.
3740  * This is usually the best way as it usually uses a native long 64-bit
3741  * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
3742  *
3743  * Usually.
3744  *
3745  * Despite being a 32-bit platform, Clang (and emscripten) define this type
3746  * despite not having the arithmetic for it. This results in a laggy
3747  * compiler builtin call which calculates a full 128-bit multiply.
3748  * In that case it is best to use the portable one.
3749  * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
3750  */
3751 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
3752  && defined(__SIZEOF_INT128__) \
3753  || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
3754 
3755  __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
3756  XXH128_hash_t r128;
3757  r128.low64 = (xxh_u64)(product);
3758  r128.high64 = (xxh_u64)(product >> 64);
3759  return r128;
3760 
3761  /*
3762  * MSVC for x64's _umul128 method.
3763  *
3764  * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
3765  *
3766  * This compiles to single operand MUL on x64.
3767  */
3768 #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
3769 
3770 #ifndef _MSC_VER
3771 # pragma intrinsic(_umul128)
3772 #endif
3773  xxh_u64 product_high;
3774  xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
3775  XXH128_hash_t r128;
3776  r128.low64 = product_low;
3777  r128.high64 = product_high;
3778  return r128;
3779 
3780  /*
3781  * MSVC for ARM64's __umulh method.
3782  *
3783  * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
3784  */
3785 #elif defined(_M_ARM64) || defined(_M_ARM64EC)
3786 
3787 #ifndef _MSC_VER
3788 # pragma intrinsic(__umulh)
3789 #endif
3790  XXH128_hash_t r128;
3791  r128.low64 = lhs * rhs;
3792  r128.high64 = __umulh(lhs, rhs);
3793  return r128;
3794 
3795 #else
3796  /*
3797  * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
3798  *
3799  * This is a fast and simple grade school multiply, which is shown below
3800  * with base 10 arithmetic instead of base 0x100000000.
3801  *
3802  * 9 3 // D2 lhs = 93
3803  * x 7 5 // D2 rhs = 75
3804  * ----------
3805  * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
3806  * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
3807  * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
3808  * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
3809  * ---------
3810  * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
3811  * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
3812  * ---------
3813  * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
3814  *
3815  * The reasons for adding the products like this are:
3816  * 1. It avoids manual carry tracking. Just like how
3817  * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
3818  * This avoids a lot of complexity.
3819  *
3820  * 2. It hints for, and on Clang, compiles to, the powerful UMAAL
3821  * instruction available in ARM's Digital Signal Processing extension
3822  * in 32-bit ARMv6 and later, which is shown below:
3823  *
3824  * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
3825  * {
3826  * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
3827  * *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
3828  * *RdHi = (xxh_u32)(product >> 32);
3829  * }
3830  *
3831  * This instruction was designed for efficient long multiplication, and
3832  * allows this to be calculated in only 4 instructions at speeds
3833  * comparable to some 64-bit ALUs.
3834  *
3835  * 3. It isn't terrible on other platforms. Usually this will be a couple
3836  * of 32-bit ADD/ADCs.
3837  */
3838 
3839  /* First calculate all of the cross products. */
3840  xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
3841  xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF);
3842  xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
3843  xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);
3844 
3845  /* Now add the products together. These will never overflow. */
3846  xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
3847  xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
3848  xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
3849 
3850  XXH128_hash_t r128;
3851  r128.low64 = lower;
3852  r128.high64 = upper;
3853  return r128;
3854 #endif
3855 }
3856 
3867 static xxh_u64
3868 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
3869 {
3870  XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
3871  return product.low64 ^ product.high64;
3872 }
3873 
3875 XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
3876 {
3877  XXH_ASSERT(0 <= shift && shift < 64);
3878  return v64 ^ (v64 >> shift);
3879 }
3880 
3881 /*
3882  * This is a fast avalanche stage,
3883  * suitable when input bits are already partially mixed
3884  */
3885 static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
3886 {
3887  h64 = XXH_xorshift64(h64, 37);
3888  h64 *= PRIME_MX1;
3889  h64 = XXH_xorshift64(h64, 32);
3890  return h64;
3891 }
3892 
3893 /*
3894  * This is a stronger avalanche,
3895  * inspired by Pelle Evensen's rrmxmx
3896  * preferable when input has not been previously mixed
3897  */
3898 static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
3899 {
3900  /* this mix is inspired by Pelle Evensen's rrmxmx */
3901  h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
3902  h64 *= PRIME_MX2;
3903  h64 ^= (h64 >> 35) + len ;
3904  h64 *= PRIME_MX2;
3905  return XXH_xorshift64(h64, 28);
3906 }
3907 
3908 
3909 /* ==========================================
3910  * Short keys
3911  * ==========================================
3912  * One of the shortcomings of XXH32 and XXH64 was that their performance was
3913  * sub-optimal on short lengths. It used an iterative algorithm which strongly
3914  * favored lengths that were a multiple of 4 or 8.
3915  *
3916  * Instead of iterating over individual inputs, we use a set of single shot
3917  * functions which piece together a range of lengths and operate in constant time.
3918  *
3919  * Additionally, the number of multiplies has been significantly reduced. This
3920  * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
3921  *
3922  * Depending on the platform, this may or may not be faster than XXH32, but it
3923  * is almost guaranteed to be faster than XXH64.
3924  */
3925 
3926 /*
3927  * At very short lengths, there isn't enough input to fully hide secrets, or use
3928  * the entire secret.
3929  *
3930  * There is also only a limited amount of mixing we can do before significantly
3931  * impacting performance.
3932  *
3933  * Therefore, we use different sections of the secret and always mix two secret
3934  * samples with an XOR. This should have no effect on performance on the
3935  * seedless or withSeed variants because everything _should_ be constant folded
3936  * by modern compilers.
3937  *
3938  * The XOR mixing hides individual parts of the secret and increases entropy.
3939  *
3940  * This adds an extra layer of strength for custom secrets.
3941  */
3942 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3943 XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3944 {
3945  XXH_ASSERT(input != NULL);
3946  XXH_ASSERT(1 <= len && len <= 3);
3947  XXH_ASSERT(secret != NULL);
3948  /*
3949  * len = 1: combined = { input[0], 0x01, input[0], input[0] }
3950  * len = 2: combined = { input[1], 0x02, input[0], input[1] }
3951  * len = 3: combined = { input[2], 0x03, input[0], input[1] }
3952  */
3953  { xxh_u8 const c1 = input[0];
3954  xxh_u8 const c2 = input[len >> 1];
3955  xxh_u8 const c3 = input[len - 1];
3956  xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24)
3957  | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
3958  // cppcheck-suppress nullPointerArithmeticRedundantCheck; false positive
3959  xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
3960  xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
3961  return XXH64_avalanche(keyed);
3962  }
3963 }
3964 
3965 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3966 XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3967 {
3968  XXH_ASSERT(input != NULL);
3969  XXH_ASSERT(secret != NULL);
3970  XXH_ASSERT(4 <= len && len <= 8);
3971  seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
3972  { xxh_u32 const input1 = XXH_readLE32(input);
3973  xxh_u32 const input2 = XXH_readLE32(input + len - 4);
3974  xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
3975  xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
3976  xxh_u64 const keyed = input64 ^ bitflip;
3977  return XXH3_rrmxmx(keyed, len);
3978  }
3979 }
3980 
3981 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3982 XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3983 {
3984  XXH_ASSERT(input != NULL);
3985  XXH_ASSERT(secret != NULL);
3986  XXH_ASSERT(9 <= len && len <= 16);
3987  { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
3988  xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
3989  xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1;
3990  xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
3991  xxh_u64 const acc = len
3992  + XXH_swap64(input_lo) + input_hi
3993  + XXH3_mul128_fold64(input_lo, input_hi);
3994  return XXH3_avalanche(acc);
3995  }
3996 }
3997 
3998 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3999 XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
4000 {
4001  XXH_ASSERT(len <= 16);
4002  { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed);
4003  if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
4004  if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
4005  return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
4006  }
4007 }
4008 
4009 /*
4010  * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
4011  * multiplication by zero, affecting hashes of lengths 17 to 240.
4012  *
4013  * However, they are very unlikely.
4014  *
4015  * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
4016  * unseeded non-cryptographic hashes, it does not attempt to defend itself
4017  * against specially crafted inputs, only random inputs.
4018  *
4019  * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
4020  * cancelling out the secret is taken an arbitrary number of times (addressed
4021  * in XXH3_accumulate_512), this collision is very unlikely with random inputs
4022  * and/or proper seeding:
4023  *
4024  * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
4025  * function that is only called up to 16 times per hash with up to 240 bytes of
4026  * input.
4027  *
4028  * This is not too bad for a non-cryptographic hash function, especially with
4029  * only 64 bit outputs.
4030  *
4031  * The 128-bit variant (which trades some speed for strength) is NOT affected
4032  * by this, although it is always a good idea to use a proper seed if you care
4033  * about strength.
4034  */
4035 XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
4036  const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
4037 {
4038 #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
4039  && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \
4040  && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */
4041  /*
4042  * UGLY HACK:
4043  * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
4044  * slower code.
4045  *
4046  * By forcing seed64 into a register, we disrupt the cost model and
4047  * cause it to scalarize. See `XXH32_round()`
4048  *
4049  * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
4050  * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
4051  * GCC 9.2, despite both emitting scalar code.
4052  *
4053  * GCC generates much better scalar code than Clang for the rest of XXH3,
4054  * which is why finding a more optimal codepath is an interest.
4055  */
4056  XXH_COMPILER_GUARD(seed64);
4057 #endif
4058  { xxh_u64 const input_lo = XXH_readLE64(input);
4059  xxh_u64 const input_hi = XXH_readLE64(input+8);
4060  return XXH3_mul128_fold64(
4061  input_lo ^ (XXH_readLE64(secret) + seed64),
4062  input_hi ^ (XXH_readLE64(secret+8) - seed64)
4063  );
4064  }
4065 }
4066 
4067 /* For mid range keys, XXH3 uses a Mum-hash variant. */
4068 XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
4069 XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
4070  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
4071  XXH64_hash_t seed)
4072 {
4073  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
4074  XXH_ASSERT(16 < len && len <= 128);
4075 
4076  { xxh_u64 acc = len * XXH_PRIME64_1, acc_end;
4077 #if XXH_SIZE_OPT >= 1
4078  /* Smaller and cleaner, but slightly slower. */
4079  unsigned int i = (unsigned int)(len - 1) / 32;
4080  do {
4081  acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
4082  acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
4083  } while (i-- != 0);
4084  acc_end = 0;
4085 #else
4086  acc += XXH3_mix16B(input+0, secret+0, seed);
4087  acc_end = XXH3_mix16B(input+len-16, secret+16, seed);
4088  if (len > 32) {
4089  acc += XXH3_mix16B(input+16, secret+32, seed);
4090  acc_end += XXH3_mix16B(input+len-32, secret+48, seed);
4091  if (len > 64) {
4092  acc += XXH3_mix16B(input+32, secret+64, seed);
4093  acc_end += XXH3_mix16B(input+len-48, secret+80, seed);
4094 
4095  if (len > 96) {
4096  acc += XXH3_mix16B(input+48, secret+96, seed);
4097  acc_end += XXH3_mix16B(input+len-64, secret+112, seed);
4098  }
4099  }
4100  }
4101 #endif
4102  return XXH3_avalanche(acc + acc_end);
4103  }
4104 }
4105 
4106 #define XXH3_MIDSIZE_MAX 240
4107 
4108 XXH_NO_INLINE XXH_PUREF XXH64_hash_t
4109 XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
4110  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
4111  XXH64_hash_t seed)
4112 {
4113  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
4114  XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4115 
4116  #define XXH3_MIDSIZE_STARTOFFSET 3
4117  #define XXH3_MIDSIZE_LASTOFFSET 17
4118 
4119  { xxh_u64 acc = len * XXH_PRIME64_1;
4120  xxh_u64 acc_end;
4121  unsigned int const nbRounds = (unsigned int)len / 16;
4122  unsigned int i;
4123  XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4124  for (i=0; i<8; i++) {
4125  acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
4126  }
4127  /* last bytes */
4128  acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
4129  XXH_ASSERT(nbRounds >= 8);
4130  acc = XXH3_avalanche(acc);
4131 #if defined(__clang__) /* Clang */ \
4132  && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
4133  && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
4134  /*
4135  * UGLY HACK:
4136  * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
4137  * In everywhere else, it uses scalar code.
4138  *
4139  * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
4140  * would still be slower than UMAAL (see XXH_mult64to128).
4141  *
4142  * Unfortunately, Clang doesn't handle the long multiplies properly and
4143  * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
4144  * scalarized into an ugly mess of VMOV.32 instructions.
4145  *
4146  * This mess is difficult to avoid without turning autovectorization
4147  * off completely, but they are usually relatively minor and/or not
4148  * worth it to fix.
4149  *
4150  * This loop is the easiest to fix, as unlike XXH32, this pragma
4151  * _actually works_ because it is a loop vectorization instead of an
4152  * SLP vectorization.
4153  */
4154  #pragma clang loop vectorize(disable)
4155 #endif
4156  for (i=8 ; i < nbRounds; i++) {
4157  /*
4158  * Prevents clang for unrolling the acc loop and interleaving with this one.
4159  */
4160  XXH_COMPILER_GUARD(acc);
4161  acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
4162  }
4163  return XXH3_avalanche(acc + acc_end);
4164  }
4165 }
4166 
4167 
4168 /* ======= Long Keys ======= */
4169 
4170 #define XXH_STRIPE_LEN 64
4171 #define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */
4172 #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
4173 
4174 #ifdef XXH_OLD_NAMES
4175 # define STRIPE_LEN XXH_STRIPE_LEN
4176 # define ACC_NB XXH_ACC_NB
4177 #endif
4178 
4179 #ifndef XXH_PREFETCH_DIST
4180 # ifdef __clang__
4181 # define XXH_PREFETCH_DIST 320
4182 # else
4183 # if (XXH_VECTOR == XXH_AVX512)
4184 # define XXH_PREFETCH_DIST 512
4185 # else
4186 # define XXH_PREFETCH_DIST 384
4187 # endif
4188 # endif /* __clang__ */
4189 #endif /* XXH_PREFETCH_DIST */
4190 
4191 /*
4192  * These macros are to generate an XXH3_accumulate() function.
4193  * The two arguments select the name suffix and target attribute.
4194  *
4195  * The name of this symbol is XXH3_accumulate_<name>() and it calls
4196  * XXH3_accumulate_512_<name>().
4197  *
4198  * It may be useful to hand implement this function if the compiler fails to
4199  * optimize the inline function.
4200  */
4201 #define XXH3_ACCUMULATE_TEMPLATE(name) \
4202 void \
4203 XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \
4204  const xxh_u8* XXH_RESTRICT input, \
4205  const xxh_u8* XXH_RESTRICT secret, \
4206  size_t nbStripes) \
4207 { \
4208  size_t n; \
4209  for (n = 0; n < nbStripes; n++ ) { \
4210  const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \
4211  XXH_PREFETCH(in + XXH_PREFETCH_DIST); \
4212  XXH3_accumulate_512_##name( \
4213  acc, \
4214  in, \
4215  secret + n*XXH_SECRET_CONSUME_RATE); \
4216  } \
4217 }
4218 
4219 
4220 XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
4221 {
4222  if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
4223  XXH_memcpy(dst, &v64, sizeof(v64));
4224 }
4225 
4226 /* Several intrinsic functions below are supposed to accept __int64 as argument,
4227  * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
4228  * However, several environments do not define __int64 type,
4229  * requiring a workaround.
4230  */
4231 #if !defined (__VMS) \
4232  && (defined (__cplusplus) \
4233  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
4234  typedef int64_t xxh_i64;
4235 #else
4236  /* the following type must have a width of 64-bit */
4237  typedef long long xxh_i64;
4238 #endif
4239 
4240 
4241 /*
4242  * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
4243  *
4244  * It is a hardened version of UMAC, based off of FARSH's implementation.
4245  *
4246  * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
4247  * implementations, and it is ridiculously fast.
4248  *
4249  * We harden it by mixing the original input to the accumulators as well as the product.
4250  *
4251  * This means that in the (relatively likely) case of a multiply by zero, the
4252  * original input is preserved.
4253  *
4254  * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
4255  * cross-pollination, as otherwise the upper and lower halves would be
4256  * essentially independent.
4257  *
4258  * This doesn't matter on 64-bit hashes since they all get merged together in
4259  * the end, so we skip the extra step.
4260  *
4261  * Both XXH3_64bits and XXH3_128bits use this subroutine.
4262  */
4263 
4264 #if (XXH_VECTOR == XXH_AVX512) \
4265  || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
4266 
4267 #ifndef XXH_TARGET_AVX512
4268 # define XXH_TARGET_AVX512 /* disable attribute target */
4269 #endif
4270 
4271 XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4272 XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
4273  const void* XXH_RESTRICT input,
4274  const void* XXH_RESTRICT secret)
4275 {
4276  __m512i* const xacc = (__m512i *) acc;
4277  XXH_ASSERT((((size_t)acc) & 63) == 0);
4278  XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
4279 
4280  {
4281  /* data_vec = input[0]; */
4282  __m512i const data_vec = _mm512_loadu_si512 (input);
4283  /* key_vec = secret[0]; */
4284  __m512i const key_vec = _mm512_loadu_si512 (secret);
4285  /* data_key = data_vec ^ key_vec; */
4286  __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
4287  /* data_key_lo = data_key >> 32; */
4288  __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
4289  /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4290  __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);
4291  /* xacc[0] += swap(data_vec); */
4292  __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
4293  __m512i const sum = _mm512_add_epi64(*xacc, data_swap);
4294  /* xacc[0] += product; */
4295  *xacc = _mm512_add_epi64(product, sum);
4296  }
4297 }
4298 XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
4299 
4300 /*
4301  * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
4302  *
4303  * Multiplication isn't perfect, as explained by Google in HighwayHash:
4304  *
4305  * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
4306  * // varying degrees. In descending order of goodness, bytes
4307  * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
4308  * // As expected, the upper and lower bytes are much worse.
4309  *
4310  * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
4311  *
4312  * Since our algorithm uses a pseudorandom secret to add some variance into the
4313  * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
4314  *
4315  * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
4316  * extraction.
4317  *
4318  * Both XXH3_64bits and XXH3_128bits use this subroutine.
4319  */
4320 
4321 XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4322 XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4323 {
4324  XXH_ASSERT((((size_t)acc) & 63) == 0);
4325  XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
4326  { __m512i* const xacc = (__m512i*) acc;
4327  const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
4328 
4329  /* xacc[0] ^= (xacc[0] >> 47) */
4330  __m512i const acc_vec = *xacc;
4331  __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);
4332  /* xacc[0] ^= secret; */
4333  __m512i const key_vec = _mm512_loadu_si512 (secret);
4334  __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
4335 
4336  /* xacc[0] *= XXH_PRIME32_1; */
4337  __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
4338  __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);
4339  __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
4340  *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
4341  }
4342 }
4343 
4344 XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4345 XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4346 {
4347  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
4348  XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
4349  XXH_ASSERT(((size_t)customSecret & 63) == 0);
4350  (void)(&XXH_writeLE64);
4351  { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
4352  __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
4353  __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
4354 
4355  const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret);
4356  __m512i* const dest = ( __m512i*) customSecret;
4357  int i;
4358  XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
4359  XXH_ASSERT(((size_t)dest & 63) == 0);
4360  for (i=0; i < nbRounds; ++i) {
4361  dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
4362  } }
4363 }
4364 
4365 #endif
4366 
4367 #if (XXH_VECTOR == XXH_AVX2) \
4368  || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
4369 
4370 #ifndef XXH_TARGET_AVX2
4371 # define XXH_TARGET_AVX2 /* disable attribute target */
4372 #endif
4373 
4374 XXH_FORCE_INLINE XXH_TARGET_AVX2 void
4375 XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
4376  const void* XXH_RESTRICT input,
4377  const void* XXH_RESTRICT secret)
4378 {
4379  XXH_ASSERT((((size_t)acc) & 31) == 0);
4380  { __m256i* const xacc = (__m256i *) acc;
4381  /* Unaligned. This is mainly for pointer arithmetic, and because
4382  * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4383  const __m256i* const xinput = (const __m256i *) input;
4384  /* Unaligned. This is mainly for pointer arithmetic, and because
4385  * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4386  const __m256i* const xsecret = (const __m256i *) secret;
4387 
4388  size_t i;
4389  for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
4390  /* data_vec = xinput[i]; */
4391  __m256i const data_vec = _mm256_loadu_si256 (xinput+i);
4392  /* key_vec = xsecret[i]; */
4393  __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
4394  /* data_key = data_vec ^ key_vec; */
4395  __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
4396  /* data_key_lo = data_key >> 32; */
4397  __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
4398  /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4399  __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);
4400  /* xacc[i] += swap(data_vec); */
4401  __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
4402  __m256i const sum = _mm256_add_epi64(xacc[i], data_swap);
4403  /* xacc[i] += product; */
4404  xacc[i] = _mm256_add_epi64(product, sum);
4405  } }
4406 }
4407 XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
4408 
4409 XXH_FORCE_INLINE XXH_TARGET_AVX2 void
4410 XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4411 {
4412  XXH_ASSERT((((size_t)acc) & 31) == 0);
4413  { __m256i* const xacc = (__m256i*) acc;
4414  /* Unaligned. This is mainly for pointer arithmetic, and because
4415  * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4416  const __m256i* const xsecret = (const __m256i *) secret;
4417  const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
4418 
4419  size_t i;
4420  for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
4421  /* xacc[i] ^= (xacc[i] >> 47) */
4422  __m256i const acc_vec = xacc[i];
4423  __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47);
4424  __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted);
4425  /* xacc[i] ^= xsecret; */
4426  __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
4427  __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
4428 
4429  /* xacc[i] *= XXH_PRIME32_1; */
4430  __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
4431  __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);
4432  __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
4433  xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
4434  }
4435  }
4436 }
4437 
4438 XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4439 {
4440  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
4441  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
4442  XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
4443  (void)(&XXH_writeLE64);
4444  XXH_PREFETCH(customSecret);
4445  { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
4446 
4447  const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret);
4448  __m256i* dest = ( __m256i*) customSecret;
4449 
4450 # if defined(__GNUC__) || defined(__clang__)
4451  /*
4452  * On GCC & Clang, marking 'dest' as modified will cause the compiler:
4453  * - do not extract the secret from sse registers in the internal loop
4454  * - use less common registers, and avoid pushing these reg into stack
4455  */
4456  XXH_COMPILER_GUARD(dest);
4457 # endif
4458  XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
4459  XXH_ASSERT(((size_t)dest & 31) == 0);
4460 
4461  /* GCC -O2 need unroll loop manually */
4462  dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
4463  dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
4464  dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
4465  dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
4466  dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
4467  dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
4468  }
4469 }
4470 
4471 #endif
4472 
4473 /* x86dispatch always generates SSE2 */
4474 #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
4475 
4476 #ifndef XXH_TARGET_SSE2
4477 # define XXH_TARGET_SSE2 /* disable attribute target */
4478 #endif
4479 
4480 XXH_FORCE_INLINE XXH_TARGET_SSE2 void
4481 XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
4482  const void* XXH_RESTRICT input,
4483  const void* XXH_RESTRICT secret)
4484 {
4485  /* SSE2 is just a half-scale version of the AVX2 version. */
4486  XXH_ASSERT((((size_t)acc) & 15) == 0);
4487  { __m128i* const xacc = (__m128i *) acc;
4488  /* Unaligned. This is mainly for pointer arithmetic, and because
4489  * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4490  const __m128i* const xinput = (const __m128i *) input;
4491  /* Unaligned. This is mainly for pointer arithmetic, and because
4492  * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4493  const __m128i* const xsecret = (const __m128i *) secret;
4494 
4495  size_t i;
4496  for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
4497  /* data_vec = xinput[i]; */
4498  __m128i const data_vec = _mm_loadu_si128 (xinput+i);
4499  /* key_vec = xsecret[i]; */
4500  __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
4501  /* data_key = data_vec ^ key_vec; */
4502  __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
4503  /* data_key_lo = data_key >> 32; */
4504  __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4505  /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4506  __m128i const product = _mm_mul_epu32 (data_key, data_key_lo);
4507  /* xacc[i] += swap(data_vec); */
4508  __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
4509  __m128i const sum = _mm_add_epi64(xacc[i], data_swap);
4510  /* xacc[i] += product; */
4511  xacc[i] = _mm_add_epi64(product, sum);
4512  } }
4513 }
4514 XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
4515 
4516 XXH_FORCE_INLINE XXH_TARGET_SSE2 void
4517 XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4518 {
4519  XXH_ASSERT((((size_t)acc) & 15) == 0);
4520  { __m128i* const xacc = (__m128i*) acc;
4521  /* Unaligned. This is mainly for pointer arithmetic, and because
4522  * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4523  const __m128i* const xsecret = (const __m128i *) secret;
4524  const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
4525 
4526  size_t i;
4527  for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
4528  /* xacc[i] ^= (xacc[i] >> 47) */
4529  __m128i const acc_vec = xacc[i];
4530  __m128i const shifted = _mm_srli_epi64 (acc_vec, 47);
4531  __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted);
4532  /* xacc[i] ^= xsecret[i]; */
4533  __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
4534  __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
4535 
4536  /* xacc[i] *= XXH_PRIME32_1; */
4537  __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4538  __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32);
4539  __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32);
4540  xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
4541  }
4542  }
4543 }
4544 
4545 XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4546 {
4547  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
4548  (void)(&XXH_writeLE64);
4549  { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
4550 
4551 # if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
4552  /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
4553  XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
4554  __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
4555 # else
4556  __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
4557 # endif
4558  int i;
4559 
4560  const void* const src16 = XXH3_kSecret;
4561  __m128i* dst16 = (__m128i*) customSecret;
4562 # if defined(__GNUC__) || defined(__clang__)
4563  /*
4564  * On GCC & Clang, marking 'dest' as modified will cause the compiler:
4565  * - do not extract the secret from sse registers in the internal loop
4566  * - use less common registers, and avoid pushing these reg into stack
4567  */
4568  XXH_COMPILER_GUARD(dst16);
4569 # endif
4570  XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
4571  XXH_ASSERT(((size_t)dst16 & 15) == 0);
4572 
4573  for (i=0; i < nbRounds; ++i) {
4574  dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
4575  } }
4576 }
4577 
4578 #endif
4579 
4580 #if (XXH_VECTOR == XXH_NEON)
4581 
4582 /* forward declarations for the scalar routines */
4583 XXH_FORCE_INLINE void
4584 XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
4585  void const* XXH_RESTRICT secret, size_t lane);
4586 
4587 XXH_FORCE_INLINE void
4588 XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
4589  void const* XXH_RESTRICT secret, size_t lane);
4590 
4611 XXH_FORCE_INLINE void
4612 XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
4613  const void* XXH_RESTRICT input,
4614  const void* XXH_RESTRICT secret)
4615 {
4616  XXH_ASSERT((((size_t)acc) & 15) == 0);
4617  XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
4618  { /* GCC for darwin arm64 does not like aliasing here */
4619  xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
4620  /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
4621  uint8_t const* const xinput = (const uint8_t *) input;
4622  uint8_t const* const xsecret = (const uint8_t *) secret;
4623 
4624  size_t i;
4625  /* Scalar lanes use the normal scalarRound routine */
4626  for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
4627  XXH3_scalarRound(acc, input, secret, i);
4628  }
4629  i = 0;
4630  /* 4 NEON lanes at a time. */
4631  for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
4632  /* data_vec = xinput[i]; */
4633  uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16));
4634  uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16));
4635  /* key_vec = xsecret[i]; */
4636  uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16));
4637  uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16));
4638  /* data_swap = swap(data_vec) */
4639  uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
4640  uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
4641  /* data_key = data_vec ^ key_vec; */
4642  uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
4643  uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
4644 
4645  /*
4646  * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
4647  * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
4648  * get one vector with the low 32 bits of each lane, and one vector
4649  * with the high 32 bits of each lane.
4650  *
4651  * This compiles to two instructions on AArch64 and has a paired vector
4652  * result, which is an artifact from ARMv7a's version which modified both
4653  * vectors in place.
4654  *
4655  * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
4656  * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
4657  */
4658  uint32x4x2_t unzipped = vuzpq_u32(
4659  vreinterpretq_u32_u64(data_key_1),
4660  vreinterpretq_u32_u64(data_key_2)
4661  );
4662  /* data_key_lo = data_key & 0xFFFFFFFF */
4663  uint32x4_t data_key_lo = unzipped.val[0];
4664  /* data_key_hi = data_key >> 32 */
4665  uint32x4_t data_key_hi = unzipped.val[1];
4666  /*
4667  * Then, we can split the vectors horizontally and multiply which, as for most
4668  * widening intrinsics, have a variant that works on both high half vectors
4669  * for free on AArch64.
4670  *
4671  * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
4672  */
4673  uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
4674  uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
4675  /*
4676  * Clang reorders
4677  * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s
4678  * c += a; // add acc.2d, acc.2d, swap.2d
4679  * to
4680  * c += a; // add acc.2d, acc.2d, swap.2d
4681  * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s
4682  *
4683  * While it would make sense in theory since the addition is faster,
4684  * for reasons likely related to umlal being limited to certain NEON
4685  * pipelines, this is worse. A compiler guard fixes this.
4686  */
4687  XXH_COMPILER_GUARD_W(sum_1);
4688  XXH_COMPILER_GUARD_W(sum_2);
4689  /* xacc[i] = acc_vec + sum; */
4690  xacc[i] = vaddq_u64(xacc[i], sum_1);
4691  xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
4692  }
4693  /* Operate on the remaining NEON lanes 2 at a time. */
4694  for (; i < XXH3_NEON_LANES / 2; i++) {
4695  /* data_vec = xinput[i]; */
4696  uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
4697  /* key_vec = xsecret[i]; */
4698  uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
4699  /* acc_vec_2 = swap(data_vec) */
4700  uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
4701  /* data_key = data_vec ^ key_vec; */
4702  uint64x2_t data_key = veorq_u64(data_vec, key_vec);
4703  /* For two lanes, just use VMOVN and VSHRN. */
4704  /* data_key_lo = data_key & 0xFFFFFFFF; */
4705  uint32x2_t data_key_lo = vmovn_u64(data_key);
4706  /* data_key_hi = data_key >> 32; */
4707  uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
4708  /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
4709  uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
4710  /* Same Clang workaround as before */
4711  XXH_COMPILER_GUARD_W(sum);
4712  /* xacc[i] = acc_vec + sum; */
4713  xacc[i] = vaddq_u64 (xacc[i], sum);
4714  }
4715  }
4716 }
4717 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
4718 
4719 XXH_FORCE_INLINE void
4720 XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4721 {
4722  XXH_ASSERT((((size_t)acc) & 15) == 0);
4723 
4724  { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;
4725  uint8_t const* xsecret = (uint8_t const*) secret;
4726  uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);
4727 
4728  size_t i;
4729  /* AArch64 uses both scalar and neon at the same time */
4730  for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
4731  XXH3_scalarScrambleRound(acc, secret, i);
4732  }
4733  for (i=0; i < XXH3_NEON_LANES / 2; i++) {
4734  /* xacc[i] ^= (xacc[i] >> 47); */
4735  uint64x2_t acc_vec = xacc[i];
4736  uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
4737  uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
4738 
4739  /* xacc[i] ^= xsecret[i]; */
4740  uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
4741  uint64x2_t data_key = veorq_u64(data_vec, key_vec);
4742 
4743  /* xacc[i] *= XXH_PRIME32_1 */
4744  uint32x2_t data_key_lo = vmovn_u64(data_key);
4745  uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
4746  /*
4747  * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
4748  *
4749  * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
4750  * incorrectly "optimize" this:
4751  * tmp = vmul_u32(vmovn_u64(a), vmovn_u64(b));
4752  * shifted = vshll_n_u32(tmp, 32);
4753  * to this:
4754  * tmp = "vmulq_u64"(a, b); // no such thing!
4755  * shifted = vshlq_n_u64(tmp, 32);
4756  *
4757  * However, unlike SSE, Clang lacks a 64-bit multiply routine
4758  * for NEON, and it scalarizes two 64-bit multiplies instead.
4759  *
4760  * vmull_u32 has the same timing as vmul_u32, and it avoids
4761  * this bug completely.
4762  * See https://bugs.llvm.org/show_bug.cgi?id=39967
4763  */
4764  uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
4765  /* xacc[i] = prod_hi << 32; */
4766  prod_hi = vshlq_n_u64(prod_hi, 32);
4767  /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
4768  xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
4769  }
4770  }
4771 }
4772 #endif
4773 
4774 #if (XXH_VECTOR == XXH_VSX)
4775 
4776 XXH_FORCE_INLINE void
4777 XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
4778  const void* XXH_RESTRICT input,
4779  const void* XXH_RESTRICT secret)
4780 {
4781  /* presumed aligned */
4782  xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
4783  xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */
4784  xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */
4785  xxh_u64x2 const v32 = { 32, 32 };
4786  size_t i;
4787  for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
4788  /* data_vec = xinput[i]; */
4789  xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
4790  /* key_vec = xsecret[i]; */
4791  xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
4792  xxh_u64x2 const data_key = data_vec ^ key_vec;
4793  /* shuffled = (data_key << 32) | (data_key >> 32); */
4794  xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
4795  /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
4796  xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
4797  /* acc_vec = xacc[i]; */
4798  xxh_u64x2 acc_vec = xacc[i];
4799  acc_vec += product;
4800 
4801  /* swap high and low halves */
4802 #ifdef __s390x__
4803  acc_vec += vec_permi(data_vec, data_vec, 2);
4804 #else
4805  acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
4806 #endif
4807  xacc[i] = acc_vec;
4808  }
4809 }
4810 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
4811 
4812 XXH_FORCE_INLINE void
4813 XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4814 {
4815  XXH_ASSERT((((size_t)acc) & 15) == 0);
4816 
4817  { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
4818  const xxh_u8* const xsecret = (const xxh_u8*) secret;
4819  /* constants */
4820  xxh_u64x2 const v32 = { 32, 32 };
4821  xxh_u64x2 const v47 = { 47, 47 };
4822  xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
4823  size_t i;
4824  for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
4825  /* xacc[i] ^= (xacc[i] >> 47); */
4826  xxh_u64x2 const acc_vec = xacc[i];
4827  xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
4828 
4829  /* xacc[i] ^= xsecret[i]; */
4830  xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
4831  xxh_u64x2 const data_key = data_vec ^ key_vec;
4832 
4833  /* xacc[i] *= XXH_PRIME32_1 */
4834  /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */
4835  xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
4836  /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */
4837  xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime);
4838  xacc[i] = prod_odd + (prod_even << v32);
4839  } }
4840 }
4841 
4842 #endif
4843 
4844 #if (XXH_VECTOR == XXH_SVE)
4845 
4846 XXH_FORCE_INLINE void
4847 XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
4848  const void* XXH_RESTRICT input,
4849  const void* XXH_RESTRICT secret)
4850 {
4851  uint64_t *xacc = (uint64_t *)acc;
4852  const uint64_t *xinput = (const uint64_t *)(const void *)input;
4853  const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
4854  svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
4855  uint64_t element_count = svcntd();
4856  if (element_count >= 8) {
4857  svbool_t mask = svptrue_pat_b64(SV_VL8);
4858  svuint64_t vacc = svld1_u64(mask, xacc);
4859  ACCRND(vacc, 0);
4860  svst1_u64(mask, xacc, vacc);
4861  } else if (element_count == 2) { /* sve128 */
4862  svbool_t mask = svptrue_pat_b64(SV_VL2);
4863  svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4864  svuint64_t acc1 = svld1_u64(mask, xacc + 2);
4865  svuint64_t acc2 = svld1_u64(mask, xacc + 4);
4866  svuint64_t acc3 = svld1_u64(mask, xacc + 6);
4867  ACCRND(acc0, 0);
4868  ACCRND(acc1, 2);
4869  ACCRND(acc2, 4);
4870  ACCRND(acc3, 6);
4871  svst1_u64(mask, xacc + 0, acc0);
4872  svst1_u64(mask, xacc + 2, acc1);
4873  svst1_u64(mask, xacc + 4, acc2);
4874  svst1_u64(mask, xacc + 6, acc3);
4875  } else {
4876  svbool_t mask = svptrue_pat_b64(SV_VL4);
4877  svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4878  svuint64_t acc1 = svld1_u64(mask, xacc + 4);
4879  ACCRND(acc0, 0);
4880  ACCRND(acc1, 4);
4881  svst1_u64(mask, xacc + 0, acc0);
4882  svst1_u64(mask, xacc + 4, acc1);
4883  }
4884 }
4885 
4886 XXH_FORCE_INLINE void
4887 XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
4888  const xxh_u8* XXH_RESTRICT input,
4889  const xxh_u8* XXH_RESTRICT secret,
4890  size_t nbStripes)
4891 {
4892  if (nbStripes != 0) {
4893  uint64_t *xacc = (uint64_t *)acc;
4894  const uint64_t *xinput = (const uint64_t *)(const void *)input;
4895  const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
4896  svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
4897  uint64_t element_count = svcntd();
4898  if (element_count >= 8) {
4899  svbool_t mask = svptrue_pat_b64(SV_VL8);
4900  svuint64_t vacc = svld1_u64(mask, xacc + 0);
4901  do {
4902  /* svprfd(svbool_t, void *, enum svfprop); */
4903  svprfd(mask, xinput + 128, SV_PLDL1STRM);
4904  ACCRND(vacc, 0);
4905  xinput += 8;
4906  xsecret += 1;
4907  nbStripes--;
4908  } while (nbStripes != 0);
4909 
4910  svst1_u64(mask, xacc + 0, vacc);
4911  } else if (element_count == 2) { /* sve128 */
4912  svbool_t mask = svptrue_pat_b64(SV_VL2);
4913  svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4914  svuint64_t acc1 = svld1_u64(mask, xacc + 2);
4915  svuint64_t acc2 = svld1_u64(mask, xacc + 4);
4916  svuint64_t acc3 = svld1_u64(mask, xacc + 6);
4917  do {
4918  svprfd(mask, xinput + 128, SV_PLDL1STRM);
4919  ACCRND(acc0, 0);
4920  ACCRND(acc1, 2);
4921  ACCRND(acc2, 4);
4922  ACCRND(acc3, 6);
4923  xinput += 8;
4924  xsecret += 1;
4925  nbStripes--;
4926  } while (nbStripes != 0);
4927 
4928  svst1_u64(mask, xacc + 0, acc0);
4929  svst1_u64(mask, xacc + 2, acc1);
4930  svst1_u64(mask, xacc + 4, acc2);
4931  svst1_u64(mask, xacc + 6, acc3);
4932  } else {
4933  svbool_t mask = svptrue_pat_b64(SV_VL4);
4934  svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4935  svuint64_t acc1 = svld1_u64(mask, xacc + 4);
4936  do {
4937  svprfd(mask, xinput + 128, SV_PLDL1STRM);
4938  ACCRND(acc0, 0);
4939  ACCRND(acc1, 4);
4940  xinput += 8;
4941  xsecret += 1;
4942  nbStripes--;
4943  } while (nbStripes != 0);
4944 
4945  svst1_u64(mask, xacc + 0, acc0);
4946  svst1_u64(mask, xacc + 4, acc1);
4947  }
4948  }
4949 }
4950 
4951 #endif
4952 
4953 /* scalar variants - universal */
4954 
4955 #if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
4956 /*
4957  * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
4958  * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
4959  *
4960  * While this might not seem like much, as AArch64 is a 64-bit architecture, only
4961  * big Cortex designs have a full 64-bit multiplier.
4962  *
4963  * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
4964  * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
4965  * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
4966  *
4967  * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
4968  * not have this penalty and does the mask automatically.
4969  */
4970 XXH_FORCE_INLINE xxh_u64
4971 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
4972 {
4973  xxh_u64 ret;
4974  /* note: %x = 64-bit register, %w = 32-bit register */
4975  __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
4976  return ret;
4977 }
4978 #else
4979 XXH_FORCE_INLINE xxh_u64
4980 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
4981 {
4982  return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
4983 }
4984 #endif
4985 
4993 XXH_FORCE_INLINE void
4994 XXH3_scalarRound(void* XXH_RESTRICT acc,
4995  void const* XXH_RESTRICT input,
4996  void const* XXH_RESTRICT secret,
4997  size_t lane)
4998 {
4999  xxh_u64* xacc = (xxh_u64*) acc;
5000  xxh_u8 const* xinput = (xxh_u8 const*) input;
5001  xxh_u8 const* xsecret = (xxh_u8 const*) secret;
5002  XXH_ASSERT(lane < XXH_ACC_NB);
5003  XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
5004  {
5005  xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
5006  xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
5007  xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
5008  xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
5009  }
5010 }
5011 
5016 XXH_FORCE_INLINE void
5017 XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
5018  const void* XXH_RESTRICT input,
5019  const void* XXH_RESTRICT secret)
5020 {
5021  size_t i;
5022  /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
5023 #if defined(__GNUC__) && !defined(__clang__) \
5024  && (defined(__arm__) || defined(__thumb2__)) \
5025  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
5026  && XXH_SIZE_OPT <= 0
5027 # pragma GCC unroll 8
5028 #endif
5029  for (i=0; i < XXH_ACC_NB; i++) {
5030  XXH3_scalarRound(acc, input, secret, i);
5031  }
5032 }
5033 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
5034 
5035 
5042 XXH_FORCE_INLINE void
5043 XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
5044  void const* XXH_RESTRICT secret,
5045  size_t lane)
5046 {
5047  xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
5048  const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
5049  XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
5050  XXH_ASSERT(lane < XXH_ACC_NB);
5051  {
5052  xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
5053  xxh_u64 acc64 = xacc[lane];
5054  acc64 = XXH_xorshift64(acc64, 47);
5055  acc64 ^= key64;
5056  acc64 *= XXH_PRIME32_1;
5057  xacc[lane] = acc64;
5058  }
5059 }
5060 
5065 XXH_FORCE_INLINE void
5066 XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
5067 {
5068  size_t i;
5069  for (i=0; i < XXH_ACC_NB; i++) {
5070  XXH3_scalarScrambleRound(acc, secret, i);
5071  }
5072 }
5073 
5074 XXH_FORCE_INLINE void
5075 XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
5076 {
5077  /*
5078  * We need a separate pointer for the hack below,
5079  * which requires a non-const pointer.
5080  * Any decent compiler will optimize this out otherwise.
5081  */
5082  const xxh_u8* kSecretPtr = XXH3_kSecret;
5083  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
5084 
5085 #if defined(__GNUC__) && defined(__aarch64__)
5086  /*
5087  * UGLY HACK:
5088  * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
5089  * placed sequentially, in order, at the top of the unrolled loop.
5090  *
5091  * While MOVK is great for generating constants (2 cycles for a 64-bit
5092  * constant compared to 4 cycles for LDR), it fights for bandwidth with
5093  * the arithmetic instructions.
5094  *
5095  * I L S
5096  * MOVK
5097  * MOVK
5098  * MOVK
5099  * MOVK
5100  * ADD
5101  * SUB STR
5102  * STR
5103  * By forcing loads from memory (as the asm line causes the compiler to assume
5104  * that XXH3_kSecretPtr has been changed), the pipelines are used more
5105  * efficiently:
5106  * I L S
5107  * LDR
5108  * ADD LDR
5109  * SUB STR
5110  * STR
5111  *
5112  * See XXH3_NEON_LANES for details on the pipsline.
5113  *
5114  * XXH3_64bits_withSeed, len == 256, Snapdragon 835
5115  * without hack: 2654.4 MB/s
5116  * with hack: 3202.9 MB/s
5117  */
5118  XXH_COMPILER_GUARD(kSecretPtr);
5119 #endif
5120  { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
5121  int i;
5122  for (i=0; i < nbRounds; i++) {
5123  /*
5124  * The asm hack causes the compiler to assume that kSecretPtr aliases with
5125  * customSecret, and on aarch64, this prevented LDP from merging two
5126  * loads together for free. Putting the loads together before the stores
5127  * properly generates LDP.
5128  */
5129  xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64;
5130  xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
5131  XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo);
5132  XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
5133  } }
5134 }
5135 
5136 
5137 typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
5138 typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
5139 typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
5140 
5141 
5142 #if (XXH_VECTOR == XXH_AVX512)
5143 
5144 #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
5145 #define XXH3_accumulate XXH3_accumulate_avx512
5146 #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
5147 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
5148 
5149 #elif (XXH_VECTOR == XXH_AVX2)
5150 
5151 #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
5152 #define XXH3_accumulate XXH3_accumulate_avx2
5153 #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
5154 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
5155 
5156 #elif (XXH_VECTOR == XXH_SSE2)
5157 
5158 #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
5159 #define XXH3_accumulate XXH3_accumulate_sse2
5160 #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
5161 #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
5162 
5163 #elif (XXH_VECTOR == XXH_NEON)
5164 
5165 #define XXH3_accumulate_512 XXH3_accumulate_512_neon
5166 #define XXH3_accumulate XXH3_accumulate_neon
5167 #define XXH3_scrambleAcc XXH3_scrambleAcc_neon
5168 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5169 
5170 #elif (XXH_VECTOR == XXH_VSX)
5171 
5172 #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
5173 #define XXH3_accumulate XXH3_accumulate_vsx
5174 #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
5175 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5176 
5177 #elif (XXH_VECTOR == XXH_SVE)
5178 #define XXH3_accumulate_512 XXH3_accumulate_512_sve
5179 #define XXH3_accumulate XXH3_accumulate_sve
5180 #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
5181 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5182 
5183 #else /* scalar */
5184 
5185 #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
5186 #define XXH3_accumulate XXH3_accumulate_scalar
5187 #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
5188 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5189 
5190 #endif
5191 
5192 #if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
5193 # undef XXH3_initCustomSecret
5194 # define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5195 #endif
5196 
5197 XXH_FORCE_INLINE void
5198 XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
5199  const xxh_u8* XXH_RESTRICT input, size_t len,
5200  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5201  XXH3_f_accumulate f_acc,
5202  XXH3_f_scrambleAcc f_scramble)
5203 {
5204  size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
5205  size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
5206  size_t const nb_blocks = (len - 1) / block_len;
5207 
5208  size_t n;
5209 
5210  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
5211 
5212  for (n = 0; n < nb_blocks; n++) {
5213  f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
5214  f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
5215  }
5216 
5217  /* last partial block */
5218  XXH_ASSERT(len > XXH_STRIPE_LEN);
5219  { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
5220  XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
5221  f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
5222 
5223  /* last stripe */
5224  { const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
5225 #define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */
5226  XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
5227  } }
5228 }
5229 
5230 XXH_FORCE_INLINE xxh_u64
5231 XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
5232 {
5233  return XXH3_mul128_fold64(
5234  acc[0] ^ XXH_readLE64(secret),
5235  acc[1] ^ XXH_readLE64(secret+8) );
5236 }
5237 
5238 static XXH64_hash_t
5239 XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
5240 {
5241  xxh_u64 result64 = start;
5242  size_t i = 0;
5243 
5244  for (i = 0; i < 4; i++) {
5245  result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
5246 #if defined(__clang__) /* Clang */ \
5247  && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \
5248  && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
5249  && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
5250  /*
5251  * UGLY HACK:
5252  * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
5253  * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
5254  * XXH3_64bits, len == 256, Snapdragon 835:
5255  * without hack: 2063.7 MB/s
5256  * with hack: 2560.7 MB/s
5257  */
5258  XXH_COMPILER_GUARD(result64);
5259 #endif
5260  }
5261 
5262  return XXH3_avalanche(result64);
5263 }
5264 
5265 #define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
5266  XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
5267 
5268 XXH_FORCE_INLINE XXH64_hash_t
5269 XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
5270  const void* XXH_RESTRICT secret, size_t secretSize,
5271  XXH3_f_accumulate f_acc,
5272  XXH3_f_scrambleAcc f_scramble)
5273 {
5274  XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
5275 
5276  XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
5277 
5278  /* converge into final hash */
5279  XXH_STATIC_ASSERT(sizeof(acc) == 64);
5280  /* do not align on 8, so that the secret is different from the accumulator */
5281 #define XXH_SECRET_MERGEACCS_START 11
5282  XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
5283  return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
5284 }
5285 
5286 /*
5287  * It's important for performance to transmit secret's size (when it's static)
5288  * so that the compiler can properly optimize the vectorized loop.
5289  * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
5290  * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
5291  * breaks -Og, this is XXH_NO_INLINE.
5292  */
5293 XXH3_WITH_SECRET_INLINE XXH64_hash_t
5294 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
5295  XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5296 {
5297  (void)seed64;
5298  return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
5299 }
5300 
5301 /*
5302  * It's preferable for performance that XXH3_hashLong is not inlined,
5303  * as it results in a smaller function for small data, easier to the instruction cache.
5304  * Note that inside this no_inline function, we do inline the internal loop,
5305  * and provide a statically defined secret size to allow optimization of vector loop.
5306  */
5307 XXH_NO_INLINE XXH_PUREF XXH64_hash_t
5308 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
5309  XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5310 {
5311  (void)seed64; (void)secret; (void)secretLen;
5312  return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
5313 }
5314 
5315 /*
5316  * XXH3_hashLong_64b_withSeed():
5317  * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
5318  * and then use this key for long mode hashing.
5319  *
5320  * This operation is decently fast but nonetheless costs a little bit of time.
5321  * Try to avoid it whenever possible (typically when seed==0).
5322  *
5323  * It's important for performance that XXH3_hashLong is not inlined. Not sure
5324  * why (uop cache maybe?), but the difference is large and easily measurable.
5325  */
5326 XXH_FORCE_INLINE XXH64_hash_t
5327 XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
5328  XXH64_hash_t seed,
5329  XXH3_f_accumulate f_acc,
5330  XXH3_f_scrambleAcc f_scramble,
5331  XXH3_f_initCustomSecret f_initSec)
5332 {
5333 #if XXH_SIZE_OPT <= 0
5334  if (seed == 0)
5335  return XXH3_hashLong_64b_internal(input, len,
5336  XXH3_kSecret, sizeof(XXH3_kSecret),
5337  f_acc, f_scramble);
5338 #endif
5339  { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
5340  f_initSec(secret, seed);
5341  return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
5342  f_acc, f_scramble);
5343  }
5344 }
5345 
5346 /*
5347  * It's important for performance that XXH3_hashLong is not inlined.
5348  */
5349 XXH_NO_INLINE XXH64_hash_t
5350 XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
5351  XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5352 {
5353  (void)secret; (void)secretLen;
5354  return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
5355  XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
5356 }
5357 
5358 
5359 typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
5360  XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
5361 
5362 XXH_FORCE_INLINE XXH64_hash_t
5363 XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
5364  XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
5365  XXH3_hashLong64_f f_hashLong)
5366 {
5367  XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
5368  /*
5369  * If an action is to be taken if `secretLen` condition is not respected,
5370  * it should be done here.
5371  * For now, it's a contract pre-condition.
5372  * Adding a check and a branch here would cost performance at every hash.
5373  * Also, note that function signature doesn't offer room to return an error.
5374  */
5375  if (len <= 16)
5376  return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
5377  if (len <= 128)
5378  return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5379  if (len <= XXH3_MIDSIZE_MAX)
5380  return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5381  return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
5382 }
5383 
5384 
5385 /* === Public entry point === */
5386 
5389 {
5390  return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
5391 }
5392 
5395 XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
5396 {
5397  return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
5398 }
5399 
5402 XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
5403 {
5404  return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
5405 }
5406 
5408 XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
5409 {
5410  if (length <= XXH3_MIDSIZE_MAX)
5411  return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
5412  return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
5413 }
5414 
5415 
5416 /* === XXH3 streaming === */
5417 #ifndef XXH_NO_STREAM
5418 /*
5419  * Malloc's a pointer that is always aligned to align.
5420  *
5421  * This must be freed with `XXH_alignedFree()`.
5422  *
5423  * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
5424  * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2
5425  * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
5426  *
5427  * This underalignment previously caused a rather obvious crash which went
5428  * completely unnoticed due to XXH3_createState() not actually being tested.
5429  * Credit to RedSpah for noticing this bug.
5430  *
5431  * The alignment is done manually: Functions like posix_memalign or _mm_malloc
5432  * are avoided: To maintain portability, we would have to write a fallback
5433  * like this anyways, and besides, testing for the existence of library
5434  * functions without relying on external build tools is impossible.
5435  *
5436  * The method is simple: Overallocate, manually align, and store the offset
5437  * to the original behind the returned pointer.
5438  *
5439  * Align must be a power of 2 and 8 <= align <= 128.
5440  */
5441 static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
5442 {
5443  XXH_ASSERT(align <= 128 && align >= 8); /* range check */
5444  XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
5445  XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */
5446  { /* Overallocate to make room for manual realignment and an offset byte */
5447  xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
5448  if (base != NULL) {
5449  /*
5450  * Get the offset needed to align this pointer.
5451  *
5452  * Even if the returned pointer is aligned, there will always be
5453  * at least one byte to store the offset to the original pointer.
5454  */
5455  size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
5456  /* Add the offset for the now-aligned pointer */
5457  xxh_u8* ptr = base + offset;
5458 
5459  XXH_ASSERT((size_t)ptr % align == 0);
5460 
5461  /* Store the offset immediately before the returned pointer. */
5462  ptr[-1] = (xxh_u8)offset;
5463  return ptr;
5464  }
5465  return NULL;
5466  }
5467 }
5468 /*
5469  * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
5470  * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
5471  */
5472 static void XXH_alignedFree(void* p)
5473 {
5474  if (p != NULL) {
5475  xxh_u8* ptr = (xxh_u8*)p;
5476  /* Get the offset byte we added in XXH_malloc. */
5477  xxh_u8 offset = ptr[-1];
5478  /* Free the original malloc'd pointer */
5479  xxh_u8* base = ptr - offset;
5480  XXH_free(base);
5481  }
5482 }
5485 {
5486  XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
5487  if (state==NULL) return NULL;
5488  XXH3_INITSTATE(state);
5489  return state;
5490 }
5491 
5494 {
5495  XXH_alignedFree(statePtr);
5496  return XXH_OK;
5497 }
5498 
5500 XXH_PUBLIC_API void
5502 {
5503  XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
5504 }
5505 
5506 static void
5507 XXH3_reset_internal(XXH3_state_t* statePtr,
5508  XXH64_hash_t seed,
5509  const void* secret, size_t secretSize)
5510 {
5511  size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
5512  size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
5513  XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
5514  XXH_ASSERT(statePtr != NULL);
5515  /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
5516  // cppcheck-suppress nullPointerArithmeticRedundantCheck; false positive
5517  memset((char*)statePtr + initStart, 0, initLength);
5518  // cppcheck-suppress nullPointerRedundantCheck; false positive
5519  statePtr->acc[0] = XXH_PRIME32_3;
5520  // cppcheck-suppress nullPointerRedundantCheck; false positive
5521  statePtr->acc[1] = XXH_PRIME64_1;
5522  // cppcheck-suppress nullPointerRedundantCheck; false positive
5523  statePtr->acc[2] = XXH_PRIME64_2;
5524  // cppcheck-suppress nullPointerRedundantCheck; false positive
5525  statePtr->acc[3] = XXH_PRIME64_3;
5526  // cppcheck-suppress nullPointerRedundantCheck; false positive
5527  statePtr->acc[4] = XXH_PRIME64_4;
5528  // cppcheck-suppress nullPointerRedundantCheck; false positive
5529  statePtr->acc[5] = XXH_PRIME32_2;
5530  // cppcheck-suppress nullPointerRedundantCheck; false positive
5531  statePtr->acc[6] = XXH_PRIME64_5;
5532  // cppcheck-suppress nullPointerRedundantCheck; false positive
5533  statePtr->acc[7] = XXH_PRIME32_1;
5534  // cppcheck-suppress nullPointerRedundantCheck; false positive
5535  statePtr->seed = seed;
5536  // cppcheck-suppress nullPointerRedundantCheck; false positive
5537  statePtr->useSeed = (seed != 0);
5538  // cppcheck-suppress nullPointerRedundantCheck; false positive
5539  statePtr->extSecret = (const unsigned char*)secret;
5540  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
5541  statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
5542  statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
5543 }
5544 
5548 {
5549  if (statePtr == NULL) return XXH_ERROR;
5550  XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
5551  return XXH_OK;
5552 }
5553 
5556 XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
5557 {
5558  if (statePtr == NULL) return XXH_ERROR;
5559  XXH3_reset_internal(statePtr, 0, secret, secretSize);
5560  if (secret == NULL) return XXH_ERROR;
5561  if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
5562  return XXH_OK;
5563 }
5564 
5568 {
5569  if (statePtr == NULL) return XXH_ERROR;
5570  if (seed==0) return XXH3_64bits_reset(statePtr);
5571  if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
5572  XXH3_initCustomSecret(statePtr->customSecret, seed);
5573  XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
5574  return XXH_OK;
5575 }
5576 
5579 XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
5580 {
5581  if (statePtr == NULL) return XXH_ERROR;
5582  if (secret == NULL) return XXH_ERROR;
5583  if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
5584  XXH3_reset_internal(statePtr, seed64, secret, secretSize);
5585  statePtr->useSeed = 1; /* always, even if seed64==0 */
5586  return XXH_OK;
5587 }
5588 
5606 XXH_FORCE_INLINE const xxh_u8 *
5607 XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
5608  size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
5609  const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
5610  const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
5611  XXH3_f_accumulate f_acc,
5612  XXH3_f_scrambleAcc f_scramble)
5613 {
5614  const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
5615  /* Process full blocks */
5616  if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
5617  /* Process the initial partial block... */
5618  size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
5619 
5620  do {
5621  /* Accumulate and scramble */
5622  f_acc(acc, input, initialSecret, nbStripesThisIter);
5623  f_scramble(acc, secret + secretLimit);
5624  input += nbStripesThisIter * XXH_STRIPE_LEN;
5625  nbStripes -= nbStripesThisIter;
5626  /* Then continue the loop with the full block size */
5627  nbStripesThisIter = nbStripesPerBlock;
5628  initialSecret = secret;
5629  } while (nbStripes >= nbStripesPerBlock);
5630  *nbStripesSoFarPtr = 0;
5631  }
5632  /* Process a partial block */
5633  if (nbStripes > 0) {
5634  f_acc(acc, input, initialSecret, nbStripes);
5635  input += nbStripes * XXH_STRIPE_LEN;
5636  *nbStripesSoFarPtr += nbStripes;
5637  }
5638  /* Return end pointer */
5639  return input;
5640 }
5641 
5642 #ifndef XXH3_STREAM_USE_STACK
5643 # if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
5644 # define XXH3_STREAM_USE_STACK 1
5645 # endif
5646 #endif
5647 /*
5648  * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
5649  */
5650 XXH_FORCE_INLINE XXH_errorcode
5651 XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
5652  const xxh_u8* XXH_RESTRICT input, size_t len,
5653  XXH3_f_accumulate f_acc,
5654  XXH3_f_scrambleAcc f_scramble)
5655 {
5656  if (input==NULL) {
5657  XXH_ASSERT(len == 0);
5658  return XXH_OK;
5659  }
5660 
5661  XXH_ASSERT(state != NULL);
5662  { const xxh_u8* const bEnd = input + len;
5663  // cppcheck-suppress nullPointerRedundantCheck; false positive
5664  const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5665 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
5666  /* For some reason, gcc and MSVC seem to suffer greatly
5667  * when operating accumulators directly into state.
5668  * Operating into stack space seems to enable proper optimization.
5669  * clang, on the other hand, doesn't seem to need this trick */
5670  XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
5671  XXH_memcpy(acc, state->acc, sizeof(acc));
5672 #else
5673  xxh_u64* XXH_RESTRICT const acc = state->acc;
5674 #endif
5675  state->totalLen += len;
5676  XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
5677 
5678  /* small input : just fill in tmp buffer */
5679  if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
5680  XXH_memcpy(state->buffer + state->bufferedSize, input, len);
5681  state->bufferedSize += (XXH32_hash_t)len;
5682  return XXH_OK;
5683  }
5684 
5685  /* total input is now > XXH3_INTERNALBUFFER_SIZE */
5686  #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
5687  XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
5688 
5689  /*
5690  * Internal buffer is partially filled (always, except at beginning)
5691  * Complete it, then consume it.
5692  */
5693  if (state->bufferedSize) {
5694  size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
5695  XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
5696  input += loadSize;
5697  XXH3_consumeStripes(acc,
5698  &state->nbStripesSoFar, state->nbStripesPerBlock,
5699  state->buffer, XXH3_INTERNALBUFFER_STRIPES,
5700  secret, state->secretLimit,
5701  f_acc, f_scramble);
5702  state->bufferedSize = 0;
5703  }
5704  XXH_ASSERT(input < bEnd);
5705  if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
5706  size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
5707  input = XXH3_consumeStripes(acc,
5708  &state->nbStripesSoFar, state->nbStripesPerBlock,
5709  input, nbStripes,
5710  secret, state->secretLimit,
5711  f_acc, f_scramble);
5712  XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
5713 
5714  }
5715  /* Some remaining input (always) : buffer it */
5716  XXH_ASSERT(input < bEnd);
5717  XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
5718  XXH_ASSERT(state->bufferedSize == 0);
5719  XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
5720  state->bufferedSize = (XXH32_hash_t)(bEnd-input);
5721 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
5722  /* save stack accumulators into state */
5723  XXH_memcpy(state->acc, acc, sizeof(acc));
5724 #endif
5725  }
5726 
5727  return XXH_OK;
5728 }
5729 
5732 XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
5733 {
5734  return XXH3_update(state, (const xxh_u8*)input, len,
5735  XXH3_accumulate, XXH3_scrambleAcc);
5736 }
5737 
5738 
5739 XXH_FORCE_INLINE void
5740 XXH3_digest_long (XXH64_hash_t* acc,
5741  const XXH3_state_t* state,
5742  const unsigned char* secret)
5743 {
5744  xxh_u8 lastStripe[XXH_STRIPE_LEN];
5745  const xxh_u8* lastStripePtr;
5746 
5747  /*
5748  * Digest on a local copy. This way, the state remains unaltered, and it can
5749  * continue ingesting more input afterwards.
5750  */
5751  XXH_memcpy(acc, state->acc, sizeof(state->acc));
5752  if (state->bufferedSize >= XXH_STRIPE_LEN) {
5753  /* Consume remaining stripes then point to remaining data in buffer */
5754  size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
5755  size_t nbStripesSoFar = state->nbStripesSoFar;
5756  XXH3_consumeStripes(acc,
5757  &nbStripesSoFar, state->nbStripesPerBlock,
5758  state->buffer, nbStripes,
5759  secret, state->secretLimit,
5760  XXH3_accumulate, XXH3_scrambleAcc);
5761  lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
5762  } else { /* bufferedSize < XXH_STRIPE_LEN */
5763  /* Copy to temp buffer */
5764  size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
5765  XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
5766  XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
5767  XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
5768  lastStripePtr = lastStripe;
5769  }
5770  /* Last stripe */
5771  XXH3_accumulate_512(acc,
5772  lastStripePtr,
5773  secret + state->secretLimit - XXH_SECRET_LASTACC_START);
5774 }
5775 
5778 {
5779  const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5780  if (state->totalLen > XXH3_MIDSIZE_MAX) {
5781  XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
5782  XXH3_digest_long(acc, state, secret);
5783  return XXH3_mergeAccs(acc,
5784  secret + XXH_SECRET_MERGEACCS_START,
5785  (xxh_u64)state->totalLen * XXH_PRIME64_1);
5786  }
5787  /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
5788  if (state->useSeed)
5789  return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
5790  return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
5791  secret, state->secretLimit + XXH_STRIPE_LEN);
5792 }
5793 #endif /* !XXH_NO_STREAM */
5794 
5795 
5796 /* ==========================================
5797  * XXH3 128 bits (a.k.a XXH128)
5798  * ==========================================
5799  * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
5800  * even without counting the significantly larger output size.
5801  *
5802  * For example, extra steps are taken to avoid the seed-dependent collisions
5803  * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
5804  *
5805  * This strength naturally comes at the cost of some speed, especially on short
5806  * lengths. Note that longer hashes are about as fast as the 64-bit version
5807  * due to it using only a slight modification of the 64-bit loop.
5808  *
5809  * XXH128 is also more oriented towards 64-bit machines. It is still extremely
5810  * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
5811  */
5812 
5813 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5814 XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5815 {
5816  /* A doubled version of 1to3_64b with different constants. */
5817  XXH_ASSERT(input != NULL);
5818  XXH_ASSERT(1 <= len && len <= 3);
5819  XXH_ASSERT(secret != NULL);
5820  /*
5821  * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
5822  * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
5823  * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
5824  */
5825  { xxh_u8 const c1 = input[0];
5826  xxh_u8 const c2 = input[len >> 1];
5827  xxh_u8 const c3 = input[len - 1];
5828  xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
5829  | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
5830  xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
5831  // cppcheck-suppress nullPointerArithmeticRedundantCheck; false positive
5832  xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
5833  // cppcheck-suppress nullPointerArithmeticRedundantCheck; false positive
5834  xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
5835  xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
5836  xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
5837  XXH128_hash_t h128;
5838  h128.low64 = XXH64_avalanche(keyed_lo);
5839  h128.high64 = XXH64_avalanche(keyed_hi);
5840  return h128;
5841  }
5842 }
5843 
5844 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5845 XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5846 {
5847  XXH_ASSERT(input != NULL);
5848  XXH_ASSERT(secret != NULL);
5849  XXH_ASSERT(4 <= len && len <= 8);
5850  seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
5851  { xxh_u32 const input_lo = XXH_readLE32(input);
5852  xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
5853  xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
5854  xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
5855  xxh_u64 const keyed = input_64 ^ bitflip;
5856 
5857  /* Shift len to the left to ensure it is even, this avoids even multiplies. */
5858  XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
5859 
5860  m128.high64 += (m128.low64 << 1);
5861  m128.low64 ^= (m128.high64 >> 3);
5862 
5863  m128.low64 = XXH_xorshift64(m128.low64, 35);
5864  m128.low64 *= PRIME_MX2;
5865  m128.low64 = XXH_xorshift64(m128.low64, 28);
5866  m128.high64 = XXH3_avalanche(m128.high64);
5867  return m128;
5868  }
5869 }
5870 
5871 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5872 XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5873 {
5874  XXH_ASSERT(input != NULL);
5875  XXH_ASSERT(secret != NULL);
5876  XXH_ASSERT(9 <= len && len <= 16);
5877  { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
5878  xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
5879  xxh_u64 const input_lo = XXH_readLE64(input);
5880  xxh_u64 input_hi = XXH_readLE64(input + len - 8);
5881  XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
5882  /*
5883  * Put len in the middle of m128 to ensure that the length gets mixed to
5884  * both the low and high bits in the 128x64 multiply below.
5885  */
5886  m128.low64 += (xxh_u64)(len - 1) << 54;
5887  input_hi ^= bitfliph;
5888  /*
5889  * Add the high 32 bits of input_hi to the high 32 bits of m128, then
5890  * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
5891  * the high 64 bits of m128.
5892  *
5893  * The best approach to this operation is different on 32-bit and 64-bit.
5894  */
5895  if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
5896  /*
5897  * 32-bit optimized version, which is more readable.
5898  *
5899  * On 32-bit, it removes an ADC and delays a dependency between the two
5900  * halves of m128.high64, but it generates an extra mask on 64-bit.
5901  */
5902  m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
5903  } else {
5904  /*
5905  * 64-bit optimized (albeit more confusing) version.
5906  *
5907  * Uses some properties of addition and multiplication to remove the mask:
5908  *
5909  * Let:
5910  * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
5911  * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
5912  * c = XXH_PRIME32_2
5913  *
5914  * a + (b * c)
5915  * Inverse Property: x + y - x == y
5916  * a + (b * (1 + c - 1))
5917  * Distributive Property: x * (y + z) == (x * y) + (x * z)
5918  * a + (b * 1) + (b * (c - 1))
5919  * Identity Property: x * 1 == x
5920  * a + b + (b * (c - 1))
5921  *
5922  * Substitute a, b, and c:
5923  * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
5924  *
5925  * Since input_hi.hi + input_hi.lo == input_hi, we get this:
5926  * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
5927  */
5928  m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
5929  }
5930  /* m128 ^= XXH_swap64(m128 >> 64); */
5931  m128.low64 ^= XXH_swap64(m128.high64);
5932 
5933  { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
5934  XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
5935  h128.high64 += m128.high64 * XXH_PRIME64_2;
5936 
5937  h128.low64 = XXH3_avalanche(h128.low64);
5938  h128.high64 = XXH3_avalanche(h128.high64);
5939  return h128;
5940  } }
5941 }
5942 
5943 /*
5944  * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
5945  */
5946 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5947 XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5948 {
5949  XXH_ASSERT(len <= 16);
5950  { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
5951  if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
5952  if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
5953  { XXH128_hash_t h128;
5954  xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
5955  xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
5956  h128.low64 = XXH64_avalanche(seed ^ bitflipl);
5957  h128.high64 = XXH64_avalanche( seed ^ bitfliph);
5958  return h128;
5959  } }
5960 }
5961 
5962 /*
5963  * A bit slower than XXH3_mix16B, but handles multiply by zero better.
5964  */
5965 XXH_FORCE_INLINE XXH128_hash_t
5966 XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
5967  const xxh_u8* secret, XXH64_hash_t seed)
5968 {
5969  acc.low64 += XXH3_mix16B (input_1, secret+0, seed);
5970  acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
5971  acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
5972  acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
5973  return acc;
5974 }
5975 
5976 
5977 XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5978 XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
5979  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5980  XXH64_hash_t seed)
5981 {
5982  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
5983  XXH_ASSERT(16 < len && len <= 128);
5984 
5985  { XXH128_hash_t acc;
5986  acc.low64 = len * XXH_PRIME64_1;
5987  acc.high64 = 0;
5988 
5989 #if XXH_SIZE_OPT >= 1
5990  {
5991  /* Smaller, but slightly slower. */
5992  unsigned int i = (unsigned int)(len - 1) / 32;
5993  do {
5994  acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
5995  } while (i-- != 0);
5996  }
5997 #else
5998  if (len > 32) {
5999  if (len > 64) {
6000  if (len > 96) {
6001  acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
6002  }
6003  acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
6004  }
6005  acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
6006  }
6007  acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
6008 #endif
6009  { XXH128_hash_t h128;
6010  h128.low64 = acc.low64 + acc.high64;
6011  h128.high64 = (acc.low64 * XXH_PRIME64_1)
6012  + (acc.high64 * XXH_PRIME64_4)
6013  + ((len - seed) * XXH_PRIME64_2);
6014  h128.low64 = XXH3_avalanche(h128.low64);
6015  h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
6016  return h128;
6017  }
6018  }
6019 }
6020 
6021 XXH_NO_INLINE XXH_PUREF XXH128_hash_t
6022 XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
6023  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
6024  XXH64_hash_t seed)
6025 {
6026  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
6027  XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
6028 
6029  { XXH128_hash_t acc;
6030  unsigned i;
6031  acc.low64 = len * XXH_PRIME64_1;
6032  acc.high64 = 0;
6033  /*
6034  * We set as `i` as offset + 32. We do this so that unchanged
6035  * `len` can be used as upper bound. This reaches a sweet spot
6036  * where both x86 and aarch64 get simple agen and good codegen
6037  * for the loop.
6038  */
6039  for (i = 32; i < 160; i += 32) {
6040  acc = XXH128_mix32B(acc,
6041  input + i - 32,
6042  input + i - 16,
6043  secret + i - 32,
6044  seed);
6045  }
6046  acc.low64 = XXH3_avalanche(acc.low64);
6047  acc.high64 = XXH3_avalanche(acc.high64);
6048  /*
6049  * NB: `i <= len` will duplicate the last 32-bytes if
6050  * len % 32 was zero. This is an unfortunate necessity to keep
6051  * the hash result stable.
6052  */
6053  for (i=160; i <= len; i += 32) {
6054  acc = XXH128_mix32B(acc,
6055  input + i - 32,
6056  input + i - 16,
6057  secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
6058  seed);
6059  }
6060  /* last bytes */
6061  acc = XXH128_mix32B(acc,
6062  input + len - 16,
6063  input + len - 32,
6064  secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
6065  (XXH64_hash_t)0 - seed);
6066 
6067  { XXH128_hash_t h128;
6068  h128.low64 = acc.low64 + acc.high64;
6069  h128.high64 = (acc.low64 * XXH_PRIME64_1)
6070  + (acc.high64 * XXH_PRIME64_4)
6071  + ((len - seed) * XXH_PRIME64_2);
6072  h128.low64 = XXH3_avalanche(h128.low64);
6073  h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
6074  return h128;
6075  }
6076  }
6077 }
6078 
6079 XXH_FORCE_INLINE XXH128_hash_t
6080 XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
6081  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
6082  XXH3_f_accumulate f_acc,
6083  XXH3_f_scrambleAcc f_scramble)
6084 {
6085  XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
6086 
6087  XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
6088 
6089  /* converge into final hash */
6090  XXH_STATIC_ASSERT(sizeof(acc) == 64);
6091  XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
6092  { XXH128_hash_t h128;
6093  h128.low64 = XXH3_mergeAccs(acc,
6094  secret + XXH_SECRET_MERGEACCS_START,
6095  (xxh_u64)len * XXH_PRIME64_1);
6096  h128.high64 = XXH3_mergeAccs(acc,
6097  secret + secretSize
6098  - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
6099  ~((xxh_u64)len * XXH_PRIME64_2));
6100  return h128;
6101  }
6102 }
6103 
6104 /*
6105  * It's important for performance that XXH3_hashLong() is not inlined.
6106  */
6107 XXH_NO_INLINE XXH_PUREF XXH128_hash_t
6108 XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
6109  XXH64_hash_t seed64,
6110  const void* XXH_RESTRICT secret, size_t secretLen)
6111 {
6112  (void)seed64; (void)secret; (void)secretLen;
6113  return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
6114  XXH3_accumulate, XXH3_scrambleAcc);
6115 }
6116 
6117 /*
6118  * It's important for performance to pass @p secretLen (when it's static)
6119  * to the compiler, so that it can properly optimize the vectorized loop.
6120  *
6121  * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
6122  * breaks -Og, this is XXH_NO_INLINE.
6123  */
6124 XXH3_WITH_SECRET_INLINE XXH128_hash_t
6125 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
6126  XXH64_hash_t seed64,
6127  const void* XXH_RESTRICT secret, size_t secretLen)
6128 {
6129  (void)seed64;
6130  return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
6131  XXH3_accumulate, XXH3_scrambleAcc);
6132 }
6133 
6134 XXH_FORCE_INLINE XXH128_hash_t
6135 XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
6136  XXH64_hash_t seed64,
6137  XXH3_f_accumulate f_acc,
6138  XXH3_f_scrambleAcc f_scramble,
6139  XXH3_f_initCustomSecret f_initSec)
6140 {
6141  if (seed64 == 0)
6142  return XXH3_hashLong_128b_internal(input, len,
6143  XXH3_kSecret, sizeof(XXH3_kSecret),
6144  f_acc, f_scramble);
6145  { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6146  f_initSec(secret, seed64);
6147  return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
6148  f_acc, f_scramble);
6149  }
6150 }
6151 
6152 /*
6153  * It's important for performance that XXH3_hashLong is not inlined.
6154  */
6155 XXH_NO_INLINE XXH128_hash_t
6156 XXH3_hashLong_128b_withSeed(const void* input, size_t len,
6157  XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
6158 {
6159  (void)secret; (void)secretLen;
6160  return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
6161  XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
6162 }
6163 
6164 typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
6165  XXH64_hash_t, const void* XXH_RESTRICT, size_t);
6166 
6167 XXH_FORCE_INLINE XXH128_hash_t
6168 XXH3_128bits_internal(const void* input, size_t len,
6169  XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
6170  XXH3_hashLong128_f f_hl128)
6171 {
6172  XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
6173  /*
6174  * If an action is to be taken if `secret` conditions are not respected,
6175  * it should be done here.
6176  * For now, it's a contract pre-condition.
6177  * Adding a check and a branch here would cost performance at every hash.
6178  */
6179  if (len <= 16)
6180  return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
6181  if (len <= 128)
6182  return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
6183  if (len <= XXH3_MIDSIZE_MAX)
6184  return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
6185  return f_hl128(input, len, seed64, secret, secretLen);
6186 }
6187 
6188 
6189 /* === Public XXH128 API === */
6190 
6193 {
6194  return XXH3_128bits_internal(input, len, 0,
6195  XXH3_kSecret, sizeof(XXH3_kSecret),
6196  XXH3_hashLong_128b_default);
6197 }
6198 
6201 XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
6202 {
6203  return XXH3_128bits_internal(input, len, 0,
6204  (const xxh_u8*)secret, secretSize,
6205  XXH3_hashLong_128b_withSecret);
6206 }
6207 
6210 XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
6211 {
6212  return XXH3_128bits_internal(input, len, seed,
6213  XXH3_kSecret, sizeof(XXH3_kSecret),
6214  XXH3_hashLong_128b_withSeed);
6215 }
6216 
6219 XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
6220 {
6221  if (len <= XXH3_MIDSIZE_MAX)
6222  return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
6223  return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
6224 }
6225 
6228 XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
6229 {
6230  return XXH3_128bits_withSeed(input, len, seed);
6231 }
6232 
6233 
6234 /* === XXH3 128-bit streaming === */
6235 #ifndef XXH_NO_STREAM
6236 /*
6237  * All initialization and update functions are identical to 64-bit streaming variant.
6238  * The only difference is the finalization routine.
6239  */
6240 
6244 {
6245  return XXH3_64bits_reset(statePtr);
6246 }
6247 
6250 XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
6251 {
6252  return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
6253 }
6254 
6258 {
6259  return XXH3_64bits_reset_withSeed(statePtr, seed);
6260 }
6261 
6264 XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
6265 {
6266  return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
6267 }
6268 
6271 XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
6272 {
6273  return XXH3_64bits_update(state, input, len);
6274 }
6275 
6278 {
6279  const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
6280  if (state->totalLen > XXH3_MIDSIZE_MAX) {
6281  XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
6282  XXH3_digest_long(acc, state, secret);
6283  XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
6284  { XXH128_hash_t h128;
6285  h128.low64 = XXH3_mergeAccs(acc,
6286  secret + XXH_SECRET_MERGEACCS_START,
6287  (xxh_u64)state->totalLen * XXH_PRIME64_1);
6288  h128.high64 = XXH3_mergeAccs(acc,
6289  secret + state->secretLimit + XXH_STRIPE_LEN
6290  - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
6291  ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
6292  return h128;
6293  }
6294  }
6295  /* len <= XXH3_MIDSIZE_MAX : short code */
6296  if (state->seed)
6297  return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
6298  return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
6299  secret, state->secretLimit + XXH_STRIPE_LEN);
6300 }
6301 #endif /* !XXH_NO_STREAM */
6302 /* 128-bit utility functions */
6303 
6304 #include <string.h> /* memcmp, memcpy */
6305 
6306 /* return : 1 is equal, 0 if different */
6309 {
6310  /* note : XXH128_hash_t is compact, it has no padding byte */
6311  return !(memcmp(&h1, &h2, sizeof(h1)));
6312 }
6313 
6314 /* This prototype is compatible with stdlib's qsort().
6315  * @return : >0 if *h128_1 > *h128_2
6316  * <0 if *h128_1 < *h128_2
6317  * =0 if *h128_1 == *h128_2 */
6319 XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
6320 {
6321  XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
6322  XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
6323  int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
6324  /* note : bets that, in most cases, hash values are different */
6325  if (hcmp) return hcmp;
6326  return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
6327 }
6328 
6329 
6330 /*====== Canonical representation ======*/
6332 XXH_PUBLIC_API void
6334 {
6335  XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
6336  if (XXH_CPU_LITTLE_ENDIAN) {
6337  hash.high64 = XXH_swap64(hash.high64);
6338  hash.low64 = XXH_swap64(hash.low64);
6339  }
6340  XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
6341  XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
6342 }
6343 
6347 {
6348  XXH128_hash_t h;
6349  h.high64 = XXH_readBE64(src);
6350  h.low64 = XXH_readBE64(src->digest + 8);
6351  return h;
6352 }
6353 
6354 
6355 
6356 /* ==========================================
6357  * Secret generators
6358  * ==========================================
6359  */
6360 #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
6361 
6362 XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
6363 {
6364  XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
6365  XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
6366 }
6367 
6370 XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
6371 {
6372 #if (XXH_DEBUGLEVEL >= 1)
6373  XXH_ASSERT(secretBuffer != NULL);
6374  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
6375 #else
6376  /* production mode, assert() are disabled */
6377  if (secretBuffer == NULL) return XXH_ERROR;
6378  if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
6379 #endif
6380 
6381  if (customSeedSize == 0) {
6382  customSeed = XXH3_kSecret;
6383  customSeedSize = XXH_SECRET_DEFAULT_SIZE;
6384  }
6385 #if (XXH_DEBUGLEVEL >= 1)
6386  XXH_ASSERT(customSeed != NULL);
6387 #else
6388  if (customSeed == NULL) return XXH_ERROR;
6389 #endif
6390 
6391  /* Fill secretBuffer with a copy of customSeed - repeat as needed */
6392  { size_t pos = 0;
6393  while (pos < secretSize) {
6394  size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
6395  memcpy((char*)secretBuffer + pos, customSeed, toCopy);
6396  pos += toCopy;
6397  } }
6398 
6399  { size_t const nbSeg16 = secretSize / 16;
6400  size_t n;
6401  XXH128_canonical_t scrambler;
6402  XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
6403  for (n=0; n<nbSeg16; n++) {
6404  XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
6405  XXH3_combine16((char*)secretBuffer + n*16, h128);
6406  }
6407  /* last segment */
6408  XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
6409  }
6410  return XXH_OK;
6411 }
6412 
6414 XXH_PUBLIC_API void
6415 XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
6416 {
6417  XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6418  XXH3_initCustomSecret(secret, seed);
6419  XXH_ASSERT(secretBuffer != NULL);
6420  // cppcheck-suppress nullPointerRedundantCheck; false positive
6421  memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
6422 }
6423 
6424 
6425 
6426 /* Pop our optimization override from above */
6427 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
6428  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
6429  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
6430 # pragma GCC pop_options
6431 #endif
6432 
6433 #endif /* XXH_NO_LONG_LONG */
6434 
6435 #endif /* XXH_NO_XXH3 */
6436 
6440 #endif /* XXH_IMPLEMENTATION */
6441 
6442 
6443 #if defined (__cplusplus)
6444 } /* extern "C" */
6445 #endif
python.CaloBCIDAvgAlgConfig.acc3
def acc3
Definition: CaloBCIDAvgAlgConfig.py:69
base
std::string base
Definition: hcg.cxx:78
data
char data[hepevt_bytes_allocation_ATLAS]
Definition: HepEvt.cxx:11
python.SystemOfUnits.s
int s
Definition: SystemOfUnits.py:131
get_generator_info.result
result
Definition: get_generator_info.py:21
XXH3_128bits_digest
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest(XXH_NOESCAPE const XXH3_state_t *statePtr)
python.PerfMonSerializer.p
def p
Definition: PerfMonSerializer.py:743
XXH32_update
XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t *statePtr, const void *input, size_t length)
Consumes a block of input to an XXH32_state_t.
xAOD::uint8_t
uint8_t
Definition: Muon_v1.cxx:557
XXH64_update
XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH_NOESCAPE XXH64_state_t *statePtr, XXH_NOESCAPE const void *input, size_t length)
CaloCellPos2Ntuple.int
int
Definition: CaloCellPos2Ntuple.py:24
XXH32_digest
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest(const XXH32_state_t *statePtr)
Returns the calculated hash value from an XXH32_state_t.
xAOD::uint32_t
setEventNumber uint32_t
Definition: EventInfo_v1.cxx:127
WriteCellNoiseToCool.src
src
Definition: WriteCellNoiseToCool.py:513
XXH3_SECRET_SIZE_MIN
#define XXH3_SECRET_SIZE_MIN
The bare minimum size for a custom secret.
Definition: xxhash.h:950
XXH3_128bits_reset
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t *statePtr)
mergePhysValFiles.start
start
Definition: DataQuality/DataQualityUtils/scripts/mergePhysValFiles.py:14
extractSporadic.c1
c1
Definition: extractSporadic.py:134
XXH64_createState
XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t * XXH64_createState(void)
XXH_VERSION_NUMBER
#define XXH_VERSION_NUMBER
Version number, encoded as two digits each.
Definition: xxhash.h:453
XXH3_copyState
XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t *dst_state, XXH_NOESCAPE const XXH3_state_t *src_state)
Trk::one
@ one
Definition: TrkDetDescr/TrkSurfaces/TrkSurfaces/RealQuadraticEquation.h:22
XXH3_createState
XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t * XXH3_createState(void)
XXH64_hash_t
unsigned long long XXH64_hash_t
Definition: xxhash.h:794
upper
int upper(int c)
Definition: LArBadChannelParser.cxx:49
perfmonmt-printer.dest
dest
Definition: perfmonmt-printer.py:189
XXH_CONSTF
#define XXH_CONSTF
Definition: xxhash.h:441
XXH128_canonicalFromHash
XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t *dst, XXH128_hash_t hash)
dbg::ptr
void * ptr(T *p)
Definition: SGImplSvc.cxx:74
const
bool const RAWDATA *ch2 const
Definition: LArRodBlockPhysicsV0.cxx:562
XXH_NOESCAPE
#define XXH_NOESCAPE
Definition: xxhash.h:761
read_hist_ntuple.h1
h1
Definition: read_hist_ntuple.py:21
XXH32_state_t
struct XXH32_state_s XXH32_state_t
Streaming functions generate the xxHash value from an incremental input.
Definition: xxhash.h:582
x
#define x
xAOD::unsigned
unsigned
Definition: RingSetConf_v1.cxx:662
Trk::u
@ u
Enums for curvilinear frames.
Definition: ParamDefs.h:77
XXH_OK
@ XXH_OK
OK.
Definition: xxhash.h:474
python.utils.AtlRunQueryLookup.mask
string mask
Definition: AtlRunQueryLookup.py:460
XXH64_freeState
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr)
XXH_versionNumber
XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber(void)
Obtains the xxHash version.
compileRPVLLRates_emergingFilterTest.c3
c3
Definition: compileRPVLLRates_emergingFilterTest.py:559
python.setupRTTAlg.size
int size
Definition: setupRTTAlg.py:39
skel.input1
tuple input1
Definition: skel.GENtoEVGEN.py:777
H5Utils::internal::packed
H5::CompType packed(H5::CompType in)
Definition: common.cxx:16
XXH64
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void *input, size_t length, XXH64_hash_t seed)
Calculates the 64-bit hash of input using xxHash64.
createCoolChannelIdFile.buffer
buffer
Definition: createCoolChannelIdFile.py:12
XXH3_128bits_withSeed
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void *data, size_t len, XXH64_hash_t seed)
Seeded 128-bit variant of XXH3.
Generate_dsid_ranseed.seed
seed
Definition: Generate_dsid_ranseed.py:10
convertTimingResiduals.sum
sum
Definition: convertTimingResiduals.py:55
XXH128_hash_t
The return value from 128-bit hashes.
Definition: xxhash.h:1033
lumiFormat.i
int i
Definition: lumiFormat.py:85
python.LArBadChannelDBAlg.xFFFFFFFF
xFFFFFFFF
Definition: LArBadChannelDBAlg.py:73
XXH_PUREF
#define XXH_PUREF
Definition: xxhash.h:442
XXH_PUBLIC_API
#define XXH_PUBLIC_API
Marks a global symbol.
Definition: xxhash.h:432
ret
T ret(T t)
Definition: rootspy.cxx:260
beamspotman.n
n
Definition: beamspotman.py:731
python.CaloBCIDAvgAlgConfig.acc1
def acc1
Definition: CaloBCIDAvgAlgConfig.py:49
extractSporadic.h
list h
Definition: extractSporadic.py:97
XXH32_reset
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t *statePtr, XXH32_hash_t seed)
Resets an XXH32_state_t to begin a new hash.
PlotPulseshapeFromCool.input
input
Definition: PlotPulseshapeFromCool.py:106
XXH128_cmp
XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void *h128_1, XXH_NOESCAPE const void *h128_2)
Compares two XXH128_hash_t This comparator is compatible with stdlib's qsort()/bsearch().
python.CaloBCIDAvgAlgConfig.acc2
def acc2
Definition: CaloBCIDAvgAlgConfig.py:59
xAOD::uint64_t
uint64_t
Definition: EventInfo_v1.cxx:123
AthenaPoolTestRead.acc
acc
Definition: AthenaPoolTestRead.py:16
XXH32_freeState
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr)
Frees an XXH32_state_t.
TrigInDetValidation_Base.malloc
malloc
Definition: TrigInDetValidation_Base.py:133
XXH_MALLOCF
#define XXH_MALLOCF
Definition: xxhash.h:443
XXH32_hashFromCanonical
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t *src)
Converts an XXH32_canonical_t to a native XXH32_hash_t.
XXH3_64bits_reset_withSecret
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t *statePtr, XXH_NOESCAPE const void *secret, size_t secretSize)
XXH3_64bits_reset_withSecret(): secret is referenced, it must outlive the hash streaming session.
XXH3_128bits
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void *data, size_t len)
Unseeded 128-bit variant of XXH3.
XXH3_freeState
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t *statePtr)
XXH64_state_t
struct XXH64_state_s XXH64_state_t
The opaque state struct for the XXH64 streaming API.
Definition: xxhash.h:844
XXH3_64bits_reset_withSeed
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t *statePtr, XXH64_hash_t seed)
plotBeamSpotMon.b
b
Definition: plotBeamSpotMon.py:77
compileRPVLLRates.c2
c2
Definition: compileRPVLLRates.py:361
XXH128_isEqual
XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
XXH128_isEqual(): Return: 1 if h1 and h2 are equal, 0 if they are not.
XXH3_64bits_reset
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t *statePtr)
XXH3_128bits_update
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t *statePtr, XXH_NOESCAPE const void *input, size_t length)
XXH64_canonicalFromHash
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t *dst, XXH64_hash_t hash)
XXH3_128bits_reset_withSecret
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t *statePtr, XXH_NOESCAPE const void *secret, size_t secretSize)
Custom secret 128-bit variant of XXH3.
python.LumiBlobConversion.pos
pos
Definition: LumiBlobConversion.py:18
XXH64_hashFromCanonical
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t *src)
XXH32_canonical_t
Canonical (big endian) representation of XXH32_hash_t.
Definition: xxhash.h:685
XXH128_hashFromCanonical
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t *src)
XXH3_64bits_withSeed
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void *input, size_t length, XXH64_hash_t seed)
64-bit seeded variant of XXH3
ReadCellNoiseFromCoolCompare.v2
v2
Definition: ReadCellNoiseFromCoolCompare.py:364
python.PyAthena.v
v
Definition: PyAthena.py:154
XXH3_64bits
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void *input, size_t length)
64-bit unseeded variant of XXH3.
__attribute__
__attribute__((always_inline)) inline uint16_t TileCalibDrawerBase
Definition: TileCalibDrawerBase.h:190
a
TList * a
Definition: liststreamerinfos.cxx:10
XXH_errorcode
XXH_errorcode
Exit code for the streaming API.
Definition: xxhash.h:473
y
#define y
XXH64_copyState
XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t *dst_state, const XXH64_state_t *src_state)
h
CaloCondBlobAlgs_fillNoiseFromASCII.hash
dictionary hash
Definition: CaloCondBlobAlgs_fillNoiseFromASCII.py:109
XXH3_state_t
struct XXH3_state_s XXH3_state_t
The state struct for the XXH3 streaming API.
Definition: xxhash.h:987
Pythia8_RapidityOrderMPI.val
val
Definition: Pythia8_RapidityOrderMPI.py:14
XXH3_64bits_withSecret
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void *data, size_t len, XXH_NOESCAPE const void *secret, size_t secretSize)
64-bit variant of XXH3 with a custom "secret".
XXH_FALLTHROUGH
#define XXH_FALLTHROUGH
Definition: xxhash.h:750
if
if(febId1==febId2)
Definition: LArRodBlockPhysicsV0.cxx:569
convertTimingResiduals.offset
offset
Definition: convertTimingResiduals.py:71
XXH32
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32(const void *input, size_t length, XXH32_hash_t seed)
Calculates the 32-bit hash of input using xxHash32.
XXH64_digest
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t *statePtr)
XXH3_64bits_digest
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest(XXH_NOESCAPE const XXH3_state_t *statePtr)
XXH3_64bits_update
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t *statePtr, XXH_NOESCAPE const void *input, size_t length)
XXH64_canonical_t
Definition: xxhash.h:854
XXH_ERROR
@ XXH_ERROR
Error.
Definition: xxhash.h:475
python.compressB64.c
def c
Definition: compressB64.py:93
updateCoolNtuple.limit
int limit
Definition: updateCoolNtuple.py:45
XXH3_128bits_reset_withSeed
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t *statePtr, XXH64_hash_t seed)
length
double length(const pvec &v)
Definition: FPGATrackSimLLPDoubletHoughTransformTool.cxx:26
XXH128_canonical_t
Definition: xxhash.h:1106
XXH128_hash_t::high64
XXH64_hash_t high64
value >> 64
Definition: xxhash.h:1035
XXH32_canonicalFromHash
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t *dst, XXH32_hash_t hash)
Converts an XXH32_hash_t to a big endian XXH32_canonical_t.
XXH64_reset
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t *statePtr, XXH64_hash_t seed)
XXH3_128bits_withSecret
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void *data, size_t len, XXH_NOESCAPE const void *secret, size_t secretSize)
Custom secret 128-bit variant of XXH3.
XXH128_hash_t::low64
XXH64_hash_t low64
value & 0xFFFFFFFFFFFFFFFF
Definition: xxhash.h:1034
XXH32_createState
XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t * XXH32_createState(void)
Allocates an XXH32_state_t.
XXH32_copyState
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t *dst_state, const XXH32_state_t *src_state)
Copies one XXH32_state_t to another.