ATLAS Offline Software: xxhash.h
1/*
2 * xxHash - Extremely Fast Hash algorithm
3 * Header File
4 * Copyright (C) 2012-2025 Yann Collet
5 *
6 * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met:
11 *
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following disclaimer
16 * in the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * You can contact the author at:
32 * - xxHash homepage: https://www.xxhash.com
33 * - xxHash source repository: https://github.com/Cyan4973/xxHash
34 */
35
171
172#if defined (__cplusplus)
173extern "C" {
174#endif
175
176/* ****************************
177 * INLINE mode
178 ******************************/
184#ifdef XXH_DOXYGEN
203# define XXH_INLINE_ALL
204# undef XXH_INLINE_ALL
208# define XXH_PRIVATE_API
209# undef XXH_PRIVATE_API
223# define XXH_NAMESPACE /* YOUR NAME HERE */
224# undef XXH_NAMESPACE
225#endif
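/*
 * Usage sketch (not part of the upstream header; the prefix name below is an
 * illustrative placeholder). It shows how the build-time options documented
 * above are meant to be set by the including translation unit.
 *
 * @code{.c}
 *   // Option A: inline everything into this translation unit.
 *   #define XXH_INLINE_ALL
 *   #include "xxhash.h"
 *
 *   // Option B: keep public symbols but prefix them, e.g. to avoid clashes
 *   // with another copy of libxxhash linked into the same binary.
 *   // #define XXH_NAMESPACE MYPROJ_
 *   // #include "xxhash.h"
 *   // Callers keep writing XXH64(...); the #defines below map it to
 *   // MYPROJ_XXH64(...) at the symbol level.
 * @endcode
 */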
226
227#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
228 && !defined(XXH_INLINE_ALL_31684351384)
229 /* this section should be traversed only once */
230# define XXH_INLINE_ALL_31684351384
231 /* give access to the advanced API, required to compile implementations */
232# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */
233# define XXH_STATIC_LINKING_ONLY
234 /* make all functions private */
235# undef XXH_PUBLIC_API
236# if defined(__GNUC__)
237# define XXH_PUBLIC_API static __inline __attribute__((unused))
238# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
239# define XXH_PUBLIC_API static inline
240# elif defined(_MSC_VER)
241# define XXH_PUBLIC_API static __inline
242# else
243 /* note: this version may generate warnings for unused static functions */
244# define XXH_PUBLIC_API static
245# endif
246
247 /*
248 * This part deals with the special case where a unit wants to inline xxHash,
249 * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
250 * such as part of some previously included *.h header file.
251 * Without further action, the new include would just be ignored,
252 * and functions would effectively _not_ be inlined (silent failure).
253 * The following macros solve this situation by prefixing all inlined names,
254 * avoiding naming collision with previous inclusions.
255 */
256 /* Before that, we unconditionally #undef all symbols,
257 * in case they were already defined with XXH_NAMESPACE.
258 * They will then be redefined for XXH_INLINE_ALL
259 */
260# undef XXH_versionNumber
261 /* XXH32 */
262# undef XXH32
263# undef XXH32_createState
264# undef XXH32_freeState
265# undef XXH32_reset
266# undef XXH32_update
267# undef XXH32_digest
268# undef XXH32_copyState
269# undef XXH32_canonicalFromHash
270# undef XXH32_hashFromCanonical
271 /* XXH64 */
272# undef XXH64
273# undef XXH64_createState
274# undef XXH64_freeState
275# undef XXH64_reset
276# undef XXH64_update
277# undef XXH64_digest
278# undef XXH64_copyState
279# undef XXH64_canonicalFromHash
280# undef XXH64_hashFromCanonical
281 /* XXH3_64bits */
282# undef XXH3_64bits
283# undef XXH3_64bits_withSecret
284# undef XXH3_64bits_withSeed
285# undef XXH3_64bits_withSecretandSeed
286# undef XXH3_createState
287# undef XXH3_freeState
288# undef XXH3_copyState
289# undef XXH3_64bits_reset
290# undef XXH3_64bits_reset_withSeed
291# undef XXH3_64bits_reset_withSecret
292# undef XXH3_64bits_update
293# undef XXH3_64bits_digest
294# undef XXH3_generateSecret
295 /* XXH3_128bits */
296# undef XXH128
297# undef XXH3_128bits
298# undef XXH3_128bits_withSeed
299# undef XXH3_128bits_withSecret
300# undef XXH3_128bits_reset
301# undef XXH3_128bits_reset_withSeed
302# undef XXH3_128bits_reset_withSecret
303# undef XXH3_128bits_reset_withSecretandSeed
304# undef XXH3_128bits_update
305# undef XXH3_128bits_digest
306# undef XXH128_isEqual
307# undef XXH128_cmp
308# undef XXH128_canonicalFromHash
309# undef XXH128_hashFromCanonical
310 /* Finally, free the namespace itself */
311# undef XXH_NAMESPACE
312
313 /* employ the namespace for XXH_INLINE_ALL */
314# define XXH_NAMESPACE XXH_INLINE_
315 /*
316 * Some identifiers (enums, type names) are not symbols,
317 * but they must nonetheless be renamed to avoid redeclaration.
318 * Alternative solution: do not redeclare them.
319 * However, this requires some #ifdefs, and has a more dispersed impact.
320 * Meanwhile, renaming can be achieved in a single place.
321 */
322# define XXH_IPREF(Id) XXH_NAMESPACE ## Id
323# define XXH_OK XXH_IPREF(XXH_OK)
324# define XXH_ERROR XXH_IPREF(XXH_ERROR)
325# define XXH_errorcode XXH_IPREF(XXH_errorcode)
326# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
327# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
328# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
329# define XXH32_state_s XXH_IPREF(XXH32_state_s)
330# define XXH32_state_t XXH_IPREF(XXH32_state_t)
331# define XXH64_state_s XXH_IPREF(XXH64_state_s)
332# define XXH64_state_t XXH_IPREF(XXH64_state_t)
333# define XXH3_state_s XXH_IPREF(XXH3_state_s)
334# define XXH3_state_t XXH_IPREF(XXH3_state_t)
335# define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
336 /* Ensure the header is parsed again, even if it was previously included */
337# undef XXHASH_H_5627135585666179
338# undef XXHASH_H_STATIC_13879238742
339#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
340
341/* ****************************************************************
342 * Stable API
343 *****************************************************************/
344#ifndef XXHASH_H_5627135585666179
345#define XXHASH_H_5627135585666179 1
346
348#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
349# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
350# ifdef XXH_EXPORT
351# define XXH_PUBLIC_API __declspec(dllexport)
352# elif XXH_IMPORT
353# define XXH_PUBLIC_API __declspec(dllimport)
354# endif
355# else
356# define XXH_PUBLIC_API /* do nothing */
357# endif
358#endif
359
360#ifdef XXH_NAMESPACE
361# define XXH_CAT(A,B) A##B
362# define XXH_NAME2(A,B) XXH_CAT(A,B)
363# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
364/* XXH32 */
365# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
366# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
367# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
368# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
369# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
370# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
371# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
372# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
373# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
374/* XXH64 */
375# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
376# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
377# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
378# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
379# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
380# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
381# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
382# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
383# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
384/* XXH3_64bits */
385# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
386# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
387# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
388# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
389# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
390# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
391# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
392# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
393# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
394# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
395# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
396# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
397# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
398# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
399# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
400/* XXH3_128bits */
401# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
402# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
403# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
404# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
405# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
406# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
407# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
408# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
409# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
410# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
411# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
412# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
413# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
414# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
415# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
416#endif
417
418
419/* *************************************
420* Compiler specifics
421***************************************/
422
423/* specific declaration modes for Windows */
424#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
425# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
426# ifdef XXH_EXPORT
427# define XXH_PUBLIC_API __declspec(dllexport)
428# elif XXH_IMPORT
429# define XXH_PUBLIC_API __declspec(dllimport)
430# endif
431# else
432# define XXH_PUBLIC_API /* do nothing */
433# endif
434#endif
435
436#if defined (__GNUC__)
437# define XXH_CONSTF __attribute__((const))
438# define XXH_PUREF __attribute__((pure))
439# define XXH_MALLOCF __attribute__((malloc))
440#else
441# define XXH_CONSTF /* disable */
442# define XXH_PUREF
443# define XXH_MALLOCF
444#endif
445
446/* *************************************
447* Version
448***************************************/
449#define XXH_VERSION_MAJOR 0
450#define XXH_VERSION_MINOR 8
451#define XXH_VERSION_RELEASE 2
453#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
454
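/*
 * Illustrative sketch (not part of the upstream header): how the version
 * macros above combine, and a run-time sanity check against the linked
 * library. XXH_versionNumber() is declared later in this header.
 *
 * @code{.c}
 *   #include <assert.h>
 *   #include "xxhash.h"
 *
 *   void check_xxhash_version(void)
 *   {
 *       // 0.8.2 encodes as 0*100*100 + 8*100 + 2 == 802
 *       assert(XXH_versionNumber() == XXH_VERSION_NUMBER);
 *   }
 * @endcode
 */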
464
465
466/* ****************************
467* Common basic types
468******************************/
469#include <stddef.h> /* size_t */
473typedef enum {
474 XXH_OK = 0,
475 XXH_ERROR
476} XXH_errorcode;
477
478
479/*-**********************************************************************
480* 32-bit hash
481************************************************************************/
482#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
488typedef uint32_t XXH32_hash_t;
489
490#elif !defined (__VMS) \
491 && (defined (__cplusplus) \
492 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
493# include <stdint.h>
494 typedef uint32_t XXH32_hash_t;
495
496#else
497# include <limits.h>
498# if UINT_MAX == 0xFFFFFFFFUL
499 typedef unsigned int XXH32_hash_t;
500# elif ULONG_MAX == 0xFFFFFFFFUL
501 typedef unsigned long XXH32_hash_t;
502# else
503# error "unsupported platform: need a 32-bit type"
504# endif
505#endif
506
523
548XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
549
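/*
 * Minimal one-shot usage sketch for XXH32(), declared just above (not part of
 * the upstream header; the buffer and seed are arbitrary examples).
 *
 * @code{.c}
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   XXH32_hash_t hash_message(const char* msg)
 *   {
 *       XXH32_hash_t const seed = 0;            // 0 is a valid, common seed
 *       return XXH32(msg, strlen(msg), seed);   // hash the whole buffer in one call
 *   }
 * @endcode
 */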
550#ifndef XXH_NO_STREAM
575
582typedef struct XXH32_state_s XXH32_state_t;
583
608
623
642XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
643
658XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
659#endif /* !XXH_NO_STREAM */
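/*
 * Streaming usage sketch for the interface above (create / reset / update /
 * digest / free). Not part of the upstream header; error handling is reduced
 * to a single check and the file I/O is a placeholder.
 *
 * @code{.c}
 *   #include <stdio.h>
 *   #include "xxhash.h"
 *
 *   XXH32_hash_t hash_file_xxh32(FILE* f, XXH32_hash_t seed)
 *   {
 *       char buf[4096];
 *       size_t n;
 *       XXH32_hash_t result = 0;
 *       XXH32_state_t* const state = XXH32_createState();
 *       if (state == NULL) return 0;                   // allocation failure
 *       if (XXH32_reset(state, seed) == XXH_OK) {
 *           while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
 *               (void)XXH32_update(state, buf, n);     // feed data incrementally
 *           result = XXH32_digest(state);              // may be called at any point
 *       }
 *       XXH32_freeState(state);
 *       return result;
 *   }
 * @endcode
 */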
660
661/******* Canonical representation *******/
662
663/*
664 * The default return values from XXH functions are unsigned 32 and 64 bit
665 * integers.
666 * This is the simplest and fastest format for further post-processing.
667 *
668 * However, this leaves open the question of what is the order on the byte level,
669 * since little and big endian conventions will store the same number differently.
670 *
671 * The canonical representation settles this issue by mandating big-endian
672 * convention, the same convention as human-readable numbers (large digits first).
673 *
674 * When writing hash values to storage, sending them over a network, or printing
675 * them, it's highly recommended to use the canonical representation to ensure
676 * portability across a wider range of systems, present and future.
677 *
678 * The following functions allow transformation of hash values to and from
679 * canonical format.
680 */
681
685typedef struct {
686 unsigned char digest[4];
687} XXH32_canonical_t;
688
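/*
 * Sketch (not part of the upstream header) of round-tripping a hash through
 * the canonical big-endian form described above, e.g. before writing it to
 * disk or sending it over a network.
 *
 * @code{.c}
 *   #include "xxhash.h"
 *
 *   void store_and_reload(XXH32_hash_t h)
 *   {
 *       XXH32_canonical_t canon;
 *       XXH32_canonicalFromHash(&canon, h);            // endian-independent 4-byte form
 *       // ... write canon.digest (4 bytes) somewhere, read it back ...
 *       XXH32_hash_t const back = XXH32_hashFromCanonical(&canon);
 *       (void)back;                                    // back == h on any platform
 *   }
 * @endcode
 */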
699
711
712
713#ifdef __has_attribute
714# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
715#else
716# define XXH_HAS_ATTRIBUTE(x) 0
717#endif
718
719/*
720 * C23 __STDC_VERSION__ number hasn't been specified yet. For now
721 * leave as `201711L` (C17 + 1).
722 * TODO: Update to correct value when it has been specified.
723 */
724#define XXH_C23_VN 201711L
725
726/* C-language Attributes are added in C23. */
727#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
728# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
729#else
730# define XXH_HAS_C_ATTRIBUTE(x) 0
731#endif
732
733#if defined(__cplusplus) && defined(__has_cpp_attribute)
734# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
735#else
736# define XXH_HAS_CPP_ATTRIBUTE(x) 0
737#endif
738
739/*
740 * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
741 * introduced in CPP17 and C23.
742 * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
743 * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
744 */
745#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
746# define XXH_FALLTHROUGH [[fallthrough]]
747#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
748# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
749#else
750# define XXH_FALLTHROUGH /* fallthrough */
751#endif
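/*
 * Tiny illustration (not from the upstream header) of how XXH_FALLTHROUGH is
 * meant to be used: it silences implicit-fallthrough warnings on compilers
 * that support the attribute and expands to nothing elsewhere. The helper
 * function below is a made-up example; p points at the (len & 3)-byte tail.
 *
 * @code{.c}
 *   #include <stddef.h>
 *   #include "xxhash.h"
 *
 *   static unsigned sum_tail(const unsigned char* p, size_t len)
 *   {
 *       unsigned acc = 0;
 *       switch (len & 3) {
 *       case 3: acc += p[2];
 *               XXH_FALLTHROUGH;   // deliberate fallthrough
 *       case 2: acc += p[1];
 *               XXH_FALLTHROUGH;   // deliberate fallthrough
 *       case 1: acc += p[0];
 *               XXH_FALLTHROUGH;
 *       case 0: break;
 *       }
 *       return acc;
 *   }
 * @endcode
 */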
752
753/*
754 * Define XXH_NOESCAPE for annotated pointers in public API.
755 * https://clang.llvm.org/docs/AttributeReference.html#noescape
756 * As of writing this, only supported by clang.
757 */
758#if XXH_HAS_ATTRIBUTE(noescape)
759# define XXH_NOESCAPE __attribute__((noescape))
760#else
761# define XXH_NOESCAPE
762#endif
763
764
770
771#ifndef XXH_NO_LONG_LONG
772/*-**********************************************************************
773* 64-bit hash
774************************************************************************/
775#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
781typedef uint64_t XXH64_hash_t;
782#elif !defined (__VMS) \
783 && (defined (__cplusplus) \
784 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
785# include <stdint.h>
786 typedef uint64_t XXH64_hash_t;
787#else
788# include <limits.h>
789# if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
790 /* LP64 ABI says uint64_t is unsigned long */
791 typedef unsigned long XXH64_hash_t;
792# else
793 /* the following type must have a width of 64-bit */
794 typedef unsigned long long XXH64_hash_t;
795# endif
796#endif
797
811
836
837/******* Streaming *******/
838#ifndef XXH_NO_STREAM
844typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
848
852#endif /* !XXH_NO_STREAM */
853/******* Canonical representation *******/
854typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
857
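/*
 * XXH64 mirrors the XXH32 API with 64-bit types. A minimal one-shot sketch
 * (not part of the upstream header; input and seed are arbitrary examples):
 *
 * @code{.c}
 *   #include <string.h>
 *   #include "xxhash.h"
 *
 *   XXH64_hash_t hash64_message(const char* msg)
 *   {
 *       XXH64_hash_t const seed = 0;
 *       return XXH64(msg, strlen(msg), seed);
 *   }
 * @endcode
 */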
858#ifndef XXH_NO_XXH3
859
906/*-**********************************************************************
907* XXH3 64-bit variant
908************************************************************************/
909
925
942
950#define XXH3_SECRET_SIZE_MIN 136
951
970XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
971
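/*
 * One-shot usage sketch for the XXH3 64-bit variant (not part of the upstream
 * header). It uses the seedless and seeded entry points referenced in this
 * section; the branching policy is purely illustrative.
 *
 * @code{.c}
 *   #include <stddef.h>
 *   #include "xxhash.h"
 *
 *   XXH64_hash_t hash_xxh3(const void* data, size_t len, XXH64_hash_t seed)
 *   {
 *       if (seed == 0)
 *           return XXH3_64bits(data, len);              // fastest path, default secret
 *       return XXH3_64bits_withSeed(data, len, seed);   // derives a secret from the seed
 *   }
 * @endcode
 */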
972
973/******* Streaming *******/
974#ifndef XXH_NO_STREAM
975/*
976 * Streaming requires state maintenance.
977 * This operation costs memory and CPU.
978 * As a consequence, streaming is slower than one-shot hashing.
979 * For better performance, prefer one-shot functions whenever applicable.
980 */
981
987typedef struct XXH3_state_s XXH3_state_t;
991
992/*
993 * XXH3_64bits_reset():
994 * Initialize with default parameters.
995 * digest will be equivalent to `XXH3_64bits()`.
996 */
998/*
999 * XXH3_64bits_reset_withSeed():
1000 * Generate a custom secret from `seed`, and store it into `statePtr`.
1001 * digest will be equivalent to `XXH3_64bits_withSeed()`.
1002 */
1014
1017#endif /* !XXH_NO_STREAM */
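/*
 * Streaming sketch for XXH3_64bits using the state functions referenced above
 * (XXH3_createState / XXH3_64bits_reset / _update / _digest / XXH3_freeState).
 * Not part of the upstream header; chunk handling and error checks are
 * deliberately simplified.
 *
 * @code{.c}
 *   #include <stddef.h>
 *   #include "xxhash.h"
 *
 *   XXH64_hash_t hash_chunks_xxh3(const void* const* chunks, const size_t* sizes, size_t n)
 *   {
 *       XXH64_hash_t result = 0;
 *       XXH3_state_t* const state = XXH3_createState();
 *       if (state == NULL) return 0;
 *       if (XXH3_64bits_reset(state) == XXH_OK) {       // default (seedless) parameters
 *           size_t i;
 *           for (i = 0; i < n; i++)
 *               (void)XXH3_64bits_update(state, chunks[i], sizes[i]);
 *           result = XXH3_64bits_digest(state);
 *       }
 *       XXH3_freeState(state);
 *       return result;
 *   }
 * @endcode
 */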
1018
1019/* note : canonical representation of XXH3 is the same as XXH64
1020 * since they both produce XXH64_hash_t values */
1021
1022
1023/*-**********************************************************************
1024* XXH3 128-bit variant
1025************************************************************************/
1026
1037
1059XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
1060
1061/******* Streaming *******/
1062#ifndef XXH_NO_STREAM
1063/*
1064 * Streaming requires state maintenance.
1065 * This operation costs memory and CPU.
1066 * As a consequence, streaming is slower than one-shot hashing.
1067 * For better performance, prefer one-shot functions whenever applicable.
1068 *
1069 * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
1070 * Use already declared XXH3_createState() and XXH3_freeState().
1071 *
1072 * All reset and streaming functions have the same meaning as their 64-bit counterparts.
1073 */
1074
1079
1082#endif /* !XXH_NO_STREAM */
1083
1084/* Following helper functions make it possible to compare XXH128_hash_t values.
1085 * Since XXH128_hash_t is a structure, this capability is not offered by the language.
1086 * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
1087
1093
1102XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
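/*
 * Comparison sketch (not part of the upstream header). XXH128_isEqual() takes
 * hashes by value; XXH128_cmp() takes pointers and provides a total ordering
 * suitable for qsort()/bsearch()-style callbacks.
 *
 * @code{.c}
 *   #include <stddef.h>
 *   #include "xxhash.h"
 *
 *   int same_content(const void* a, size_t lenA, const void* b, size_t lenB)
 *   {
 *       XXH128_hash_t const ha = XXH3_128bits(a, lenA);
 *       XXH128_hash_t const hb = XXH3_128bits(b, lenB);
 *       return XXH128_isEqual(ha, hb);   // 1 if equal, 0 otherwise
 *   }
 * @endcode
 */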
1103
1104
1105/******* Canonical representation *******/
1106typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
1109
1110
1111#endif /* !XXH_NO_XXH3 */
1112#endif /* XXH_NO_LONG_LONG */
1113
1117#endif /* XXHASH_H_5627135585666179 */
1118
1119
1120
1121#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
1122#define XXHASH_H_STATIC_13879238742
1123/* ****************************************************************************
1124 * This section contains declarations which are not guaranteed to remain stable.
1125 * They may change in future versions, becoming incompatible with a different
1126 * version of the library.
1127 * These declarations should only be used with static linking.
1128 * Never use them in association with dynamic linking!
1129 ***************************************************************************** */
1130
1131/*
1132 * These definitions are only present to allow static allocation
1133 * of XXH states, on stack or in a struct, for example.
1134 * Never **ever** access their members directly.
1135 */
1136
1149struct XXH32_state_s {
1150 XXH32_hash_t total_len_32;
1151 XXH32_hash_t large_len;
1152 XXH32_hash_t v[4];
1153 XXH32_hash_t mem32[4];
1154 XXH32_hash_t memsize;
1155 XXH32_hash_t reserved;
1156}; /* typedef'd to XXH32_state_t */
1157
1158
1159#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
1160
1173struct XXH64_state_s {
1174 XXH64_hash_t total_len;
1175 XXH64_hash_t v[4];
1176 XXH64_hash_t mem64[4];
1177 XXH32_hash_t memsize;
1178 XXH32_hash_t reserved32;
1179 XXH64_hash_t reserved64;
1180}; /* typedef'd to XXH64_state_t */
1181
1182#ifndef XXH_NO_XXH3
1183
1184#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
1185# include <stdalign.h>
1186# define XXH_ALIGN(n) alignas(n)
1187#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
1188/* In C++ alignas() is a keyword */
1189# define XXH_ALIGN(n) alignas(n)
1190#elif defined(__GNUC__)
1191# define XXH_ALIGN(n) __attribute__ ((aligned(n)))
1192#elif defined(_MSC_VER)
1193# define XXH_ALIGN(n) __declspec(align(n))
1194#else
1195# define XXH_ALIGN(n) /* disabled */
1196#endif
1197
1198/* Old GCC versions only accept the attribute after the type in structures. */
1199#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
1200 && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
1201 && defined(__GNUC__)
1202# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
1203#else
1204# define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
1205#endif
1206
1214#define XXH3_INTERNALBUFFER_SIZE 256
1215
1223#define XXH3_SECRET_DEFAULT_SIZE 192
1224
1247struct XXH3_state_s {
1248 XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
1250 XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
1252 XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
1254 XXH32_hash_t bufferedSize;
1256 XXH32_hash_t useSeed;
1258 size_t nbStripesSoFar;
1260 XXH64_hash_t totalLen;
1262 size_t nbStripesPerBlock;
1264 size_t secretLimit;
1268 XXH64_hash_t reserved64;
1270 const unsigned char* extSecret;
1273 /* note: there may be some padding at the end due to alignment on 64 bytes */
1274}; /* typedef'd to XXH3_state_t */
1275
1276#undef XXH_ALIGN_MEMBER
1277
1289#define XXH3_INITSTATE(XXH3_state_ptr) \
1290 do { \
1291 XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
1292 tmp_xxh3_state_ptr->seed = 0; \
1293 tmp_xxh3_state_ptr->extSecret = NULL; \
1294 } while(0)
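/*
 * Sketch of statically allocating an XXH3 state, which is what the macro
 * above exists for (not part of the upstream header). The state still has to
 * go through a reset call before use; XXH3_INITSTATE() only puts it into a
 * safely resettable condition. Requires XXH_STATIC_LINKING_ONLY, since the
 * struct definition lives in this section.
 *
 * @code{.c}
 *   #include <stddef.h>
 *   #include "xxhash.h"
 *
 *   XXH64_hash_t hash_on_stack(const void* data, size_t len)
 *   {
 *       XXH3_state_t state;                  // stack allocation, no malloc()
 *       XXH3_INITSTATE(&state);
 *       (void)XXH3_64bits_reset(&state);
 *       (void)XXH3_64bits_update(&state, data, len);
 *       return XXH3_64bits_digest(&state);
 *   }
 * @endcode
 */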
1295
1296
1300XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
1301
1302
1303/* === Experimental API === */
1304/* Symbols defined below must be considered tied to a specific library version. */
1305
1357XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
1358
1396XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
1397
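/*
 * Sketch combining the secret generators above with the _withSecret() entry
 * points (not part of the upstream header). The buffer uses
 * XXH3_SECRET_DEFAULT_SIZE, which is the size XXH3_generateSecret_fromSeed()
 * fills.
 *
 * @code{.c}
 *   #include <stddef.h>
 *   #include "xxhash.h"
 *
 *   XXH64_hash_t hash_with_derived_secret(const void* data, size_t len, XXH64_hash_t seed)
 *   {
 *       unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
 *       XXH3_generateSecret_fromSeed(secret, seed);     // fills all 192 bytes
 *       return XXH3_64bits_withSecret(data, len, secret, sizeof(secret));
 *   }
 * @endcode
 */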
1424XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
1425XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
1426 XXH_NOESCAPE const void* secret, size_t secretSize,
1427 XXH64_hash_t seed);
1429XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
1430XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
1431 XXH_NOESCAPE const void* secret, size_t secretSize,
1432 XXH64_hash_t seed64);
1433#ifndef XXH_NO_STREAM
1435XXH_PUBLIC_API XXH_errorcode
1436XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1437 XXH_NOESCAPE const void* secret, size_t secretSize,
1438 XXH64_hash_t seed64);
1440XXH_PUBLIC_API XXH_errorcode
1441XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1442 XXH_NOESCAPE const void* secret, size_t secretSize,
1443 XXH64_hash_t seed64);
1444#endif /* !XXH_NO_STREAM */
1445
1446#endif /* !XXH_NO_XXH3 */
1447#endif /* XXH_NO_LONG_LONG */
1448#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
1449# define XXH_IMPLEMENTATION
1450#endif
1451
1452#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
1453
1454
1455/* ======================================================================== */
1456/* ======================================================================== */
1457/* ======================================================================== */
1458
1459
1460/*-**********************************************************************
1461 * xxHash implementation
1462 *-**********************************************************************
1463 * xxHash's implementation used to be hosted inside xxhash.c.
1464 *
1465 * However, inlining requires implementation to be visible to the compiler,
1466 * hence be included alongside the header.
1467 * Previously, implementation was hosted inside xxhash.c,
1468 * which was then #included when inlining was activated.
1469 * This construction created issues with a few build and install systems,
1470 * as it required xxhash.c to be stored in /include directory.
1471 *
1472 * xxHash implementation is now directly integrated within xxhash.h.
1473 * As a consequence, xxhash.c is no longer needed in /include.
1474 *
1475 * xxhash.c is still available and is still useful.
1476 * In a "normal" setup, when xxhash is not inlined,
1477 * xxhash.h only exposes the prototypes and public symbols,
1478 * while xxhash.c can be built into an object file xxhash.o
1479 * which can then be linked into the final binary.
1480 ************************************************************************/
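/*
 * Two build setups consistent with the description above, shown as sketches
 * (not part of the upstream header; file names are placeholders).
 *
 * @code{.c}
 *   // Setup 1 ("normal"): compile xxhash.c once into xxhash.o and include
 *   // xxhash.h everywhere else; the header only exposes prototypes.
 *
 *   // Setup 2 (header-only): in exactly one translation unit, request the
 *   // implementation before including the header; every other unit includes
 *   // xxhash.h normally.
 *   #define XXH_STATIC_LINKING_ONLY
 *   #define XXH_IMPLEMENTATION
 *   #include "xxhash.h"
 * @endcode
 */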
1481
1482#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
1483 || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
1484# define XXH_IMPLEM_13a8737387
1485
1486/* *************************************
1487* Tuning parameters
1488***************************************/
1489
1496#ifdef XXH_DOXYGEN
1502# define XXH_NO_LONG_LONG
1503# undef XXH_NO_LONG_LONG /* don't actually */
1554# define XXH_FORCE_MEMORY_ACCESS 0
1555
1582# define XXH_SIZE_OPT 0
1583
1612# define XXH_FORCE_ALIGN_CHECK 0
1613
1634# define XXH_NO_INLINE_HINTS 0
1635
1651# define XXH3_INLINE_SECRET 0
1652
1663# define XXH32_ENDJMP 0
1664
1672# define XXH_OLD_NAMES
1673# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
1674
1683# define XXH_NO_STREAM
1684# undef XXH_NO_STREAM /* don't actually */
1685#endif /* XXH_DOXYGEN */
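/*
 * The tuning macros documented above are normally left at their defaults.
 * When they are overridden, it is done before this point, typically on the
 * compiler command line or just before including the implementation. A sketch
 * (values are examples only):
 *
 * @code{.c}
 *   #define XXH_FORCE_MEMORY_ACCESS 0   // always use the memcpy() access method
 *   #define XXH_SIZE_OPT 1              // prefer smaller code over speed
 *   #define XXH_IMPLEMENTATION
 *   #include "xxhash.h"
 * @endcode
 */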
1689
1690#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
1691 /* prefer __packed__ structures (method 1) for GCC
1692 * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
1693 * which for some reason does unaligned loads. */
1694# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
1695# define XXH_FORCE_MEMORY_ACCESS 1
1696# endif
1697#endif
1698
1699#ifndef XXH_SIZE_OPT
1700 /* default to 1 for -Os or -Oz */
1701# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
1702# define XXH_SIZE_OPT 1
1703# else
1704# define XXH_SIZE_OPT 0
1705# endif
1706#endif
1707
1708#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
1709 /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
1710# if XXH_SIZE_OPT >= 1 || \
1711 defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
1712 || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
1713# define XXH_FORCE_ALIGN_CHECK 0
1714# else
1715# define XXH_FORCE_ALIGN_CHECK 1
1716# endif
1717#endif
1718
1719#ifndef XXH_NO_INLINE_HINTS
1720# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
1721# define XXH_NO_INLINE_HINTS 1
1722# else
1723# define XXH_NO_INLINE_HINTS 0
1724# endif
1725#endif
1726
1727#ifndef XXH3_INLINE_SECRET
1728# if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
1729 || !defined(XXH_INLINE_ALL)
1730# define XXH3_INLINE_SECRET 0
1731# else
1732# define XXH3_INLINE_SECRET 1
1733# endif
1734#endif
1735
1736#ifndef XXH32_ENDJMP
1737/* generally preferable for performance */
1738# define XXH32_ENDJMP 0
1739#endif
1740
1745
1746
1747/* *************************************
1748* Includes & Memory related functions
1749***************************************/
1750#if defined(XXH_NO_STREAM)
1751/* nothing */
1752#elif defined(XXH_NO_STDLIB)
1753
1754/* When requesting to disable any mention of stdlib,
1755 * the library loses the ability to invoke malloc() / free().
1756 * In practice, it means that functions like `XXH*_createState()`
1757 * will always fail, and return NULL.
1758 * This flag is useful in situations where
1759 * xxhash.h is integrated into some kernel, embedded or limited environment
1760 * without access to dynamic allocation.
1761 */
1762
1763static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
1764static void XXH_free(void* p) { (void)p; }
1765
1766#else
1767
1768/*
1769 * Modify the local functions below should you wish to use
1770 * different memory routines for malloc() and free()
1771 */
1772#include <stdlib.h>
1773
1778static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
1779
1784static void XXH_free(void* p) { free(p); }
1785
1786#endif /* XXH_NO_STDLIB */
1787
1788#include <string.h>
1789
1794static void* XXH_memcpy(void* dest, const void* src, size_t size)
1795{
1796 return memcpy(dest,src,size);
1797}
1798
1799#include <limits.h> /* ULLONG_MAX */
1800
1801
1802/* *************************************
1803* Compiler Specific Options
1804***************************************/
1805#ifdef _MSC_VER /* Visual Studio warning fix */
1806# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1807#endif
1808
1809#if XXH_NO_INLINE_HINTS /* disable inlining hints */
1810# if defined(__GNUC__) || defined(__clang__)
1811# define XXH_FORCE_INLINE static __attribute__((unused))
1812# else
1813# define XXH_FORCE_INLINE static
1814# endif
1815# define XXH_NO_INLINE static
1816/* enable inlining hints */
1817#elif defined(__GNUC__) || defined(__clang__)
1818# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
1819# define XXH_NO_INLINE static __attribute__((noinline))
1820#elif defined(_MSC_VER) /* Visual Studio */
1821# define XXH_FORCE_INLINE static __forceinline
1822# define XXH_NO_INLINE static __declspec(noinline)
1823#elif defined (__cplusplus) \
1824 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
1825# define XXH_FORCE_INLINE static inline
1826# define XXH_NO_INLINE static
1827#else
1828# define XXH_FORCE_INLINE static
1829# define XXH_NO_INLINE static
1830#endif
1831
1832#if XXH3_INLINE_SECRET
1833# define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
1834#else
1835# define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
1836#endif
1837
1838
1839/* *************************************
1840* Debug
1841***************************************/
1850#ifndef XXH_DEBUGLEVEL
1851# ifdef DEBUGLEVEL /* backwards compat */
1852# define XXH_DEBUGLEVEL DEBUGLEVEL
1853# else
1854# define XXH_DEBUGLEVEL 0
1855# endif
1856#endif
1857
1858#if (XXH_DEBUGLEVEL>=1) || __CPPCHECK__
1859# include <assert.h> /* note: can still be disabled with NDEBUG */
1860# define XXH_ASSERT(c) assert(c)
1861#else
1862# define XXH_ASSERT(c) XXH_ASSUME(c)
1863#endif
1864
1865/* note: use after variable declarations */
1866#ifndef XXH_STATIC_ASSERT
1867# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
1868# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
1869# elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
1870# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
1871# else
1872# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
1873# endif
1874# define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
1875#endif
1876
1893#if defined(__GNUC__) || defined(__clang__)
1894# define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
1895#else
1896# define XXH_COMPILER_GUARD(var) ((void)0)
1897#endif
1898
1899#if defined(__clang__)
1900# define XXH_COMPILER_GUARD_W(var) __asm__("" : "+w" (var))
1901#else
1902# define XXH_COMPILER_GUARD_W(var) ((void)0)
1903#endif
1904
1905/* *************************************
1906* Basic Types
1907***************************************/
1908#if !defined (__VMS) \
1909 && (defined (__cplusplus) \
1910 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
1911# include <stdint.h>
1912 typedef uint8_t xxh_u8;
1913#else
1914 typedef unsigned char xxh_u8;
1915#endif
1916typedef XXH32_hash_t xxh_u32;
1917
1918#ifdef XXH_OLD_NAMES
1919# define BYTE xxh_u8
1920# define U8 xxh_u8
1921# define U32 xxh_u32
1922#endif
1923
1924/* *** Memory access *** */
1925
1936
1947
1958
1975
1976#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
1977/*
1978 * Manual byteshift. Best for old compilers which don't inline memcpy.
1979 * We actually directly use XXH_readLE32 and XXH_readBE32.
1980 */
1981#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
1982
1983/*
1984 * Force direct memory access. Only works on CPU which support unaligned memory
1985 * access in hardware.
1986 */
1987static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
1988
1989#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1990
1991/*
1992 * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
1993 * documentation claimed that it only increased the alignment, but actually it
1994 * can decrease it on gcc, clang, and icc:
1995 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
1996 * https://gcc.godbolt.org/z/xYez1j67Y.
1997 */
1998#ifdef XXH_OLD_NAMES
1999typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
2000#endif
2001static xxh_u32 XXH_read32(const void* ptr)
2002{
2003 typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
2004 return *((const xxh_unalign32*)ptr);
2005}
2006
2007#else
2008
2009/*
2010 * Portable and safe solution. Generally efficient.
2011 * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2012 */
2013static xxh_u32 XXH_read32(const void* memPtr)
2014{
2015 xxh_u32 val;
2016 XXH_memcpy(&val, memPtr, sizeof(val));
2017 return val;
2018}
2019
2020#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
2021
2022
2023/* *** Endianness *** */
2024
2041#ifndef XXH_CPU_LITTLE_ENDIAN
2042/*
2043 * Try to detect endianness automatically, to avoid the nonstandard behavior
2044 * in `XXH_isLittleEndian()`
2045 */
2046# if defined(_WIN32) /* Windows is always little endian */ \
2047 || defined(__LITTLE_ENDIAN__) \
2048 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
2049# define XXH_CPU_LITTLE_ENDIAN 1
2050# elif defined(__BIG_ENDIAN__) \
2051 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2052# define XXH_CPU_LITTLE_ENDIAN 0
2053# else
2060static int XXH_isLittleEndian(void)
2061{
2062 /*
2063 * Portable and well-defined behavior.
2064 * Don't use static: it is detrimental to performance.
2065 */
2066 const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
2067 return one.c[0];
2068}
2069# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
2070# endif
2071#endif
2072
2073
2074
2075
2076/* ****************************************
2077* Compiler-specific Functions and Macros
2078******************************************/
2079#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
2080
2081#ifdef __has_builtin
2082# define XXH_HAS_BUILTIN(x) __has_builtin(x)
2083#else
2084# define XXH_HAS_BUILTIN(x) 0
2085#endif
2086
2087
2088
2089/*
2090 * C23 and future versions have standard "unreachable()".
2091 * Once it has been implemented reliably we can add it as an
2092 * additional case:
2093 *
2094 * ```
2095 * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
2096 * # include <stddef.h>
2097 * # ifdef unreachable
2098 * # define XXH_UNREACHABLE() unreachable()
2099 * # endif
2100 * #endif
2101 * ```
2102 *
2103 * Note C++23 also has std::unreachable() which can be detected
2104 * as follows:
2105 * ```
2106 * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
2107 * # include <utility>
2108 * # define XXH_UNREACHABLE() std::unreachable()
2109 * #endif
2110 * ```
2111 * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
2112 * We don't use that as including `<utility>` in `extern "C"` blocks
2113 * doesn't work on GCC12
2114 */
2115
2116#if XXH_HAS_BUILTIN(__builtin_unreachable)
2117# define XXH_UNREACHABLE() __builtin_unreachable()
2118
2119#elif defined(_MSC_VER)
2120# define XXH_UNREACHABLE() __assume(0)
2121
2122#else
2123# define XXH_UNREACHABLE()
2124#endif
2125
2126#if XXH_HAS_BUILTIN(__builtin_assume)
2127# define XXH_ASSUME(c) __builtin_assume(c)
2128#else
2129# define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
2130#endif
2131
2145#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
2146 && XXH_HAS_BUILTIN(__builtin_rotateleft64)
2147# define XXH_rotl32 __builtin_rotateleft32
2148# define XXH_rotl64 __builtin_rotateleft64
2149/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
2150#elif defined(_MSC_VER)
2151# define XXH_rotl32(x,r) _rotl(x,r)
2152# define XXH_rotl64(x,r) _rotl64(x,r)
2153#else
2154# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
2155# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
2156#endif
2157
2166#if defined(_MSC_VER) /* Visual Studio */
2167# define XXH_swap32 _byteswap_ulong
2168#elif XXH_GCC_VERSION >= 403
2169# define XXH_swap32 __builtin_bswap32
2170#else
2171static xxh_u32 XXH_swap32 (xxh_u32 x)
2172{
2173 return ((x << 24) & 0xff000000 ) |
2174 ((x << 8) & 0x00ff0000 ) |
2175 ((x >> 8) & 0x0000ff00 ) |
2176 ((x >> 24) & 0x000000ff );
2177}
2178#endif
2179
2180
2181/* ***************************
2182* Memory reads
2183*****************************/
2184
2189typedef enum {
2190 XXH_aligned,
2191 XXH_unaligned
2192} XXH_alignment;
2193
2194/*
2195 * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
2196 *
2197 * This is ideal for older compilers which don't inline memcpy.
2198 */
2199#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2200
2201XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
2202{
2203 const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2204 return bytePtr[0]
2205 | ((xxh_u32)bytePtr[1] << 8)
2206 | ((xxh_u32)bytePtr[2] << 16)
2207 | ((xxh_u32)bytePtr[3] << 24);
2208}
2209
2210XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
2211{
2212 const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2213 return bytePtr[3]
2214 | ((xxh_u32)bytePtr[2] << 8)
2215 | ((xxh_u32)bytePtr[1] << 16)
2216 | ((xxh_u32)bytePtr[0] << 24);
2217}
2218
2219#else
2220XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
2221{
2222 return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
2223}
2224
2225static xxh_u32 XXH_readBE32(const void* ptr)
2226{
2227 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
2228}
2229#endif
2230
2231XXH_FORCE_INLINE xxh_u32
2232XXH_readLE32_align(const void* ptr, XXH_alignment align)
2233{
2234 if (align==XXH_unaligned) {
2235 return XXH_readLE32(ptr);
2236 } else {
2237 return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
2238 }
2239}
2240
2241
2242/* *************************************
2243* Misc
2244***************************************/
2246XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
2247
2248
2249/* *******************************************************************
2250* 32-bit hash functions
2251*********************************************************************/
2260 /* #define instead of static const, to be used as initializers */
2261#define XXH_PRIME32_1 0x9E3779B1U
2262#define XXH_PRIME32_2 0x85EBCA77U
2263#define XXH_PRIME32_3 0xC2B2AE3DU
2264#define XXH_PRIME32_4 0x27D4EB2FU
2265#define XXH_PRIME32_5 0x165667B1U
2266
2267#ifdef XXH_OLD_NAMES
2268# define PRIME32_1 XXH_PRIME32_1
2269# define PRIME32_2 XXH_PRIME32_2
2270# define PRIME32_3 XXH_PRIME32_3
2271# define PRIME32_4 XXH_PRIME32_4
2272# define PRIME32_5 XXH_PRIME32_5
2273#endif
2274
2286static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
2287{
2288 acc += input * XXH_PRIME32_2;
2289 acc = XXH_rotl32(acc, 13);
2290 acc *= XXH_PRIME32_1;
2291#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
2292 /*
2293 * UGLY HACK:
2294 * A compiler fence is the only thing that prevents GCC and Clang from
2295 * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
2296 * reason) without globally disabling SSE4.1.
2297 *
2298 * The reason we want to avoid vectorization is because despite working on
2299 * 4 integers at a time, there are multiple factors slowing XXH32 down on
2300 * SSE4:
2301 * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
2302 * newer chips!) making it slightly slower to multiply four integers at
2303 * once compared to four integers independently. Even when pmulld was
2304 * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
2305 * just to multiply unless doing a long operation.
2306 *
2307 * - Four instructions are required to rotate,
2308 * movqda tmp, v // not required with VEX encoding
2309 * pslld tmp, 13 // tmp <<= 13
2310 * psrld v, 19 // x >>= 19
2311 * por v, tmp // x |= tmp
2312 * compared to one for scalar:
2313 * roll v, 13 // reliably fast across the board
2314 * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
2315 *
2316 * - Instruction level parallelism is actually more beneficial here because
2317 * the SIMD actually serializes this operation: While v1 is rotating, v2
2318 * can load data, while v3 can multiply. SSE forces them to operate
2319 * together.
2320 *
2321 * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
2322 * the loop. NEON is only faster on the A53, and with the newer cores, it is less
2323 * than half the speed.
2324 */
2325 XXH_COMPILER_GUARD(acc);
2326#endif
2327 return acc;
2328}
2329
2340static xxh_u32 XXH32_avalanche(xxh_u32 hash)
2341{
2342 hash ^= hash >> 15;
2343 hash *= XXH_PRIME32_2;
2344 hash ^= hash >> 13;
2345 hash *= XXH_PRIME32_3;
2346 hash ^= hash >> 16;
2347 return hash;
2348}
2349
2350#define XXH_get32bits(p) XXH_readLE32_align(p, align)
2351
2367static XXH_PUREF xxh_u32
2368XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
2369{
2370#define XXH_PROCESS1 do { \
2371 hash += (*ptr++) * XXH_PRIME32_5; \
2372 hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
2373} while (0)
2374
2375#define XXH_PROCESS4 do { \
2376 hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
2377 ptr += 4; \
2378 hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
2379} while (0)
2380
2381 if (ptr==NULL) XXH_ASSERT(len == 0);
2382
2383 /* Compact rerolled version; generally faster */
2384 if (!XXH32_ENDJMP) {
2385 len &= 15;
2386 while (len >= 4) {
2387 XXH_PROCESS4;
2388 len -= 4;
2389 }
2390 while (len > 0) {
2391 XXH_PROCESS1;
2392 --len;
2393 }
2394 return XXH32_avalanche(hash);
2395 } else {
2396 switch(len&15) /* or switch(bEnd - p) */ {
2397 case 12: XXH_PROCESS4;
2398 XXH_FALLTHROUGH; /* fallthrough */
2399 case 8: XXH_PROCESS4;
2400 XXH_FALLTHROUGH; /* fallthrough */
2401 case 4: XXH_PROCESS4;
2402 return XXH32_avalanche(hash);
2403
2404 case 13: XXH_PROCESS4;
2405 XXH_FALLTHROUGH; /* fallthrough */
2406 case 9: XXH_PROCESS4;
2407 XXH_FALLTHROUGH; /* fallthrough */
2408 case 5: XXH_PROCESS4;
2409 XXH_PROCESS1;
2410 return XXH32_avalanche(hash);
2411
2412 case 14: XXH_PROCESS4;
2413 XXH_FALLTHROUGH; /* fallthrough */
2414 case 10: XXH_PROCESS4;
2415 XXH_FALLTHROUGH; /* fallthrough */
2416 case 6: XXH_PROCESS4;
2417 XXH_PROCESS1;
2418 XXH_PROCESS1;
2419 return XXH32_avalanche(hash);
2420
2421 case 15: XXH_PROCESS4;
2422 XXH_FALLTHROUGH; /* fallthrough */
2423 case 11: XXH_PROCESS4;
2424 XXH_FALLTHROUGH; /* fallthrough */
2425 case 7: XXH_PROCESS4;
2426 XXH_FALLTHROUGH; /* fallthrough */
2427 case 3: XXH_PROCESS1;
2428 XXH_FALLTHROUGH; /* fallthrough */
2429 case 2: XXH_PROCESS1;
2430 XXH_FALLTHROUGH; /* fallthrough */
2431 case 1: XXH_PROCESS1;
2432 XXH_FALLTHROUGH; /* fallthrough */
2433 case 0: return XXH32_avalanche(hash);
2434 }
2435 XXH_ASSERT(0);
2436 return hash; /* reaching this point is deemed impossible */
2437 }
2438}
2439
2440#ifdef XXH_OLD_NAMES
2441# define PROCESS1 XXH_PROCESS1
2442# define PROCESS4 XXH_PROCESS4
2443#else
2444# undef XXH_PROCESS1
2445# undef XXH_PROCESS4
2446#endif
2447
2456XXH_FORCE_INLINE XXH_PUREF xxh_u32
2457XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
2458{
2459 xxh_u32 h32;
2460
2461 if (input==NULL) XXH_ASSERT(len == 0);
2462
2463 if (len>=16) {
2464 const xxh_u8* const bEnd = input + len;
2465 const xxh_u8* const limit = bEnd - 15;
2466 xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2467 xxh_u32 v2 = seed + XXH_PRIME32_2;
2468 xxh_u32 v3 = seed + 0;
2469 xxh_u32 v4 = seed - XXH_PRIME32_1;
2470
2471 do {
2472 v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
2473 v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
2474 v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
2475 v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
2476 } while (input < limit);
2477
2478 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
2479 + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
2480 } else {
2481 h32 = seed + XXH_PRIME32_5;
2482 }
2483
2484 h32 += (xxh_u32)len;
2485
2486 return XXH32_finalize(h32, input, len&15, align);
2487}
2488
2490XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
2491{
2492#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2493 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2494 XXH32_state_t state;
2495 XXH32_reset(&state, seed);
2496 XXH32_update(&state, (const xxh_u8*)input, len);
2497 return XXH32_digest(&state);
2498#else
2499 if (XXH_FORCE_ALIGN_CHECK) {
2500 if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
2501 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
2502 } }
2503
2504 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
2505#endif
2506}
2507
2508
2509
2510/******* Hash streaming *******/
2511#ifndef XXH_NO_STREAM
2513XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
2514{
2515 return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
2516}
2518XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
2519{
2520 XXH_free(statePtr);
2521 return XXH_OK;
2522}
2523
2525XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
2526{
2527 XXH_memcpy(dstState, srcState, sizeof(*dstState));
2528}
2529
2531XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
2532{
2533 XXH_ASSERT(statePtr != NULL);
2534 memset(statePtr, 0, sizeof(*statePtr));
2535 statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2536 statePtr->v[1] = seed + XXH_PRIME32_2;
2537 statePtr->v[2] = seed + 0;
2538 statePtr->v[3] = seed - XXH_PRIME32_1;
2539 return XXH_OK;
2540}
2541
2542
2544XXH_PUBLIC_API XXH_errorcode
2545XXH32_update(XXH32_state_t* state, const void* input, size_t len)
2546{
2547 if (input==NULL) {
2548 XXH_ASSERT(len == 0);
2549 return XXH_OK;
2550 }
2551
2552 { const xxh_u8* p = (const xxh_u8*)input;
2553 const xxh_u8* const bEnd = p + len;
2554
2555 state->total_len_32 += (XXH32_hash_t)len;
2556 state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
2557
2558 if (state->memsize + len < 16) { /* fill in tmp buffer */
2559 XXH_memcpy(reinterpret_cast<xxh_u8*>(state->mem32) + state->memsize, input, len);
2560 state->memsize += (XXH32_hash_t)len;
2561 return XXH_OK;
2562 }
2563
2564 if (state->memsize) { /* some data left from previous update */
2565 XXH_memcpy(reinterpret_cast<xxh_u8*>(state->mem32) + state->memsize, input, 16-state->memsize);
2566 { const xxh_u32* p32 = state->mem32;
2567 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
2568 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
2569 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
2570 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
2571 }
2572 p += 16-state->memsize;
2573 state->memsize = 0;
2574 }
2575
2576 if (p <= bEnd-16) {
2577 const xxh_u8* const limit = bEnd - 16;
2578
2579 do {
2580 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
2581 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
2582 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
2583 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
2584 } while (p<=limit);
2585
2586 }
2587
2588 if (p < bEnd) {
2589 XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
2590 state->memsize = (unsigned)(bEnd-p);
2591 }
2592 }
2593
2594 return XXH_OK;
2595}
2596
2597
2599XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
2600{
2601 xxh_u32 h32;
2602
2603 if (state->large_len) {
2604 h32 = XXH_rotl32(state->v[0], 1)
2605 + XXH_rotl32(state->v[1], 7)
2606 + XXH_rotl32(state->v[2], 12)
2607 + XXH_rotl32(state->v[3], 18);
2608 } else {
2609 h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
2610 }
2611
2612 h32 += state->total_len_32;
2613
2614 return XXH32_finalize(h32, reinterpret_cast<const xxh_u8*>(state->mem32), state->memsize, XXH_aligned);
2615}
2616#endif /* !XXH_NO_STREAM */
2617
2618/******* Canonical representation *******/
2619
2634XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
2635{
2636 XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
2637 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
2638 XXH_memcpy(dst, &hash, sizeof(*dst));
2639}
2641XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
2642{
2643 return XXH_readBE32(src);
2644}
2645
2646
2647#ifndef XXH_NO_LONG_LONG
2648
2649/* *******************************************************************
2650* 64-bit hash functions
2651*********************************************************************/
2657/******* Memory access *******/
2658
2659typedef XXH64_hash_t xxh_u64;
2660
2661#ifdef XXH_OLD_NAMES
2662# define U64 xxh_u64
2663#endif
2664
2665#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2666/*
2667 * Manual byteshift. Best for old compilers which don't inline memcpy.
2668 * We actually directly use XXH_readLE64 and XXH_readBE64.
2669 */
2670#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
2671
2672/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
2673static xxh_u64 XXH_read64(const void* memPtr)
2674{
2675 return *(const xxh_u64*) memPtr;
2676}
2677
2678#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
2679
2680/*
2681 * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
2682 * documentation claimed that it only increased the alignment, but actually it
2683 * can decrease it on gcc, clang, and icc:
2684 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
2685 * https://gcc.godbolt.org/z/xYez1j67Y.
2686 */
2687#ifdef XXH_OLD_NAMES
2688typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
2689#endif
2690static xxh_u64 XXH_read64(const void* ptr)
2691{
2692 typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
2693 return *((const xxh_unalign64*)ptr);
2694}
2695
2696#else
2697
2698/*
2699 * Portable and safe solution. Generally efficient.
2700 * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2701 */
2702static xxh_u64 XXH_read64(const void* memPtr)
2703{
2704 xxh_u64 val;
2705 XXH_memcpy(&val, memPtr, sizeof(val));
2706 return val;
2707}
2708
2709#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
2710
2711#if defined(_MSC_VER) /* Visual Studio */
2712# define XXH_swap64 _byteswap_uint64
2713#elif XXH_GCC_VERSION >= 403
2714# define XXH_swap64 __builtin_bswap64
2715#else
2716static xxh_u64 XXH_swap64(xxh_u64 x)
2717{
2718 return ((x << 56) & 0xff00000000000000ULL) |
2719 ((x << 40) & 0x00ff000000000000ULL) |
2720 ((x << 24) & 0x0000ff0000000000ULL) |
2721 ((x << 8) & 0x000000ff00000000ULL) |
2722 ((x >> 8) & 0x00000000ff000000ULL) |
2723 ((x >> 24) & 0x0000000000ff0000ULL) |
2724 ((x >> 40) & 0x000000000000ff00ULL) |
2725 ((x >> 56) & 0x00000000000000ffULL);
2726}
2727#endif
2728
2729
2730/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
2731#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2732
2733XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
2734{
2735 const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2736 return bytePtr[0]
2737 | ((xxh_u64)bytePtr[1] << 8)
2738 | ((xxh_u64)bytePtr[2] << 16)
2739 | ((xxh_u64)bytePtr[3] << 24)
2740 | ((xxh_u64)bytePtr[4] << 32)
2741 | ((xxh_u64)bytePtr[5] << 40)
2742 | ((xxh_u64)bytePtr[6] << 48)
2743 | ((xxh_u64)bytePtr[7] << 56);
2744}
2745
2746XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
2747{
2748 const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2749 return bytePtr[7]
2750 | ((xxh_u64)bytePtr[6] << 8)
2751 | ((xxh_u64)bytePtr[5] << 16)
2752 | ((xxh_u64)bytePtr[4] << 24)
2753 | ((xxh_u64)bytePtr[3] << 32)
2754 | ((xxh_u64)bytePtr[2] << 40)
2755 | ((xxh_u64)bytePtr[1] << 48)
2756 | ((xxh_u64)bytePtr[0] << 56);
2757}
2758
2759#else
2760XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
2761{
2762 return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
2763}
2764
2765static xxh_u64 XXH_readBE64(const void* ptr)
2766{
2767 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
2768}
2769#endif
2770
2771XXH_FORCE_INLINE xxh_u64
2772XXH_readLE64_align(const void* ptr, XXH_alignment align)
2773{
2774 if (align==XXH_unaligned)
2775 return XXH_readLE64(ptr);
2776 else
2777 return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
2778}
2779
2780
2781/******* xxh64 *******/
2790/* #define rather than static const, to be used as initializers */
2791#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL
2792#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL
2793#define XXH_PRIME64_3 0x165667B19E3779F9ULL
2794#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL
2795#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL
2796
2797#ifdef XXH_OLD_NAMES
2798# define PRIME64_1 XXH_PRIME64_1
2799# define PRIME64_2 XXH_PRIME64_2
2800# define PRIME64_3 XXH_PRIME64_3
2801# define PRIME64_4 XXH_PRIME64_4
2802# define PRIME64_5 XXH_PRIME64_5
2803#endif
2804
2806static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
2807{
2808 acc += input * XXH_PRIME64_2;
2809 acc = XXH_rotl64(acc, 31);
2810 acc *= XXH_PRIME64_1;
2811 return acc;
2812}
2813
2814static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
2815{
2816 val = XXH64_round(0, val);
2817 acc ^= val;
2818 acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
2819 return acc;
2820}
2821
2823static xxh_u64 XXH64_avalanche(xxh_u64 hash)
2824{
2825 hash ^= hash >> 33;
2826 hash *= XXH_PRIME64_2;
2827 hash ^= hash >> 29;
2828 hash *= XXH_PRIME64_3;
2829 hash ^= hash >> 32;
2830 return hash;
2831}
2832
2833
2834#define XXH_get64bits(p) XXH_readLE64_align(p, align)
2835
2851static XXH_PUREF xxh_u64
2852XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
2853{
2854 if (ptr==NULL) XXH_ASSERT(len == 0);
2855 len &= 31;
2856 while (len >= 8) {
2857 xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
2858 ptr += 8;
2859 hash ^= k1;
2860 hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
2861 len -= 8;
2862 }
2863 if (len >= 4) {
2864 hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
2865 ptr += 4;
2866 hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
2867 len -= 4;
2868 }
2869 while (len > 0) {
2870 hash ^= (*ptr++) * XXH_PRIME64_5;
2871 hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
2872 --len;
2873 }
2874 return XXH64_avalanche(hash);
2875}
2876
2877#ifdef XXH_OLD_NAMES
2878# define PROCESS1_64 XXH_PROCESS1_64
2879# define PROCESS4_64 XXH_PROCESS4_64
2880# define PROCESS8_64 XXH_PROCESS8_64
2881#else
2882# undef XXH_PROCESS1_64
2883# undef XXH_PROCESS4_64
2884# undef XXH_PROCESS8_64
2885#endif
2886
2895XXH_FORCE_INLINE XXH_PUREF xxh_u64
2896XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
2897{
2898 xxh_u64 h64;
2899 if (input==NULL) XXH_ASSERT(len == 0);
2900
2901 if (len>=32) {
2902 const xxh_u8* const bEnd = input + len;
2903 const xxh_u8* const limit = bEnd - 31;
2904 xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2905 xxh_u64 v2 = seed + XXH_PRIME64_2;
2906 xxh_u64 v3 = seed + 0;
2907 xxh_u64 v4 = seed - XXH_PRIME64_1;
2908
2909 do {
2910 v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
2911 v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
2912 v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
2913 v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
2914 } while (input<limit);
2915
2916 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
2917 h64 = XXH64_mergeRound(h64, v1);
2918 h64 = XXH64_mergeRound(h64, v2);
2919 h64 = XXH64_mergeRound(h64, v3);
2920 h64 = XXH64_mergeRound(h64, v4);
2921
2922 } else {
2923 h64 = seed + XXH_PRIME64_5;
2924 }
2925
2926 h64 += (xxh_u64) len;
2927
2928 return XXH64_finalize(h64, input, len, align);
2929}
2930
2931
2933XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
2934{
2935#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2936 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2937 XXH64_state_t state;
2938 XXH64_reset(&state, seed);
2939 XXH64_update(&state, (const xxh_u8*)input, len);
2940 return XXH64_digest(&state);
2941#else
2942 if (XXH_FORCE_ALIGN_CHECK) {
2943 if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
2944 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
2945 } }
2946
2947 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
2948
2949#endif
2950}
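/*
 * Illustrative usage sketch (not part of xxHash; example names are
 * hypothetical): one-shot hashing of an in-memory buffer with XXH64.
 */
#if 0 /* example only, not compiled */
#include <stdio.h>
#include <string.h>
static void example_oneshot(void)
{
    const char msg[] = "hello, xxhash";
    XXH64_hash_t const h = XXH64(msg, strlen(msg), 0 /* seed */);
    printf("XXH64 = 0x%016llx\n", (unsigned long long)h);
}
#endif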
2951
2952/******* Hash Streaming *******/
2953#ifndef XXH_NO_STREAM
2955XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
2956{
2957 return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
2958}
2960XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
2961{
2962 XXH_free(statePtr);
2963 return XXH_OK;
2964}
2965
2967XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
2968{
2969 XXH_memcpy(dstState, srcState, sizeof(*dstState));
2970}
2971
2973XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
2974{
2975 XXH_ASSERT(statePtr != NULL);
2976 memset(statePtr, 0, sizeof(*statePtr));
2977 statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2978 statePtr->v[1] = seed + XXH_PRIME64_2;
2979 statePtr->v[2] = seed + 0;
2980 statePtr->v[3] = seed - XXH_PRIME64_1;
2981 return XXH_OK;
2982}
2983
2985XXH_PUBLIC_API XXH_errorcode
2986XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
2987{
2988 if (input==NULL) {
2989 XXH_ASSERT(len == 0);
2990 return XXH_OK;
2991 }
2992
2993 { const xxh_u8* p = (const xxh_u8*)input;
2994 const xxh_u8* const bEnd = p + len;
2995
2996 state->total_len += len;
2997
2998 if (state->memsize + len < 32) { /* fill in tmp buffer */
2999 XXH_memcpy((reinterpret_cast<xxh_u8*>(state->mem64)) + state->memsize, input, len);
3000 state->memsize += (xxh_u32)len;
3001 return XXH_OK;
3002 }
3003
3004 if (state->memsize) { /* tmp buffer is full */
3005 XXH_memcpy((reinterpret_cast<xxh_u8*>(state->mem64)) + state->memsize, input, 32-state->memsize);
3006 state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
3007 state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
3008 state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
3009 state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
3010 p += 32 - state->memsize;
3011 state->memsize = 0;
3012 }
3013
3014 if (p+32 <= bEnd) {
3015 const xxh_u8* const limit = bEnd - 32;
3016
3017 do {
3018 state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
3019 state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
3020 state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
3021 state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
3022 } while (p<=limit);
3023
3024 }
3025
3026 if (p < bEnd) {
3027 XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
3028 state->memsize = (unsigned)(bEnd-p);
3029 }
3030 }
3031
3032 return XXH_OK;
3033}
3034
3035
3037XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
3038{
3039 xxh_u64 h64;
3040
3041 if (state->total_len >= 32) {
3042 h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
3043 h64 = XXH64_mergeRound(h64, state->v[0]);
3044 h64 = XXH64_mergeRound(h64, state->v[1]);
3045 h64 = XXH64_mergeRound(h64, state->v[2]);
3046 h64 = XXH64_mergeRound(h64, state->v[3]);
3047 } else {
3048 h64 = state->v[2] /*seed*/ + XXH_PRIME64_5;
3049 }
3050
3051 h64 += (xxh_u64) state->total_len;
3052
3053 return XXH64_finalize(h64, reinterpret_cast<const xxh_u8*>(state->mem64), (size_t)state->total_len, XXH_aligned);
3054}
3055#endif /* !XXH_NO_STREAM */
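/*
 * Illustrative usage sketch (not part of xxHash; example names are
 * hypothetical): the streaming API yields the same digest as the one-shot
 * function, no matter how the input is split across XXH64_update() calls.
 */
#if 0 /* example only, not compiled */
#include <assert.h>
#include <string.h>
static void example_streaming(void)
{
    const char msg[] = "hello, xxhash";
    XXH64_hash_t const ref = XXH64(msg, strlen(msg), 0);

    XXH64_state_t* const state = XXH64_createState();
    assert(state != NULL);
    assert(XXH64_reset(state, 0) == XXH_OK);
    assert(XXH64_update(state, msg, 5) == XXH_OK);                    /* first chunk */
    assert(XXH64_update(state, msg + 5, strlen(msg) - 5) == XXH_OK);  /* remainder */
    assert(XXH64_digest(state) == ref);
    XXH64_freeState(state);
}
#endif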
3056
3057/******* Canonical representation *******/
3058
3060XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
3061{
3062 XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
3063 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
3064 XXH_memcpy(dst, &hash, sizeof(*dst));
3065}
3066
3068XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
3069{
3070 return XXH_readBE64(src);
3071}
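/*
 * Illustrative sketch (not part of xxHash; example names are hypothetical):
 * the canonical form is a fixed big-endian byte layout, suitable for storing
 * or transmitting digests; XXH64_hashFromCanonical restores the native value.
 */
#if 0 /* example only, not compiled */
#include <assert.h>
static void example_canonical(void)
{
    XXH64_hash_t const h = 0x0123456789ABCDEFULL;
    XXH64_canonical_t canon;
    XXH64_canonicalFromHash(&canon, h);
    assert(canon.digest[0] == 0x01);               /* most significant byte first */
    assert(XXH64_hashFromCanonical(&canon) == h);  /* lossless round trip */
}
#endif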
3072
3073#ifndef XXH_NO_XXH3
3074
3075/* *********************************************************************
3076* XXH3
3077* New generation hash designed for speed on small keys and vectorization
3078************************************************************************ */
3085
3086/* === Compiler specifics === */
3087
3088#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
3089# define XXH_RESTRICT /* disable */
3090#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
3091# define XXH_RESTRICT restrict
3092#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
3093 || (defined (__clang__)) \
3094 || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
3095 || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
3096/*
3097 * There are a LOT more compilers that recognize __restrict but this
3098 * covers the major ones.
3099 */
3100# define XXH_RESTRICT __restrict
3101#else
3102# define XXH_RESTRICT /* disable */
3103#endif
3104
3105#if (defined(__GNUC__) && (__GNUC__ >= 3)) \
3106 || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
3107 || defined(__clang__)
3108# define XXH_likely(x) __builtin_expect(x, 1)
3109# define XXH_unlikely(x) __builtin_expect(x, 0)
3110#else
3111# define XXH_likely(x) (x)
3112# define XXH_unlikely(x) (x)
3113#endif
3114
3115#if defined(__GNUC__) || defined(__clang__)
3116# if defined(__ARM_FEATURE_SVE)
3117# include <arm_sve.h>
3118# endif
3119# if defined(__ARM_NEON__) || defined(__ARM_NEON) \
3120 || (defined(_M_ARM) && _M_ARM >= 7) \
3121 || defined(_M_ARM64) || defined(_M_ARM64EC)
3122# define inline __inline__ /* circumvent a clang bug */
3123# include <arm_neon.h>
3124# undef inline
3125# elif defined(__AVX2__)
3126# include <immintrin.h>
3127# elif defined(__SSE2__)
3128# include <emmintrin.h>
3129# endif
3130#endif
3131
3132#if defined(_MSC_VER)
3133# include <intrin.h>
3134#endif
3135
3136/*
3137 * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
3138 * remaining a true 64-bit/128-bit hash function.
3139 *
3140 * This is done by prioritizing a subset of 64-bit operations that can be
3141 * emulated without too many steps on the average 32-bit machine.
3142 *
3143 * For example, these two lines seem similar, and run equally fast on 64-bit:
3144 *
3145 * xxh_u64 x;
3146 * x ^= (x >> 47); // good
3147 * x ^= (x >> 13); // bad
3148 *
3149 * However, to a 32-bit machine, there is a major difference.
3150 *
3151 * x ^= (x >> 47) looks like this:
3152 *
3153 * x.lo ^= (x.hi >> (47 - 32));
3154 *
3155 * while x ^= (x >> 13) looks like this:
3156 *
3157 * // note: funnel shifts are not usually cheap.
3158 * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
3159 * x.hi ^= (x.hi >> 13);
3160 *
3161 * The first one is significantly faster than the second, simply because the
3162 * shift is larger than 32. This means:
3163 * - All the bits we need are in the upper 32 bits, so we can ignore the lower
3164 * 32 bits in the shift.
3165 * - The shift result will always fit in the lower 32 bits, and therefore,
3166 * we can ignore the upper 32 bits in the xor.
3167 *
3168 * Thanks to this optimization, XXH3 only requires these features to be efficient:
3169 *
3170 * - Usable unaligned access
3171 * - A 32-bit or 64-bit ALU
3172 * - If 32-bit, a decent ADC instruction
3173 * - A 32 or 64-bit multiply with a 64-bit result
3174 * - For the 128-bit variant, a decent byteswap helps short inputs.
3175 *
3176 * The first two are already required by XXH32, and almost all 32-bit and 64-bit
3177 * platforms which can run XXH32 can run XXH3 efficiently.
3178 *
3179 * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
3180 * notable exception.
3181 *
3182 * First of all, Thumb-1 lacks support for the UMULL instruction which
3183 * performs the important long multiply. This means numerous __aeabi_lmul
3184 * calls.
3185 *
3186 * Second of all, the 8 functional registers are just not enough.
3187 * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
3188 * Lo registers, and this shuffling results in thousands more MOVs than A32.
3189 *
3190 * A32 and T32 don't have this limitation. They can access all 14 registers,
3191 * do a 32->64 multiply with UMULL, and the flexible operand allowing free
3192 * shifts is helpful, too.
3193 *
3194 * Therefore, we do a quick sanity check.
3195 *
3196 * If compiling Thumb-1 for a target which supports ARM instructions, we will
3197 * emit a warning, as it is not a "sane" platform to compile for.
3198 *
3199 * Usually, if this happens, it is because of an accident and you probably need
3200 * to specify -march, as you likely meant to compile for a newer architecture.
3201 *
3202 * Credit: large sections of the vectorial and asm source code paths
3203 * have been contributed by @easyaspi314
3204 */
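/*
 * Illustrative sketch (not part of xxHash; example names are hypothetical) of
 * the decomposition described in the comment above: with a shift amount >= 32,
 * the 64-bit xorshift only touches the low 32-bit half, so both forms below
 * compute the same value.
 */
#if 0 /* example only, not compiled */
#include <assert.h>
static void example_xorshift_split(void)
{
    xxh_u64 const x = 0x0123456789ABCDEFULL;
    xxh_u32 const hi = (xxh_u32)(x >> 32);
    xxh_u32 const lo = (xxh_u32)x;

    xxh_u64 const full  = x ^ (x >> 47);                       /* 64-bit form */
    xxh_u64 const split = ((xxh_u64)hi << 32)                  /* upper half unchanged */
                        | (xxh_u64)(lo ^ (hi >> (47 - 32)));   /* only the low half changes */
    assert(full == split);
}
#endif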
3205#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
3206# warning "XXH3 is highly inefficient without ARM or Thumb-2."
3207#endif
3208
3209/* ==========================================
3210 * Vectorization detection
3211 * ========================================== */
3212
3213#ifdef XXH_DOXYGEN
3224# define XXH_VECTOR XXH_SCALAR
3234enum XXH_VECTOR_TYPE /* fake enum */ {
3235 XXH_SCALAR = 0,
3236 XXH_SSE2 = 1,
3242 XXH_AVX2 = 2,
3243 XXH_AVX512 = 3,
3244 XXH_NEON = 4,
3245 XXH_VSX = 5,
3246 XXH_SVE = 6,
3247};
3257# define XXH_ACC_ALIGN 8
3258#endif
3259
3260/* Actual definition */
3261#ifndef XXH_DOXYGEN
3262# define XXH_SCALAR 0
3263# define XXH_SSE2 1
3264# define XXH_AVX2 2
3265# define XXH_AVX512 3
3266# define XXH_NEON 4
3267# define XXH_VSX 5
3268# define XXH_SVE 6
3269#endif
3270
3271#ifndef XXH_VECTOR /* can be defined on command line */
3272# if defined(__ARM_FEATURE_SVE)
3273# define XXH_VECTOR XXH_SVE
3274# elif ( \
3275 defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
3276 || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
3277 ) && ( \
3278 defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
3279 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
3280 )
3281# define XXH_VECTOR XXH_NEON
3282# elif defined(__AVX512F__)
3283# define XXH_VECTOR XXH_AVX512
3284# elif defined(__AVX2__)
3285# define XXH_VECTOR XXH_AVX2
3286# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
3287# define XXH_VECTOR XXH_SSE2
3288# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
3289 || (defined(__s390x__) && defined(__VEC__)) \
3290 && defined(__GNUC__) /* TODO: IBM XL */
3291# define XXH_VECTOR XXH_VSX
3292# else
3293# define XXH_VECTOR XXH_SCALAR
3294# endif
3295#endif
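/*
 * Illustrative usage note (the build invocation below is an assumption, not
 * part of xxHash): since XXH_VECTOR can be set on the command line, a build
 * may force a specific code path, e.g. the scalar one:
 *
 *   cc -O3 -DXXH_VECTOR=XXH_SCALAR -c xxhash.c
 */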
3296
3297/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
3298#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
3299# ifdef _MSC_VER
3300# pragma warning(once : 4606)
3301# else
3302# warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
3303# endif
3304# undef XXH_VECTOR
3305# define XXH_VECTOR XXH_SCALAR
3306#endif
3307
3308/*
3309 * Controls the alignment of the accumulator,
3310 * for compatibility with aligned vector loads, which are usually faster.
3311 */
3312#ifndef XXH_ACC_ALIGN
3313# if defined(XXH_X86DISPATCH)
3314# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
3315# elif XXH_VECTOR == XXH_SCALAR /* scalar */
3316# define XXH_ACC_ALIGN 8
3317# elif XXH_VECTOR == XXH_SSE2 /* sse2 */
3318# define XXH_ACC_ALIGN 16
3319# elif XXH_VECTOR == XXH_AVX2 /* avx2 */
3320# define XXH_ACC_ALIGN 32
3321# elif XXH_VECTOR == XXH_NEON /* neon */
3322# define XXH_ACC_ALIGN 16
3323# elif XXH_VECTOR == XXH_VSX /* vsx */
3324# define XXH_ACC_ALIGN 16
3325# elif XXH_VECTOR == XXH_AVX512 /* avx512 */
3326# define XXH_ACC_ALIGN 64
3327# elif XXH_VECTOR == XXH_SVE /* sve */
3328# define XXH_ACC_ALIGN 64
3329# endif
3330#endif
3331
3332#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
3333 || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
3334# define XXH_SEC_ALIGN XXH_ACC_ALIGN
3335#elif XXH_VECTOR == XXH_SVE
3336# define XXH_SEC_ALIGN XXH_ACC_ALIGN
3337#else
3338# define XXH_SEC_ALIGN 8
3339#endif
3340
3341#if defined(__GNUC__) || defined(__clang__)
3342# define XXH_ALIASING __attribute__((may_alias))
3343#else
3344# define XXH_ALIASING /* nothing */
3345#endif
3346
3347/*
3348 * UGLY HACK:
3349 * GCC usually generates the best code with -O3 for xxHash.
3350 *
3351 * However, when targeting AVX2, it is overzealous in its unrolling, resulting
3352 * in code roughly 3/4 the speed of Clang.
3353 *
3354 * There are other issues, such as GCC splitting _mm256_loadu_si256 into
3355 * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
3356 * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
3357 *
3358 * That is why when compiling the AVX2 version, it is recommended to use either
3359 * -O2 -mavx2 -march=haswell
3360 * or
3361 * -O2 -mavx2 -mno-avx256-split-unaligned-load
3362 * for decent performance, or to use Clang instead.
3363 *
3364 * Fortunately, we can control the first one with a pragma that forces GCC into
3365 * -O2, but the other one we can't control without "failed to inline always
3366 * inline function due to target mismatch" warnings.
3367 */
3368#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
3369 && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
3370 && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
3371# pragma GCC push_options
3372# pragma GCC optimize("-O2")
3373#endif
3374
3375#if XXH_VECTOR == XXH_NEON
3376
3377/*
3378 * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
3379 * optimizes out the entire hashLong loop because of the aliasing violation.
3380 *
3381 * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
3382 * so the only option is to mark it as aliasing.
3383 */
3384typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
3385
3399#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
3400XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
3401{
3402 return *(xxh_aliasing_uint64x2_t const *)ptr;
3403}
3404#else
3405XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
3406{
3407 return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
3408}
3409#endif
3410
3419#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
3420XXH_FORCE_INLINE uint64x2_t
3421XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3422{
3423 /* Inline assembly is the only way */
3424 __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
3425 return acc;
3426}
3427XXH_FORCE_INLINE uint64x2_t
3428XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3429{
3430 /* This intrinsic works as expected */
3431 return vmlal_high_u32(acc, lhs, rhs);
3432}
3433#else
3434/* Portable intrinsic versions */
3435XXH_FORCE_INLINE uint64x2_t
3436XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3437{
3438 return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
3439}
3442XXH_FORCE_INLINE uint64x2_t
3443XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3444{
3445 return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
3446}
3447#endif
3448
3484# ifndef XXH3_NEON_LANES
3485# if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
3486 && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
3487# define XXH3_NEON_LANES 6
3488# else
3489# define XXH3_NEON_LANES XXH_ACC_NB
3490# endif
3491# endif
3492#endif /* XXH_VECTOR == XXH_NEON */
3493
3494/*
3495 * VSX and Z Vector helpers.
3496 *
3497 * This is very messy, and any pull requests to clean this up are welcome.
3498 *
3499 * There are a lot of problems with supporting VSX and s390x, due to
3500 * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
3501 */
3502#if XXH_VECTOR == XXH_VSX
3503/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
3504 * and `pixel`. This is a problem for obvious reasons.
3505 *
3506 * These keywords are unnecessary; the spec literally says they are
3507 * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
3508 * after including the header.
3509 *
3510 * We use pragma push_macro/pop_macro to keep the namespace clean. */
3511# pragma push_macro("bool")
3512# pragma push_macro("vector")
3513# pragma push_macro("pixel")
3514/* silence potential macro redefined warnings */
3515# undef bool
3516# undef vector
3517# undef pixel
3518
3519# if defined(__s390x__)
3520# include <s390intrin.h>
3521# else
3522# include <altivec.h>
3523# endif
3524
3525/* Restore the original macro values, if applicable. */
3526# pragma pop_macro("pixel")
3527# pragma pop_macro("vector")
3528# pragma pop_macro("bool")
3529
3530typedef __vector unsigned long long xxh_u64x2;
3531typedef __vector unsigned char xxh_u8x16;
3532typedef __vector unsigned xxh_u32x4;
3533
3534/*
3535 * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
3536 */
3537typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
3538
3539# ifndef XXH_VSX_BE
3540# if defined(__BIG_ENDIAN__) \
3541 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
3542# define XXH_VSX_BE 1
3543# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
3544# warning "-maltivec=be is not recommended. Please use native endianness."
3545# define XXH_VSX_BE 1
3546# else
3547# define XXH_VSX_BE 0
3548# endif
3549# endif /* !defined(XXH_VSX_BE) */
3550
3551# if XXH_VSX_BE
3552# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
3553# define XXH_vec_revb vec_revb
3554# else
3558XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
3559{
3560 xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
3561 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
3562 return vec_perm(val, val, vByteSwap);
3563}
3564# endif
3565# endif /* XXH_VSX_BE */
3566
3570XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
3571{
3572 xxh_u64x2 ret;
3573 XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
3574# if XXH_VSX_BE
3575 ret = XXH_vec_revb(ret);
3576# endif
3577 return ret;
3578}
3579
3580/*
3581 * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
3582 *
3583 * These intrinsics weren't added until GCC 8, despite existing for a while,
3584 * and they are endian dependent. Also, their meanings swap depending on the version.
3585 */
3586# if defined(__s390x__)
3587 /* s390x is always big endian, no issue on this platform */
3588# define XXH_vec_mulo vec_mulo
3589# define XXH_vec_mule vec_mule
3590# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
3591/* Clang has a better way to control this: we can just use the builtin, which doesn't swap. */
3592 /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
3593# define XXH_vec_mulo __builtin_altivec_vmulouw
3594# define XXH_vec_mule __builtin_altivec_vmuleuw
3595# else
3596/* gcc needs inline assembly */
3597/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
3598XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
3599{
3600 xxh_u64x2 result;
3601 __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3602 return result;
3603}
3604XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
3605{
3606 xxh_u64x2 result;
3607 __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3608 return result;
3609}
3610# endif /* XXH_vec_mulo, XXH_vec_mule */
3611#endif /* XXH_VECTOR == XXH_VSX */
3612
3613#if XXH_VECTOR == XXH_SVE
3614#define ACCRND(acc, offset) \
3615do { \
3616 svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
3617 svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
3618 svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
3619 svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
3620 svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
3621 svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
3622 svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
3623 acc = svadd_u64_x(mask, acc, mul); \
3624} while (0)
3625#endif /* XXH_VECTOR == XXH_SVE */
3626
3627
3628/* prefetch
3629 * can be disabled, by declaring XXH_NO_PREFETCH build macro */
3630#if defined(XXH_NO_PREFETCH)
3631# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
3632#else
3633# if XXH_SIZE_OPT >= 1
3634# define XXH_PREFETCH(ptr) (void)(ptr)
3635# elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
3636# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
3637# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
3638# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
3639# define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
3640# else
3641# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
3642# endif
3643#endif /* XXH_NO_PREFETCH */
3644
3645
3646/* ==========================================
3647 * XXH3 default settings
3648 * ========================================== */
3649
3650#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
3651
3652#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
3653# error "default keyset is not large enough"
3654#endif
3655
3657XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
3658 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
3659 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
3660 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
3661 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
3662 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
3663 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
3664 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
3665 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
3666 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
3667 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
3668 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
3669 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
3670};
3671
3672static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL;
3673static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL;
3674
3675#ifdef XXH_OLD_NAMES
3676# define kSecret XXH3_kSecret
3677#endif
3678
3679#ifdef XXH_DOXYGEN
3696XXH_FORCE_INLINE xxh_u64
3697XXH_mult32to64(xxh_u64 x, xxh_u64 y)
3698{
3699 return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
3700}
3701#elif defined(_MSC_VER) && defined(_M_IX86)
3702# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
3703#else
3704/*
3705 * Downcast + upcast is usually better than masking on older compilers like
3706 * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
3707 *
3708 * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
3709 * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
3710 */
3711# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
3712#endif
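/*
 * Illustrative sketch (not part of xxHash; example names are hypothetical):
 * XXH_mult32to64 only uses the low 32 bits of each operand before the
 * widening multiply.
 */
#if 0 /* example only, not compiled */
#include <assert.h>
static void example_mult32to64(void)
{
    /* the high half of the first operand is ignored; (2^32-1)^2 = 0xFFFFFFFE00000001 */
    assert(XXH_mult32to64(0x12345678FFFFFFFFULL, 0xFFFFFFFFULL) == 0xFFFFFFFE00000001ULL);
}
#endif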
3713
3723static XXH128_hash_t
3724XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
3725{
3726 /*
3727 * GCC/Clang __uint128_t method.
3728 *
3729 * On most 64-bit targets, GCC and Clang define a __uint128_t type.
3730 * This is usually the best way as it usually uses a native long 64-bit
3731 * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
3732 *
3733 * Usually.
3734 *
3735 * Even on 32-bit platforms, Clang (and Emscripten) define this type despite
3736 * not having native arithmetic for it. This results in a slow compiler
3737 * builtin call which calculates a full 128-bit multiply.
3738 * In that case it is best to use the portable one.
3739 * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
3740 */
3741#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
3742 && defined(__SIZEOF_INT128__) \
3743 || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
3744
3745 __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
3746 XXH128_hash_t r128;
3747 r128.low64 = (xxh_u64)(product);
3748 r128.high64 = (xxh_u64)(product >> 64);
3749 return r128;
3750
3751 /*
3752 * MSVC for x64's _umul128 method.
3753 *
3754 * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
3755 *
3756 * This compiles to single operand MUL on x64.
3757 */
3758#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
3759
3760#ifndef _MSC_VER
3761# pragma intrinsic(_umul128)
3762#endif
3763 xxh_u64 product_high;
3764 xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
3765 XXH128_hash_t r128;
3766 r128.low64 = product_low;
3767 r128.high64 = product_high;
3768 return r128;
3769
3770 /*
3771 * MSVC for ARM64's __umulh method.
3772 *
3773 * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
3774 */
3775#elif defined(_M_ARM64) || defined(_M_ARM64EC)
3776
3777#ifndef _MSC_VER
3778# pragma intrinsic(__umulh)
3779#endif
3780 XXH128_hash_t r128;
3781 r128.low64 = lhs * rhs;
3782 r128.high64 = __umulh(lhs, rhs);
3783 return r128;
3784
3785#else
3786 /*
3787 * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
3788 *
3789 * This is a fast and simple grade school multiply, which is shown below
3790 * with base 10 arithmetic instead of base 0x100000000.
3791 *
3792 * 9 3 // D2 lhs = 93
3793 * x 7 5 // D2 rhs = 75
3794 * ----------
3795 * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
3796 * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
3797 * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
3798 * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
3799 * ---------
3800 * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
3801 * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
3802 * ---------
3803 * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
3804 *
3805 * The reasons for adding the products like this are:
3806 * 1. It avoids manual carry tracking. Just like how
3807 * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
3808 * This avoids a lot of complexity.
3809 *
3810 * 2. It hints for, and on Clang, compiles to, the powerful UMAAL
3811 * instruction available in ARM's Digital Signal Processing extension
3812 * in 32-bit ARMv6 and later, which is shown below:
3813 *
3814 * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
3815 * {
3816 * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
3817 * *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
3818 * *RdHi = (xxh_u32)(product >> 32);
3819 * }
3820 *
3821 * This instruction was designed for efficient long multiplication, and
3822 * allows this to be calculated in only 4 instructions at speeds
3823 * comparable to some 64-bit ALUs.
3824 *
3825 * 3. It isn't terrible on other platforms. Usually this will be a couple
3826 * of 32-bit ADD/ADCs.
3827 */
3828
3829 /* First calculate all of the cross products. */
3830 xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
3831 xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF);
3832 xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
3833 xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);
3834
3835 /* Now add the products together. These will never overflow. */
3836 xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
3837 xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
3838 xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
3839
3840 XXH128_hash_t r128;
3841 r128.low64 = lower;
3842 r128.high64 = upper;
3843 return r128;
3844#endif
3845}
3846
3857static xxh_u64
3858XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
3859{
3860 XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
3861 return product.low64 ^ product.high64;
3862}
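/*
 * Illustrative sketch (not part of xxHash; example names are hypothetical):
 * cross-check of XXH_mult64to128 against a compiler-provided 128-bit type
 * where available, and the 64-bit fold used by XXH3.
 */
#if 0 /* example only, not compiled */
#include <assert.h>
static void example_mult64to128(void)
{
    xxh_u64 const lhs = 0x9E3779B185EBCA87ULL;
    xxh_u64 const rhs = 0xC2B2AE3D27D4EB4FULL;
    XXH128_hash_t const r = XXH_mult64to128(lhs, rhs);
#if defined(__SIZEOF_INT128__)
    __uint128_t const ref = (__uint128_t)lhs * rhs;
    assert(r.low64  == (xxh_u64)ref);
    assert(r.high64 == (xxh_u64)(ref >> 64));
#endif
    /* XXH3_mul128_fold64 simply XORs the two halves together */
    assert(XXH3_mul128_fold64(lhs, rhs) == (r.low64 ^ r.high64));
}
#endif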
3863
3865XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
3866{
3867 XXH_ASSERT(0 <= shift && shift < 64);
3868 return v64 ^ (v64 >> shift);
3869}
3870
3871/*
3872 * This is a fast avalanche stage,
3873 * suitable when input bits are already partially mixed
3874 */
3875static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
3876{
3877 h64 = XXH_xorshift64(h64, 37);
3878 h64 *= PRIME_MX1;
3879 h64 = XXH_xorshift64(h64, 32);
3880 return h64;
3881}
3882
3883/*
3884 * This is a stronger avalanche,
3885 * inspired by Pelle Evensen's rrmxmx
3886 * preferable when input has not been previously mixed
3887 */
3888static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
3889{
3890 /* this mix is inspired by Pelle Evensen's rrmxmx */
3891 h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
3892 h64 *= PRIME_MX2;
3893 h64 ^= (h64 >> 35) + len ;
3894 h64 *= PRIME_MX2;
3895 return XXH_xorshift64(h64, 28);
3896}
3897
3898
3899/* ==========================================
3900 * Short keys
3901 * ==========================================
3902 * One of the shortcomings of XXH32 and XXH64 was that their performance was
3903 * sub-optimal on short lengths. They used an iterative algorithm which strongly
3904 * favored lengths that were a multiple of 4 or 8.
3905 *
3906 * Instead of iterating over individual inputs, we use a set of single shot
3907 * functions which piece together a range of lengths and operate in constant time.
3908 *
3909 * Additionally, the number of multiplies has been significantly reduced. This
3910 * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
3911 *
3912 * Depending on the platform, this may or may not be faster than XXH32, but it
3913 * is almost guaranteed to be faster than XXH64.
3914 */
3915
3916/*
3917 * At very short lengths, there isn't enough input to fully hide secrets, or use
3918 * the entire secret.
3919 *
3920 * There is also only a limited amount of mixing we can do before significantly
3921 * impacting performance.
3922 *
3923 * Therefore, we use different sections of the secret and always mix two secret
3924 * samples with an XOR. This should have no effect on performance on the
3925 * seedless or withSeed variants because everything _should_ be constant folded
3926 * by modern compilers.
3927 *
3928 * The XOR mixing hides individual parts of the secret and increases entropy.
3929 *
3930 * This adds an extra layer of strength for custom secrets.
3931 */
3932XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3933XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3934{
3935 XXH_ASSERT(input != NULL);
3936 XXH_ASSERT(1 <= len && len <= 3);
3937 XXH_ASSERT(secret != NULL);
3938 /*
3939 * len = 1: combined = { input[0], 0x01, input[0], input[0] }
3940 * len = 2: combined = { input[1], 0x02, input[0], input[1] }
3941 * len = 3: combined = { input[2], 0x03, input[0], input[1] }
3942 */
3943 { xxh_u8 const c1 = input[0];
3944 xxh_u8 const c2 = input[len >> 1];
3945 xxh_u8 const c3 = input[len - 1];
3946 xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24)
3947 | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
3948 xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
3949 xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
3950 return XXH64_avalanche(keyed);
3951 }
3952}
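/*
 * Illustrative sketch (not part of xxHash; example names are hypothetical):
 * how the "combined" 32-bit word is packed for a 2-byte input, following the
 * layout described in the comment above.
 */
#if 0 /* example only, not compiled */
#include <assert.h>
static void example_len2_packing(void)
{
    xxh_u8 const input[2] = { 0xAA, 0xBB };
    size_t const len = 2;
    xxh_u8 const c1 = input[0];        /* 0xAA */
    xxh_u8 const c2 = input[len >> 1]; /* input[1] = 0xBB */
    xxh_u8 const c3 = input[len - 1];  /* input[1] = 0xBB */
    xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24)
                           | ((xxh_u32)c3 <<  0) | ((xxh_u32)len <<  8);
    /* from most to least significant byte: { input[1], input[0], len, input[1] } */
    assert(combined == 0xBBAA02BBu);
}
#endif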
3953
3954XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3955XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3956{
3957 XXH_ASSERT(input != NULL);
3958 XXH_ASSERT(secret != NULL);
3959 XXH_ASSERT(4 <= len && len <= 8);
3960 seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
3961 { xxh_u32 const input1 = XXH_readLE32(input);
3962 xxh_u32 const input2 = XXH_readLE32(input + len - 4);
3963 xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
3964 xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
3965 xxh_u64 const keyed = input64 ^ bitflip;
3966 return XXH3_rrmxmx(keyed, len);
3967 }
3968}
3969
3970XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3971XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3972{
3973 XXH_ASSERT(input != NULL);
3974 XXH_ASSERT(secret != NULL);
3975 XXH_ASSERT(9 <= len && len <= 16);
3976 { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
3977 xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
3978 xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1;
3979 xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
3980 xxh_u64 const acc = len
3981 + XXH_swap64(input_lo) + input_hi
3982 + XXH3_mul128_fold64(input_lo, input_hi);
3983 return XXH3_avalanche(acc);
3984 }
3985}
3986
3987XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3988XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3989{
3990 XXH_ASSERT(len <= 16);
3991 { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed);
3992 if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
3993 if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
3994 return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
3995 }
3996}
3997
3998/*
3999 * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
4000 * multiplication by zero, affecting hashes of lengths 17 to 240.
4001 *
4002 * However, they are very unlikely.
4003 *
4004 * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
4005 * unseeded non-cryptographic hashes, it does not attempt to defend itself
4006 * against specially crafted inputs, only random inputs.
4007 *
4008 * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
4009 * cancelling out the secret can be hit an arbitrary number of times (addressed
4010 * in XXH3_accumulate_512), this collision is very unlikely with random inputs
4011 * and/or proper seeding:
4012 *
4013 * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
4014 * function that is only called up to 16 times per hash with up to 240 bytes of
4015 * input.
4016 *
4017 * This is not too bad for a non-cryptographic hash function, especially with
4018 * only 64 bit outputs.
4019 *
4020 * The 128-bit variant (which trades some speed for strength) is NOT affected
4021 * by this, although it is always a good idea to use a proper seed if you care
4022 * about strength.
4023 */
4024XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
4025 const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
4026{
4027#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
4028 && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \
4029 && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */
4030 /*
4031 * UGLY HACK:
4032 * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
4033 * slower code.
4034 *
4035 * By forcing seed64 into a register, we disrupt the cost model and
4036 * cause it to scalarize. See `XXH32_round()`
4037 *
4038 * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
4039 * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
4040 * GCC 9.2, despite both emitting scalar code.
4041 *
4042 * GCC generates much better scalar code than Clang for the rest of XXH3,
4043 * which is why finding a more optimal codepath is of interest.
4044 */
4045 XXH_COMPILER_GUARD(seed64);
4046#endif
4047 { xxh_u64 const input_lo = XXH_readLE64(input);
4048 xxh_u64 const input_hi = XXH_readLE64(input+8);
4049 return XXH3_mul128_fold64(
4050 input_lo ^ (XXH_readLE64(secret) + seed64),
4051 input_hi ^ (XXH_readLE64(secret+8) - seed64)
4052 );
4053 }
4054}
4055
4056/* For mid range keys, XXH3 uses a Mum-hash variant. */
4057XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
4058XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
4059 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
4060 XXH64_hash_t seed)
4061{
4062 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
4063 XXH_ASSERT(16 < len && len <= 128);
4064
4065 { xxh_u64 acc = len * XXH_PRIME64_1, acc_end;
4066#if XXH_SIZE_OPT >= 1
4067 /* Smaller and cleaner, but slightly slower. */
4068 unsigned int i = (unsigned int)(len - 1) / 32;
4069 do {
4070 acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
4071 acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
4072 } while (i-- != 0);
4073 acc_end = 0;
4074#else
4075 acc += XXH3_mix16B(input+0, secret+0, seed);
4076 acc_end = XXH3_mix16B(input+len-16, secret+16, seed);
4077 if (len > 32) {
4078 acc += XXH3_mix16B(input+16, secret+32, seed);
4079 acc_end += XXH3_mix16B(input+len-32, secret+48, seed);
4080 if (len > 64) {
4081 acc += XXH3_mix16B(input+32, secret+64, seed);
4082 acc_end += XXH3_mix16B(input+len-48, secret+80, seed);
4083
4084 if (len > 96) {
4085 acc += XXH3_mix16B(input+48, secret+96, seed);
4086 acc_end += XXH3_mix16B(input+len-64, secret+112, seed);
4087 }
4088 }
4089 }
4090#endif
4091 return XXH3_avalanche(acc + acc_end);
4092 }
4093}
4094
4095#define XXH3_MIDSIZE_MAX 240
4096
4097XXH_NO_INLINE XXH_PUREF XXH64_hash_t
4098XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
4099 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
4100 XXH64_hash_t seed)
4101{
4102 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
4103 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4104
4105 #define XXH3_MIDSIZE_STARTOFFSET 3
4106 #define XXH3_MIDSIZE_LASTOFFSET 17
4107
4108 { xxh_u64 acc = len * XXH_PRIME64_1;
4109 xxh_u64 acc_end;
4110 unsigned int const nbRounds = (unsigned int)len / 16;
4111 unsigned int i;
4112 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4113 for (i=0; i<8; i++) {
4114 acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
4115 }
4116 /* last bytes */
4117 acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
4118 XXH_ASSERT(nbRounds >= 8);
4119 acc = XXH3_avalanche(acc);
4120#if defined(__clang__) /* Clang */ \
4121 && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
4122 && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
4123 /*
4124 * UGLY HACK:
4125 * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
4126 * Everywhere else, it uses scalar code.
4127 *
4128 * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
4129 * would still be slower than UMAAL (see XXH_mult64to128).
4130 *
4131 * Unfortunately, Clang doesn't handle the long multiplies properly and
4132 * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
4133 * scalarized into an ugly mess of VMOV.32 instructions.
4134 *
4135 * This mess is difficult to avoid without turning autovectorization
4136 * off completely, but such issues are usually relatively minor and/or not
4137 * worth fixing.
4138 *
4139 * This loop is the easiest to fix, as unlike XXH32, this pragma
4140 * _actually works_ because it is a loop vectorization instead of an
4141 * SLP vectorization.
4142 */
4143 #pragma clang loop vectorize(disable)
4144#endif
4145 for (i=8 ; i < nbRounds; i++) {
4146 /*
4147 * Prevents Clang from unrolling the acc loop and interleaving it with this one.
4148 */
4149 XXH_COMPILER_GUARD(acc);
4150 acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
4151 }
4152 return XXH3_avalanche(acc + acc_end);
4153 }
4154}
4155
4156
4157/* ======= Long Keys ======= */
4158
4159#define XXH_STRIPE_LEN 64
4160#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */
4161#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
4162
4163#ifdef XXH_OLD_NAMES
4164# define STRIPE_LEN XXH_STRIPE_LEN
4165# define ACC_NB XXH_ACC_NB
4166#endif
4167
4168#ifndef XXH_PREFETCH_DIST
4169# ifdef __clang__
4170# define XXH_PREFETCH_DIST 320
4171# else
4172# if (XXH_VECTOR == XXH_AVX512)
4173# define XXH_PREFETCH_DIST 512
4174# else
4175# define XXH_PREFETCH_DIST 384
4176# endif
4177# endif /* __clang__ */
4178#endif /* XXH_PREFETCH_DIST */
4179
4180/*
4181 * These macros are to generate an XXH3_accumulate() function.
4182 * The two arguments select the name suffix and target attribute.
4183 *
4184 * The name of this symbol is XXH3_accumulate_<name>() and it calls
4185 * XXH3_accumulate_512_<name>().
4186 *
4187 * It may be useful to hand implement this function if the compiler fails to
4188 * optimize the inline function.
4189 */
4190#define XXH3_ACCUMULATE_TEMPLATE(name) \
4191void \
4192XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \
4193 const xxh_u8* XXH_RESTRICT input, \
4194 const xxh_u8* XXH_RESTRICT secret, \
4195 size_t nbStripes) \
4196{ \
4197 size_t n; \
4198 for (n = 0; n < nbStripes; n++ ) { \
4199 const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \
4200 XXH_PREFETCH(in + XXH_PREFETCH_DIST); \
4201 XXH3_accumulate_512_##name( \
4202 acc, \
4203 in, \
4204 secret + n*XXH_SECRET_CONSUME_RATE); \
4205 } \
4206}
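/*
 * Illustrative sketch (not part of xxHash): a hand-written view of what
 * XXH3_ACCUMULATE_TEMPLATE(scalar) expands to, shown here only for clarity
 * (in the real file, the instantiation carries an additional prefix such as
 * an inline/target attribute).
 */
#if 0 /* example only, not compiled */
void
XXH3_accumulate_scalar(xxh_u64* XXH_RESTRICT acc,
                       const xxh_u8* XXH_RESTRICT input,
                       const xxh_u8* XXH_RESTRICT secret,
                       size_t nbStripes)
{
    size_t n;
    for (n = 0; n < nbStripes; n++) {
        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;       /* 64 bytes per stripe */
        XXH_PREFETCH(in + XXH_PREFETCH_DIST);                    /* hide memory latency */
        XXH3_accumulate_512_scalar(acc, in, secret + n*XXH_SECRET_CONSUME_RATE);
    }
}
#endif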
4207
4208
4209XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
4210{
4211 if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
4212 XXH_memcpy(dst, &v64, sizeof(v64));
4213}
4214
4215/* Several intrinsic functions below are supposed to accept __int64 as an argument,
4216 * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
4217 * However, several environments do not define __int64 type,
4218 * requiring a workaround.
4219 */
4220#if !defined (__VMS) \
4221 && (defined (__cplusplus) \
4222 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
4223 typedef int64_t xxh_i64;
4224#else
4225 /* the following type must have a width of 64-bit */
4226 typedef long long xxh_i64;
4227#endif
4228
4229
4230/*
4231 * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
4232 *
4233 * It is a hardened version of UMAC, based off of FARSH's implementation.
4234 *
4235 * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
4236 * implementations, and it is ridiculously fast.
4237 *
4238 * We harden it by mixing the original input to the accumulators as well as the product.
4239 *
4240 * This means that in the (relatively likely) case of a multiply by zero, the
4241 * original input is preserved.
4242 *
4243 * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
4244 * cross-pollination, as otherwise the upper and lower halves would be
4245 * essentially independent.
4246 *
4247 * This doesn't matter on 64-bit hashes since they all get merged together in
4248 * the end, so we skip the extra step.
4249 *
4250 * Both XXH3_64bits and XXH3_128bits use this subroutine.
4251 */
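/*
 * Illustrative sketch (not part of xxHash; example names are hypothetical):
 * per 64-bit lane, each SIMD variant below performs the equivalent of this
 * scalar operation (the same idea implemented by the scalar path,
 * XXH3_scalarRound, later in this file).
 */
#if 0 /* example only, not compiled */
static void example_accumulate_lane(xxh_u64* acc, const xxh_u8* input,
                                    const xxh_u8* secret, size_t lane)
{
    xxh_u64 const data_val = XXH_readLE64(input + lane * 8);
    xxh_u64 const data_key = data_val ^ XXH_readLE64(secret + lane * 8);
    acc[lane ^ 1] += data_val;                              /* add the swapped input lane */
    acc[lane]     += XXH_mult32to64(data_key & 0xFFFFFFFF,  /* 32x32->64 multiply of the */
                                    data_key >> 32);        /* two halves of data_key    */
}
#endif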
4252
4253#if (XXH_VECTOR == XXH_AVX512) \
4254 || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
4255
4256#ifndef XXH_TARGET_AVX512
4257# define XXH_TARGET_AVX512 /* disable attribute target */
4258#endif
4259
4260XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4261XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
4262 const void* XXH_RESTRICT input,
4263 const void* XXH_RESTRICT secret)
4264{
4265 __m512i* const xacc = (__m512i *) acc;
4266 XXH_ASSERT((((size_t)acc) & 63) == 0);
4267 XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
4268
4269 {
4270 /* data_vec = input[0]; */
4271 __m512i const data_vec = _mm512_loadu_si512 (input);
4272 /* key_vec = secret[0]; */
4273 __m512i const key_vec = _mm512_loadu_si512 (secret);
4274 /* data_key = data_vec ^ key_vec; */
4275 __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
4276 /* data_key_lo = data_key >> 32; */
4277 __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
4278 /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4279 __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);
4280 /* xacc[0] += swap(data_vec); */
4281 __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
4282 __m512i const sum = _mm512_add_epi64(*xacc, data_swap);
4283 /* xacc[0] += product; */
4284 *xacc = _mm512_add_epi64(product, sum);
4285 }
4286}
4287XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
4288
4289/*
4290 * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
4291 *
4292 * Multiplication isn't perfect, as explained by Google in HighwayHash:
4293 *
4294 * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
4295 * // varying degrees. In descending order of goodness, bytes
4296 * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
4297 * // As expected, the upper and lower bytes are much worse.
4298 *
4299 * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
4300 *
4301 * Since our algorithm uses a pseudorandom secret to add some variance into the
4302 * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
4303 *
4304 * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
4305 * extraction.
4306 *
4307 * Both XXH3_64bits and XXH3_128bits use this subroutine.
4308 */
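/*
 * Illustrative sketch (not part of xxHash; example names are hypothetical):
 * per 64-bit lane, the scramble step is equivalent to this scalar operation
 * (the same idea implemented by XXH3_scalarScrambleRound later in this file).
 */
#if 0 /* example only, not compiled */
static void example_scramble_lane(xxh_u64* acc, const xxh_u8* secret, size_t lane)
{
    xxh_u64 acc64 = acc[lane];
    acc64 ^= acc64 >> 47;                        /* xorshift */
    acc64 ^= XXH_readLE64(secret + lane * 8);    /* mix in the secret */
    acc64 *= XXH_PRIME32_1;                      /* multiply by a 32-bit prime */
    acc[lane] = acc64;
}
#endif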
4309
4310XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4311XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4312{
4313 XXH_ASSERT((((size_t)acc) & 63) == 0);
4314 XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
4315 { __m512i* const xacc = (__m512i*) acc;
4316 const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
4317
4318 /* xacc[0] ^= (xacc[0] >> 47) */
4319 __m512i const acc_vec = *xacc;
4320 __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);
4321 /* xacc[0] ^= secret; */
4322 __m512i const key_vec = _mm512_loadu_si512 (secret);
4323 __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
4324
4325 /* xacc[0] *= XXH_PRIME32_1; */
4326 __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
4327 __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);
4328 __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
4329 *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
4330 }
4331}
4332
4333XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4334XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4335{
4336 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
4337 XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
4338 XXH_ASSERT(((size_t)customSecret & 63) == 0);
4339 (void)(&XXH_writeLE64);
4340 { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
4341 __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
4342 __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
4343
4344 const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret);
4345 __m512i* const dest = ( __m512i*) customSecret;
4346 int i;
4347 XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
4348 XXH_ASSERT(((size_t)dest & 63) == 0);
4349 for (i=0; i < nbRounds; ++i) {
4350 dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
4351 } }
4352}
4353
4354#endif
4355
4356#if (XXH_VECTOR == XXH_AVX2) \
4357 || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
4358
4359#ifndef XXH_TARGET_AVX2
4360# define XXH_TARGET_AVX2 /* disable attribute target */
4361#endif
4362
4363XXH_FORCE_INLINE XXH_TARGET_AVX2 void
4364XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
4365 const void* XXH_RESTRICT input,
4366 const void* XXH_RESTRICT secret)
4367{
4368 XXH_ASSERT((((size_t)acc) & 31) == 0);
4369 { __m256i* const xacc = (__m256i *) acc;
4370 /* Unaligned. This is mainly for pointer arithmetic, and because
4371 * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4372 const __m256i* const xinput = (const __m256i *) input;
4373 /* Unaligned. This is mainly for pointer arithmetic, and because
4374 * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4375 const __m256i* const xsecret = (const __m256i *) secret;
4376
4377 size_t i;
4378 for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
4379 /* data_vec = xinput[i]; */
4380 __m256i const data_vec = _mm256_loadu_si256 (xinput+i);
4381 /* key_vec = xsecret[i]; */
4382 __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
4383 /* data_key = data_vec ^ key_vec; */
4384 __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
4385 /* data_key_lo = data_key >> 32; */
4386 __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
4387 /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4388 __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);
4389 /* xacc[i] += swap(data_vec); */
4390 __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
4391 __m256i const sum = _mm256_add_epi64(xacc[i], data_swap);
4392 /* xacc[i] += product; */
4393 xacc[i] = _mm256_add_epi64(product, sum);
4394 } }
4395}
4396XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
4397
4398XXH_FORCE_INLINE XXH_TARGET_AVX2 void
4399XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4400{
4401 XXH_ASSERT((((size_t)acc) & 31) == 0);
4402 { __m256i* const xacc = (__m256i*) acc;
4403 /* Unaligned. This is mainly for pointer arithmetic, and because
4404 * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4405 const __m256i* const xsecret = (const __m256i *) secret;
4406 const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
4407
4408 size_t i;
4409 for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
4410 /* xacc[i] ^= (xacc[i] >> 47) */
4411 __m256i const acc_vec = xacc[i];
4412 __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47);
4413 __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted);
4414 /* xacc[i] ^= xsecret; */
4415 __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
4416 __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
4417
4418 /* xacc[i] *= XXH_PRIME32_1; */
4419 __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
4420 __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);
4421 __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
4422 xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
4423 }
4424 }
4425}
4426
4427XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4428{
4429 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
4430 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
4431 XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
4432 (void)(&XXH_writeLE64);
4433 XXH_PREFETCH(customSecret);
4434 { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
4435
4436 const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret);
4437 __m256i* dest = ( __m256i*) customSecret;
4438
4439# if defined(__GNUC__) || defined(__clang__)
4440 /*
4441 * On GCC & Clang, marking 'dest' as modified will cause the compiler to:
4442 * - not extract the secret from SSE registers in the internal loop
4443 * - use fewer common registers, and avoid pushing these registers onto the stack
4444 */
4445 XXH_COMPILER_GUARD(dest);
4446# endif
4447 XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
4448 XXH_ASSERT(((size_t)dest & 31) == 0);
4449
4450 /* GCC -O2 needs the loop unrolled manually */
4451 dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
4452 dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
4453 dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
4454 dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
4455 dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
4456 dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
4457 }
4458}
4459
4460#endif
4461
4462/* x86dispatch always generates SSE2 */
4463#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
4464
4465#ifndef XXH_TARGET_SSE2
4466# define XXH_TARGET_SSE2 /* disable attribute target */
4467#endif
4468
4469XXH_FORCE_INLINE XXH_TARGET_SSE2 void
4470XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
4471 const void* XXH_RESTRICT input,
4472 const void* XXH_RESTRICT secret)
4473{
4474 /* SSE2 is just a half-scale version of the AVX2 version. */
4475 XXH_ASSERT((((size_t)acc) & 15) == 0);
4476 { __m128i* const xacc = (__m128i *) acc;
4477 /* Unaligned. This is mainly for pointer arithmetic, and because
4478 * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4479 const __m128i* const xinput = (const __m128i *) input;
4480 /* Unaligned. This is mainly for pointer arithmetic, and because
4481 * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4482 const __m128i* const xsecret = (const __m128i *) secret;
4483
4484 size_t i;
4485 for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
4486 /* data_vec = xinput[i]; */
4487 __m128i const data_vec = _mm_loadu_si128 (xinput+i);
4488 /* key_vec = xsecret[i]; */
4489 __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
4490 /* data_key = data_vec ^ key_vec; */
4491 __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
4492 /* data_key_lo = data_key >> 32; */
4493 __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4494 /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4495 __m128i const product = _mm_mul_epu32 (data_key, data_key_lo);
4496 /* xacc[i] += swap(data_vec); */
4497 __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
4498 __m128i const sum = _mm_add_epi64(xacc[i], data_swap);
4499 /* xacc[i] += product; */
4500 xacc[i] = _mm_add_epi64(product, sum);
4501 } }
4502}
4503XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
4504
4505XXH_FORCE_INLINE XXH_TARGET_SSE2 void
4506XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4507{
4508 XXH_ASSERT((((size_t)acc) & 15) == 0);
4509 { __m128i* const xacc = (__m128i*) acc;
4510 /* Unaligned. This is mainly for pointer arithmetic, and because
4511 * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4512 const __m128i* const xsecret = (const __m128i *) secret;
4513 const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
4514
4515 size_t i;
4516 for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
4517 /* xacc[i] ^= (xacc[i] >> 47) */
4518 __m128i const acc_vec = xacc[i];
4519 __m128i const shifted = _mm_srli_epi64 (acc_vec, 47);
4520 __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted);
4521 /* xacc[i] ^= xsecret[i]; */
4522 __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
4523 __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
4524
4525 /* xacc[i] *= XXH_PRIME32_1; */
4526 __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4527 __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32);
4528 __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32);
4529 xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
4530 }
4531 }
4532}
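/*
 * For reference, a scalar sketch of what the two _mm_mul_epu32 calls above
 * compute for each 64-bit lane x (valid because XXH_PRIME32_1 fits in 32 bits):
 *
 *   x * XXH_PRIME32_1 (mod 2^64)
 *       == (x & 0xFFFFFFFF) * XXH_PRIME32_1              // prod_lo
 *        + (((x >> 32)      * XXH_PRIME32_1) << 32)      // prod_hi << 32
 *
 * The high-half product only contributes to bits 32..63 after the shift;
 * anything above bit 63 is discarded by the 64-bit wrap-around.
 */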
4533
4534XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4535{
4536 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
4537 (void)(&XXH_writeLE64);
4538 { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
4539
4540# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
4541 /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
4542 XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
4543 __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
4544# else
4545 __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
4546# endif
4547 int i;
4548
4549 const void* const src16 = XXH3_kSecret;
4550 __m128i* dst16 = (__m128i*) customSecret;
4551# if defined(__GNUC__) || defined(__clang__)
4552 /*
4553 * On GCC & Clang, marking 'dst16' as modified causes the compiler to:
4554 * - not extract the secret from sse registers in the internal loop
4555 * - use fewer common registers, and avoid pushing these registers onto the stack
4556 */
4557 XXH_COMPILER_GUARD(dst16);
4558# endif
4559 XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
4560 XXH_ASSERT(((size_t)dst16 & 15) == 0);
4561
4562 for (i=0; i < nbRounds; ++i) {
4563 dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
4564 } }
4565}
4566
4567#endif
4568
4569#if (XXH_VECTOR == XXH_NEON)
4570
4571/* forward declarations for the scalar routines */
4572XXH_FORCE_INLINE void
4573XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
4574 void const* XXH_RESTRICT secret, size_t lane);
4575
4576XXH_FORCE_INLINE void
4577XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
4578 void const* XXH_RESTRICT secret, size_t lane);
4579
4600XXH_FORCE_INLINE void
4601XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
4602 const void* XXH_RESTRICT input,
4603 const void* XXH_RESTRICT secret)
4604{
4605 XXH_ASSERT((((size_t)acc) & 15) == 0);
4606 XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
4607 { /* GCC for darwin arm64 does not like aliasing here */
4608 xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
4609 /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
4610 uint8_t const* const xinput = (const uint8_t *) input;
4611 uint8_t const* const xsecret = (const uint8_t *) secret;
4612
4613 size_t i;
4614 /* Scalar lanes use the normal scalarRound routine */
4615 for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
4616 XXH3_scalarRound(acc, input, secret, i);
4617 }
4618 i = 0;
4619 /* 4 NEON lanes at a time. */
4620 for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
4621 /* data_vec = xinput[i]; */
4622 uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16));
4623 uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16));
4624 /* key_vec = xsecret[i]; */
4625 uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16));
4626 uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16));
4627 /* data_swap = swap(data_vec) */
4628 uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
4629 uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
4630 /* data_key = data_vec ^ key_vec; */
4631 uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
4632 uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
4633
4634 /*
4635 * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
4636 * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
4637 * get one vector with the low 32 bits of each lane, and one vector
4638 * with the high 32 bits of each lane.
4639 *
4640 * This compiles to two instructions on AArch64. The paired vector
4641 * result is an artifact of the ARMv7a version, which modified both
4642 * vectors in place.
4643 *
4644 * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
4645 * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
4646 */
4647 uint32x4x2_t unzipped = vuzpq_u32(
4648 vreinterpretq_u32_u64(data_key_1),
4649 vreinterpretq_u32_u64(data_key_2)
4650 );
4651 /* data_key_lo = data_key & 0xFFFFFFFF */
4652 uint32x4_t data_key_lo = unzipped.val[0];
4653 /* data_key_hi = data_key >> 32 */
4654 uint32x4_t data_key_hi = unzipped.val[1];
4655 /*
4656 * Then we can multiply the split vectors. On AArch64, most widening
4657 * intrinsics come with a '_high' variant that operates on the upper
4658 * halves of the inputs for free.
4659 *
4660 * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
4661 */
4662 uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
4663 uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
4664 /*
4665 * Clang reorders
4666 * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s
4667 * c += a; // add acc.2d, acc.2d, swap.2d
4668 * to
4669 * c += a; // add acc.2d, acc.2d, swap.2d
4670 * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s
4671 *
4672 * While it would make sense in theory since the addition is faster,
4673 * for reasons likely related to umlal being limited to certain NEON
4674 * pipelines, this is worse. A compiler guard fixes this.
4675 */
4676 XXH_COMPILER_GUARD_W(sum_1);
4677 XXH_COMPILER_GUARD_W(sum_2);
4678 /* xacc[i] = acc_vec + sum; */
4679 xacc[i] = vaddq_u64(xacc[i], sum_1);
4680 xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
4681 }
4682 /* Operate on the remaining NEON lanes 2 at a time. */
4683 for (; i < XXH3_NEON_LANES / 2; i++) {
4684 /* data_vec = xinput[i]; */
4685 uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
4686 /* key_vec = xsecret[i]; */
4687 uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
4688 /* data_swap = swap(data_vec) */
4689 uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
4690 /* data_key = data_vec ^ key_vec; */
4691 uint64x2_t data_key = veorq_u64(data_vec, key_vec);
4692 /* For two lanes, just use VMOVN and VSHRN. */
4693 /* data_key_lo = data_key & 0xFFFFFFFF; */
4694 uint32x2_t data_key_lo = vmovn_u64(data_key);
4695 /* data_key_hi = data_key >> 32; */
4696 uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
4697 /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
4698 uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
4699 /* Same Clang workaround as before */
4700 XXH_COMPILER_GUARD_W(sum);
4701 /* xacc[i] = acc_vec + sum; */
4702 xacc[i] = vaddq_u64 (xacc[i], sum);
4703 }
4704 }
4705}
4706XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
4707
4708XXH_FORCE_INLINE void
4709XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4710{
4711 XXH_ASSERT((((size_t)acc) & 15) == 0);
4712
4713 { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;
4714 uint8_t const* xsecret = (uint8_t const*) secret;
4715 uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);
4716
4717 size_t i;
4718 /* AArch64 uses both scalar and neon at the same time */
4719 for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
4720 XXH3_scalarScrambleRound(acc, secret, i);
4721 }
4722 for (i=0; i < XXH3_NEON_LANES / 2; i++) {
4723 /* xacc[i] ^= (xacc[i] >> 47); */
4724 uint64x2_t acc_vec = xacc[i];
4725 uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
4726 uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
4727
4728 /* xacc[i] ^= xsecret[i]; */
4729 uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
4730 uint64x2_t data_key = veorq_u64(data_vec, key_vec);
4731
4732 /* xacc[i] *= XXH_PRIME32_1 */
4733 uint32x2_t data_key_lo = vmovn_u64(data_key);
4734 uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
4735 /*
4736 * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
4737 *
4738 * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
4739 * incorrectly "optimize" this:
4740 * tmp = vmul_u32(vmovn_u64(a), vmovn_u64(b));
4741 * shifted = vshll_n_u32(tmp, 32);
4742 * to this:
4743 * tmp = "vmulq_u64"(a, b); // no such thing!
4744 * shifted = vshlq_n_u64(tmp, 32);
4745 *
4746 * However, unlike SSE, Clang lacks a 64-bit multiply routine
4747 * for NEON, and it scalarizes two 64-bit multiplies instead.
4748 *
4749 * vmull_u32 has the same timing as vmul_u32, and it avoids
4750 * this bug completely.
4751 * See https://bugs.llvm.org/show_bug.cgi?id=39967
4752 */
4753 uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
4754 /* xacc[i] = prod_hi << 32; */
4755 prod_hi = vshlq_n_u64(prod_hi, 32);
4756 /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
4757 xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
4758 }
4759 }
4760}
4761#endif
4762
4763#if (XXH_VECTOR == XXH_VSX)
4764
4765XXH_FORCE_INLINE void
4766XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
4767 const void* XXH_RESTRICT input,
4768 const void* XXH_RESTRICT secret)
4769{
4770 /* presumed aligned */
4771 xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
4772 xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */
4773 xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */
4774 xxh_u64x2 const v32 = { 32, 32 };
4775 size_t i;
4776 for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
4777 /* data_vec = xinput[i]; */
4778 xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
4779 /* key_vec = xsecret[i]; */
4780 xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
4781 xxh_u64x2 const data_key = data_vec ^ key_vec;
4782 /* shuffled = (data_key << 32) | (data_key >> 32); */
4783 xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
4784 /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
4785 xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
4786 /* acc_vec = xacc[i]; */
4787 xxh_u64x2 acc_vec = xacc[i];
4788 acc_vec += product;
4789
4790 /* swap high and low halves */
4791#ifdef __s390x__
4792 acc_vec += vec_permi(data_vec, data_vec, 2);
4793#else
4794 acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
4795#endif
4796 xacc[i] = acc_vec;
4797 }
4798}
4799XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
4800
4801XXH_FORCE_INLINE void
4802XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4803{
4804 XXH_ASSERT((((size_t)acc) & 15) == 0);
4805
4806 { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
4807 const xxh_u8* const xsecret = (const xxh_u8*) secret;
4808 /* constants */
4809 xxh_u64x2 const v32 = { 32, 32 };
4810 xxh_u64x2 const v47 = { 47, 47 };
4811 xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
4812 size_t i;
4813 for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
4814 /* xacc[i] ^= (xacc[i] >> 47); */
4815 xxh_u64x2 const acc_vec = xacc[i];
4816 xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
4817
4818 /* xacc[i] ^= xsecret[i]; */
4819 xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
4820 xxh_u64x2 const data_key = data_vec ^ key_vec;
4821
4822 /* xacc[i] *= XXH_PRIME32_1 */
4823 /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */
4824 xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
4825 /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */
4826 xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime);
4827 xacc[i] = prod_odd + (prod_even << v32);
4828 } }
4829}
4830
4831#endif
4832
4833#if (XXH_VECTOR == XXH_SVE)
4834
4835XXH_FORCE_INLINE void
4836XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
4837 const void* XXH_RESTRICT input,
4838 const void* XXH_RESTRICT secret)
4839{
4840 uint64_t *xacc = (uint64_t *)acc;
4841 const uint64_t *xinput = (const uint64_t *)(const void *)input;
4842 const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
4843 svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
4844 uint64_t element_count = svcntd();
4845 if (element_count >= 8) {
4846 svbool_t mask = svptrue_pat_b64(SV_VL8);
4847 svuint64_t vacc = svld1_u64(mask, xacc);
4848 ACCRND(vacc, 0);
4849 svst1_u64(mask, xacc, vacc);
4850 } else if (element_count == 2) { /* sve128 */
4851 svbool_t mask = svptrue_pat_b64(SV_VL2);
4852 svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4853 svuint64_t acc1 = svld1_u64(mask, xacc + 2);
4854 svuint64_t acc2 = svld1_u64(mask, xacc + 4);
4855 svuint64_t acc3 = svld1_u64(mask, xacc + 6);
4856 ACCRND(acc0, 0);
4857 ACCRND(acc1, 2);
4858 ACCRND(acc2, 4);
4859 ACCRND(acc3, 6);
4860 svst1_u64(mask, xacc + 0, acc0);
4861 svst1_u64(mask, xacc + 2, acc1);
4862 svst1_u64(mask, xacc + 4, acc2);
4863 svst1_u64(mask, xacc + 6, acc3);
4864 } else {
4865 svbool_t mask = svptrue_pat_b64(SV_VL4);
4866 svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4867 svuint64_t acc1 = svld1_u64(mask, xacc + 4);
4868 ACCRND(acc0, 0);
4869 ACCRND(acc1, 4);
4870 svst1_u64(mask, xacc + 0, acc0);
4871 svst1_u64(mask, xacc + 4, acc1);
4872 }
4873}
4874
4875XXH_FORCE_INLINE void
4876XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
4877 const xxh_u8* XXH_RESTRICT input,
4878 const xxh_u8* XXH_RESTRICT secret,
4879 size_t nbStripes)
4880{
4881 if (nbStripes != 0) {
4882 uint64_t *xacc = (uint64_t *)acc;
4883 const uint64_t *xinput = (const uint64_t *)(const void *)input;
4884 const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
4885 svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
4886 uint64_t element_count = svcntd();
4887 if (element_count >= 8) {
4888 svbool_t mask = svptrue_pat_b64(SV_VL8);
4889 svuint64_t vacc = svld1_u64(mask, xacc + 0);
4890 do {
4891 /* svprfd(svbool_t, const void *, enum svprfop); */
4892 svprfd(mask, xinput + 128, SV_PLDL1STRM);
4893 ACCRND(vacc, 0);
4894 xinput += 8;
4895 xsecret += 1;
4896 nbStripes--;
4897 } while (nbStripes != 0);
4898
4899 svst1_u64(mask, xacc + 0, vacc);
4900 } else if (element_count == 2) { /* sve128 */
4901 svbool_t mask = svptrue_pat_b64(SV_VL2);
4902 svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4903 svuint64_t acc1 = svld1_u64(mask, xacc + 2);
4904 svuint64_t acc2 = svld1_u64(mask, xacc + 4);
4905 svuint64_t acc3 = svld1_u64(mask, xacc + 6);
4906 do {
4907 svprfd(mask, xinput + 128, SV_PLDL1STRM);
4908 ACCRND(acc0, 0);
4909 ACCRND(acc1, 2);
4910 ACCRND(acc2, 4);
4911 ACCRND(acc3, 6);
4912 xinput += 8;
4913 xsecret += 1;
4914 nbStripes--;
4915 } while (nbStripes != 0);
4916
4917 svst1_u64(mask, xacc + 0, acc0);
4918 svst1_u64(mask, xacc + 2, acc1);
4919 svst1_u64(mask, xacc + 4, acc2);
4920 svst1_u64(mask, xacc + 6, acc3);
4921 } else {
4922 svbool_t mask = svptrue_pat_b64(SV_VL4);
4923 svuint64_t acc0 = svld1_u64(mask, xacc + 0);
4924 svuint64_t acc1 = svld1_u64(mask, xacc + 4);
4925 do {
4926 svprfd(mask, xinput + 128, SV_PLDL1STRM);
4927 ACCRND(acc0, 0);
4928 ACCRND(acc1, 4);
4929 xinput += 8;
4930 xsecret += 1;
4931 nbStripes--;
4932 } while (nbStripes != 0);
4933
4934 svst1_u64(mask, xacc + 0, acc0);
4935 svst1_u64(mask, xacc + 4, acc1);
4936 }
4937 }
4938}
4939
4940#endif
4941
4942/* scalar variants - universal */
4943
4944#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
4945/*
4946 * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
4947 * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
4948 *
4949 * While this might not seem like much, as AArch64 is a 64-bit architecture, only
4950 * big Cortex designs have a full 64-bit multiplier.
4951 *
4952 * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
4953 * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
4954 * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
4955 *
4956 * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
4957 * not have this penalty and does the mask automatically.
4958 */
4959XXH_FORCE_INLINE xxh_u64
4960XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
4961{
4962 xxh_u64 ret;
4963 /* note: %x = 64-bit register, %w = 32-bit register */
4964 __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
4965 return ret;
4966}
4967#else
4968XXH_FORCE_INLINE xxh_u64
4969XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
4970{
4971 return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
4972}
4973#endif
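/*
 * In both variants, XXH_mult32to64_add64() computes
 *
 *   ret = (xxh_u64)(xxh_u32)lhs * (xxh_u64)(xxh_u32)rhs + acc;
 *
 * i.e. only the low 32 bits of lhs and rhs take part in the multiply, which is
 * exactly what a single UMADDL provides on AArch64. Illustrative values:
 * lhs = 0xFFFFFFFF00000002, rhs = 0x100000003, acc = 10 -> (2 * 3) + 10 == 16;
 * the high halves are ignored.
 */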
4974
4982XXH_FORCE_INLINE void
4983XXH3_scalarRound(void* XXH_RESTRICT acc,
4984 void const* XXH_RESTRICT input,
4985 void const* XXH_RESTRICT secret,
4986 size_t lane)
4987{
4988 xxh_u64* xacc = (xxh_u64*) acc;
4989 xxh_u8 const* xinput = (xxh_u8 const*) input;
4990 xxh_u8 const* xsecret = (xxh_u8 const*) secret;
4991 XXH_ASSERT(lane < XXH_ACC_NB);
4992 XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
4993 {
4994 xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
4995 xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
4996 xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
4997 xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
4998 }
4999}
5000
5005XXH_FORCE_INLINE void
5006XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
5007 const void* XXH_RESTRICT input,
5008 const void* XXH_RESTRICT secret)
5009{
5010 size_t i;
5011 /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
5012#if defined(__GNUC__) && !defined(__clang__) \
5013 && (defined(__arm__) || defined(__thumb2__)) \
5014 && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
5015 && XXH_SIZE_OPT <= 0
5016# pragma GCC unroll 8
5017#endif
5018 for (i=0; i < XXH_ACC_NB; i++) {
5019 XXH3_scalarRound(acc, input, secret, i);
5020 }
5021}
5022XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
5023
5024
5031XXH_FORCE_INLINE void
5032XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
5033 void const* XXH_RESTRICT secret,
5034 size_t lane)
5035{
5036 xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
5037 const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
5038 XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
5039 XXH_ASSERT(lane < XXH_ACC_NB);
5040 {
5041 xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
5042 xxh_u64 acc64 = xacc[lane];
5043 acc64 = XXH_xorshift64(acc64, 47);
5044 acc64 ^= key64;
5045 acc64 *= XXH_PRIME32_1;
5046 xacc[lane] = acc64;
5047 }
5048}
5049
5054XXH_FORCE_INLINE void
5055XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
5056{
5057 size_t i;
5058 for (i=0; i < XXH_ACC_NB; i++) {
5059 XXH3_scalarScrambleRound(acc, secret, i);
5060 }
5061}
5062
5063XXH_FORCE_INLINE void
5064XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
5065{
5066 /*
5067 * We need a separate pointer for the hack below,
5068 * which requires a non-const pointer.
5069 * Any decent compiler will optimize this out otherwise.
5070 */
5071 const xxh_u8* kSecretPtr = XXH3_kSecret;
5072 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
5073
5074#if defined(__GNUC__) && defined(__aarch64__)
5075 /*
5076 * UGLY HACK:
5077 * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
5078 * placed sequentially, in order, at the top of the unrolled loop.
5079 *
5080 * While MOVK is great for generating constants (2 cycles for a 64-bit
5081 * constant compared to 4 cycles for LDR), it fights for bandwidth with
5082 * the arithmetic instructions.
5083 *
5084 * I L S
5085 * MOVK
5086 * MOVK
5087 * MOVK
5088 * MOVK
5089 * ADD
5090 * SUB STR
5091 * STR
5092 * By forcing loads from memory (as the asm line causes the compiler to assume
5093 * that kSecretPtr has been changed), the pipelines are used more
5094 * efficiently:
5095 * I L S
5096 * LDR
5097 * ADD LDR
5098 * SUB STR
5099 * STR
5100 *
5101 * See XXH3_NEON_LANES for details on the pipeline.
5102 *
5103 * XXH3_64bits_withSeed, len == 256, Snapdragon 835
5104 * without hack: 2654.4 MB/s
5105 * with hack: 3202.9 MB/s
5106 */
5107 XXH_COMPILER_GUARD(kSecretPtr);
5108#endif
5109 { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
5110 int i;
5111 for (i=0; i < nbRounds; i++) {
5112 /*
5113 * The asm hack causes the compiler to assume that kSecretPtr aliases with
5114 * customSecret, and on aarch64, this prevented LDP from merging two
5115 * loads together for free. Putting the loads together before the stores
5116 * properly generates LDP.
5117 */
5118 xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64;
5119 xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
5120 XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo);
5121 XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
5122 } }
5123}
5124
5125
5126typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
5127typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
5128typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
5129
5130
5131#if (XXH_VECTOR == XXH_AVX512)
5132
5133#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
5134#define XXH3_accumulate XXH3_accumulate_avx512
5135#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
5136#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
5137
5138#elif (XXH_VECTOR == XXH_AVX2)
5139
5140#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
5141#define XXH3_accumulate XXH3_accumulate_avx2
5142#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
5143#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
5144
5145#elif (XXH_VECTOR == XXH_SSE2)
5146
5147#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
5148#define XXH3_accumulate XXH3_accumulate_sse2
5149#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
5150#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
5151
5152#elif (XXH_VECTOR == XXH_NEON)
5153
5154#define XXH3_accumulate_512 XXH3_accumulate_512_neon
5155#define XXH3_accumulate XXH3_accumulate_neon
5156#define XXH3_scrambleAcc XXH3_scrambleAcc_neon
5157#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5158
5159#elif (XXH_VECTOR == XXH_VSX)
5160
5161#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
5162#define XXH3_accumulate XXH3_accumulate_vsx
5163#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
5164#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5165
5166#elif (XXH_VECTOR == XXH_SVE)
5167#define XXH3_accumulate_512 XXH3_accumulate_512_sve
5168#define XXH3_accumulate XXH3_accumulate_sve
5169#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
5170#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5171
5172#else /* scalar */
5173
5174#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
5175#define XXH3_accumulate XXH3_accumulate_scalar
5176#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
5177#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5178
5179#endif
5180
5181#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
5182# undef XXH3_initCustomSecret
5183# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5184#endif
5185
5186XXH_FORCE_INLINE void
5187XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
5188 const xxh_u8* XXH_RESTRICT input, size_t len,
5189 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5190 XXH3_f_accumulate f_acc,
5191 XXH3_f_scrambleAcc f_scramble)
5192{
5193 size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
5194 size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
5195 size_t const nb_blocks = (len - 1) / block_len;
5196
5197 size_t n;
5198
5199 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
5200
5201 for (n = 0; n < nb_blocks; n++) {
5202 f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
5203 f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
5204 }
5205
5206 /* last partial block */
5207 XXH_ASSERT(len > XXH_STRIPE_LEN);
5208 { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
5209 XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
5210 f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
5211
5212 /* last stripe */
5213 { const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
5214#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */
5215 XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
5216 } }
5217}
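/*
 * Shape of the loop above, assuming the default 192-byte secret
 * (XXH_SECRET_DEFAULT_SIZE), a 64-byte stripe (XXH_STRIPE_LEN) and
 * XXH_SECRET_CONSUME_RATE == 8:
 *
 *   nbStripesPerBlock = (192 - 64) / 8 = 16 stripes
 *   block_len         = 64 * 16       = 1024 bytes
 *
 * so a 10 KB input is processed as 9 full 1 KB blocks (each followed by a
 * scramble), then a partial block, then the final, possibly overlapping,
 * 64-byte stripe.
 */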
5218
5219XXH_FORCE_INLINE xxh_u64
5220XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
5221{
5222 return XXH3_mul128_fold64(
5223 acc[0] ^ XXH_readLE64(secret),
5224 acc[1] ^ XXH_readLE64(secret+8) );
5225}
5226
5227static XXH64_hash_t
5228XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
5229{
5230 xxh_u64 result64 = start;
5231 size_t i = 0;
5232
5233 for (i = 0; i < 4; i++) {
5234 result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
5235#if defined(__clang__) /* Clang */ \
5236 && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \
5237 && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
5238 && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
5239 /*
5240 * UGLY HACK:
5241 * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
5242 * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
5243 * XXH3_64bits, len == 256, Snapdragon 835:
5244 * without hack: 2063.7 MB/s
5245 * with hack: 2560.7 MB/s
5246 */
5247 XXH_COMPILER_GUARD(result64);
5248#endif
5249 }
5250
5251 return XXH3_avalanche(result64);
5252}
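/*
 * In scalar terms, the merge above amounts to:
 *
 *   XXH3_avalanche( start
 *                 + fold64(acc[0]^s(0),  acc[1]^s(8))
 *                 + fold64(acc[2]^s(16), acc[3]^s(24))
 *                 + fold64(acc[4]^s(32), acc[5]^s(40))
 *                 + fold64(acc[6]^s(48), acc[7]^s(56)) )
 *
 * where s(n) = XXH_readLE64(secret + n) and fold64 = XXH3_mul128_fold64,
 * i.e. a 128-bit multiply folded back down to 64 bits.
 */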
5253
5254#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
5255 XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
5256
5257XXH_FORCE_INLINE XXH64_hash_t
5258XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
5259 const void* XXH_RESTRICT secret, size_t secretSize,
5260 XXH3_f_accumulate f_acc,
5261 XXH3_f_scrambleAcc f_scramble)
5262{
5263 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
5264
5265 XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
5266
5267 /* converge into final hash */
5268 XXH_STATIC_ASSERT(sizeof(acc) == 64);
5269 /* do not align on 8, so that the secret is different from the accumulator */
5270#define XXH_SECRET_MERGEACCS_START 11
5271 XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
5272 return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
5273}
5274
5275/*
5276 * It's important for performance to transmit secret's size (when it's static)
5277 * so that the compiler can properly optimize the vectorized loop.
5278 * This makes a big performance difference for "medium" keys (<1 KB) when using the AVX instruction set.
5279 * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
5280 * breaks -Og, this is XXH_NO_INLINE.
5281 */
5282XXH3_WITH_SECRET_INLINE XXH64_hash_t
5283XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
5284 XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5285{
5286 (void)seed64;
5287 return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
5288}
5289
5290/*
5291 * It's preferable for performance that XXH3_hashLong is not inlined,
5292 * as it results in a smaller function for small data, which is easier on the instruction cache.
5293 * Note that inside this no_inline function, we do inline the internal loop,
5294 * and provide a statically defined secret size to allow optimization of vector loop.
5295 */
5296XXH_NO_INLINE XXH_PUREF XXH64_hash_t
5297XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
5298 XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5299{
5300 (void)seed64; (void)secret; (void)secretLen;
5301 return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
5302}
5303
5304/*
5305 * XXH3_hashLong_64b_withSeed():
5306 * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
5307 * and then use this key for long mode hashing.
5308 *
5309 * This operation is decently fast but nonetheless costs a little bit of time.
5310 * Try to avoid it whenever possible (typically when seed==0).
5311 *
5312 * It's important for performance that XXH3_hashLong is not inlined. Not sure
5313 * why (uop cache maybe?), but the difference is large and easily measurable.
5314 */
5315XXH_FORCE_INLINE XXH64_hash_t
5316XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
5317 XXH64_hash_t seed,
5318 XXH3_f_accumulate f_acc,
5319 XXH3_f_scrambleAcc f_scramble,
5320 XXH3_f_initCustomSecret f_initSec)
5321{
5322#if XXH_SIZE_OPT <= 0
5323 if (seed == 0)
5324 return XXH3_hashLong_64b_internal(input, len,
5325 XXH3_kSecret, sizeof(XXH3_kSecret),
5326 f_acc, f_scramble);
5327#endif
5328 { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
5329 f_initSec(secret, seed);
5330 return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
5331 f_acc, f_scramble);
5332 }
5333}
5334
5335/*
5336 * It's important for performance that XXH3_hashLong is not inlined.
5337 */
5338XXH_NO_INLINE XXH64_hash_t
5339XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
5340 XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5341{
5342 (void)secret; (void)secretLen;
5343 return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
5344 XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
5345}
5346
5347
5348typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
5349 XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
5350
5351XXH_FORCE_INLINE XXH64_hash_t
5352XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
5353 XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
5354 XXH3_hashLong64_f f_hashLong)
5355{
5356 XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
5357 /*
5358 * If any action is to be taken when the `secretLen` condition is not respected,
5359 * it should be done here.
5360 * For now, it's a contract pre-condition.
5361 * Adding a check and a branch here would cost performance at every hash.
5362 * Also, note that the function signature doesn't offer room to return an error.
5363 */
5364 if (len <= 16)
5365 return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
5366 if (len <= 128)
5367 return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5368 if (len <= XXH3_MIDSIZE_MAX)
5369 return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5370 return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
5371}
5372
5373
5374/* === Public entry point === */
5375
5377XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
5378{
5379 return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
5380}
5381
5383XXH_PUBLIC_API XXH64_hash_t
5384XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
5385{
5386 return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
5387}
5388
5390XXH_PUBLIC_API XXH64_hash_t
5391XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
5392{
5393 return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
5394}
5395
5396XXH_PUBLIC_API XXH64_hash_t
5397XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
5398{
5399 if (length <= XXH3_MIDSIZE_MAX)
5400 return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
5401 return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
5402}
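/*
 * Minimal usage sketch of the one-shot 64-bit API (buf/bufSize are
 * placeholders for the caller's data):
 *
 *   XXH64_hash_t h0 = XXH3_64bits(buf, bufSize);               // seed == 0
 *   XXH64_hash_t h1 = XXH3_64bits_withSeed(buf, bufSize, 42);  // custom seed
 */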
5403
5404
5405/* === XXH3 streaming === */
5406#ifndef XXH_NO_STREAM
5407/*
5408 * Malloc's a pointer that is always aligned to align.
5409 *
5410 * This must be freed with `XXH_alignedFree()`.
5411 *
5412 * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
5413 * alignment on 32-bit. This isn't enough for the 32-byte aligned loads in AVX2,
5414 * or, on 32-bit, for the 16-byte aligned loads in SSE2 and NEON.
5415 *
5416 * This underalignment previously caused a rather obvious crash which went
5417 * completely unnoticed due to XXH3_createState() not actually being tested.
5418 * Credit to RedSpah for noticing this bug.
5419 *
5420 * The alignment is done manually: functions like posix_memalign or _mm_malloc
5421 * are avoided. To maintain portability, we would have to write a fallback
5422 * like this anyway, and besides, testing for the existence of library
5423 * functions without relying on external build tools is impossible.
5424 *
5425 * The method is simple: Overallocate, manually align, and store the offset
5426 * to the original behind the returned pointer.
5427 *
5428 * Align must be a power of 2 and 8 <= align <= 128.
5429 */
5430static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
5431{
5432 XXH_ASSERT(align <= 128 && align >= 8); /* range check */
5433 XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
5434 XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */
5435 { /* Overallocate to make room for manual realignment and an offset byte */
5436 xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
5437 if (base != NULL) {
5438 /*
5439 * Get the offset needed to align this pointer.
5440 *
5441 * Even if the returned pointer is aligned, there will always be
5442 * at least one byte to store the offset to the original pointer.
5443 */
5444 size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
5445 /* Add the offset for the now-aligned pointer */
5446 xxh_u8* ptr = base + offset;
5447
5448 XXH_ASSERT((size_t)ptr % align == 0);
5449
5450 /* Store the offset immediately before the returned pointer. */
5451 ptr[-1] = (xxh_u8)offset;
5452 return ptr;
5453 }
5454 return NULL;
5455 }
5456}
5457/*
5458 * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
5459 * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
5460 */
5461static void XXH_alignedFree(void* p)
5462{
5463 if (p != NULL) {
5464 xxh_u8* ptr = (xxh_u8*)p;
5465 /* Get the offset byte we added in XXH_alignedMalloc(). */
5466 xxh_u8 offset = ptr[-1];
5467 /* Free the original malloc'd pointer */
5468 xxh_u8* base = ptr - offset;
5469 XXH_free(base);
5470 }
5471}
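/*
 * Worked example of the offset scheme, assuming XXH_malloc() returns
 * base == 0x1003 and align == 64:
 *
 *   offset  = 64 - (0x1003 & 63) = 61
 *   ptr     = base + 61          = 0x1040   (64-byte aligned)
 *   ptr[-1] = 61                            (read back by XXH_alignedFree)
 *
 * If base happens to be aligned already, offset == align, so there is always
 * at least one byte in front of ptr available to store the offset.
 */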
5473XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
5474{
5475 XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
5476 if (state==NULL) return NULL;
5477 XXH3_INITSTATE(state);
5478 return state;
5479}
5480
5482XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH_NOESCAPE XXH3_state_t* statePtr)
5483{
5484 XXH_alignedFree(statePtr);
5485 return XXH_OK;
5486}
5487
5489XXH_PUBLIC_API void
5490XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
5491{
5492 XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
5493}
5494
5495static void
5496XXH3_reset_internal(XXH3_state_t* statePtr,
5497 XXH64_hash_t seed,
5498 const void* secret, size_t secretSize)
5499{
5500 size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
5501 size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
5502 XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
5503 XXH_ASSERT(statePtr != NULL);
5504 /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
5505 memset(reinterpret_cast<char*>(statePtr) + initStart, 0, initLength);
5506 statePtr->acc[0] = XXH_PRIME32_3;
5507 statePtr->acc[1] = XXH_PRIME64_1;
5508 statePtr->acc[2] = XXH_PRIME64_2;
5509 statePtr->acc[3] = XXH_PRIME64_3;
5510 statePtr->acc[4] = XXH_PRIME64_4;
5511 statePtr->acc[5] = XXH_PRIME32_2;
5512 statePtr->acc[6] = XXH_PRIME64_5;
5513 statePtr->acc[7] = XXH_PRIME32_1;
5514 statePtr->seed = seed;
5515 statePtr->useSeed = (seed != 0);
5516 statePtr->extSecret = (const unsigned char*)secret;
5517 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
5518 statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
5519 statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
5520}
5521
5523XXH_PUBLIC_API XXH_errorcode
5524XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
5525{
5526 if (statePtr == NULL) return XXH_ERROR;
5527 XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
5528 return XXH_OK;
5529}
5530
5532XXH_PUBLIC_API XXH_errorcode
5533XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
5534{
5535 if (statePtr == NULL) return XXH_ERROR;
5536 if (secret == NULL) return XXH_ERROR;
5537 if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
5538 XXH3_reset_internal(statePtr, 0, secret, secretSize);
5539 return XXH_OK;
5540}
5541
5543XXH_PUBLIC_API XXH_errorcode
5544XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
5545{
5546 if (statePtr == NULL) return XXH_ERROR;
5547 if (seed==0) return XXH3_64bits_reset(statePtr);
5548 if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
5549 XXH3_initCustomSecret(statePtr->customSecret, seed);
5550 XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
5551 return XXH_OK;
5552}
5553
5555XXH_PUBLIC_API XXH_errorcode
5556XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
5557{
5558 if (statePtr == NULL) return XXH_ERROR;
5559 if (secret == NULL) return XXH_ERROR;
5560 if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
5561 XXH3_reset_internal(statePtr, seed64, secret, secretSize);
5562 statePtr->useSeed = 1; /* always, even if seed64==0 */
5563 return XXH_OK;
5564}
5565
5583XXH_FORCE_INLINE const xxh_u8 *
5584XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
5585 size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
5586 const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
5587 const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
5588 XXH3_f_accumulate f_acc,
5589 XXH3_f_scrambleAcc f_scramble)
5590{
5591 const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
5592 /* Process full blocks */
5593 if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
5594 /* Process the initial partial block... */
5595 size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
5596
5597 do {
5598 /* Accumulate and scramble */
5599 f_acc(acc, input, initialSecret, nbStripesThisIter);
5600 f_scramble(acc, secret + secretLimit);
5601 input += nbStripesThisIter * XXH_STRIPE_LEN;
5602 nbStripes -= nbStripesThisIter;
5603 /* Then continue the loop with the full block size */
5604 nbStripesThisIter = nbStripesPerBlock;
5605 initialSecret = secret;
5606 } while (nbStripes >= nbStripesPerBlock);
5607 *nbStripesSoFarPtr = 0;
5608 }
5609 /* Process a partial block */
5610 if (nbStripes > 0) {
5611 f_acc(acc, input, initialSecret, nbStripes);
5612 input += nbStripes * XXH_STRIPE_LEN;
5613 *nbStripesSoFarPtr += nbStripes;
5614 }
5615 /* Return end pointer */
5616 return input;
5617}
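/*
 * Hypothetical trace, assuming nbStripesPerBlock == 16 (default secret),
 * *nbStripesSoFarPtr == 14 and nbStripes == 21 on entry:
 *
 *   - 2 stripes finish the current block, then the accumulators are scrambled
 *   - 16 stripes form one full block, followed by another scramble
 *   - the remaining 3 stripes go through the partial-block path,
 *     leaving *nbStripesSoFarPtr == 3 for the next call.
 */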
5618
5619#ifndef XXH3_STREAM_USE_STACK
5620# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
5621# define XXH3_STREAM_USE_STACK 1
5622# endif
5623#endif
5624/*
5625 * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
5626 */
5627XXH_FORCE_INLINE XXH_errorcode
5628XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
5629 const xxh_u8* XXH_RESTRICT input, size_t len,
5630 XXH3_f_accumulate f_acc,
5631 XXH3_f_scrambleAcc f_scramble)
5632{
5633 if (input==NULL) {
5634 XXH_ASSERT(len == 0);
5635 return XXH_OK;
5636 }
5637
5638 XXH_ASSERT(state != NULL);
5639 { const xxh_u8* const bEnd = input + len;
5640 const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5641#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
5642 /* For some reason, gcc and MSVC seem to suffer greatly
5643 * when the accumulators are operated on directly in the state.
5644 * Working in stack space instead seems to enable proper optimization.
5645 * clang, on the other hand, doesn't seem to need this trick */
5646 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
5647 XXH_memcpy(acc, state->acc, sizeof(acc));
5648#else
5649 xxh_u64* XXH_RESTRICT const acc = state->acc;
5650#endif
5651 state->totalLen += len;
5652 XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
5653
5654 /* small input : just fill in tmp buffer */
5655 if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
5656 XXH_memcpy(state->buffer + state->bufferedSize, input, len);
5657 state->bufferedSize += (XXH32_hash_t)len;
5658 return XXH_OK;
5659 }
5660
5661 /* total input is now > XXH3_INTERNALBUFFER_SIZE */
5662 #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
5663 XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
5664
5665 /*
5666 * The internal buffer is partially filled (always, except at the beginning).
5667 * Complete it, then consume it.
5668 */
5669 if (state->bufferedSize) {
5670 size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
5671 XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
5672 input += loadSize;
5673 XXH3_consumeStripes(acc,
5674 &state->nbStripesSoFar, state->nbStripesPerBlock,
5675 state->buffer, XXH3_INTERNALBUFFER_STRIPES,
5676 secret, state->secretLimit,
5677 f_acc, f_scramble);
5678 state->bufferedSize = 0;
5679 }
5680 XXH_ASSERT(input < bEnd);
5681 if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
5682 size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
5683 input = XXH3_consumeStripes(acc,
5684 &state->nbStripesSoFar, state->nbStripesPerBlock,
5685 input, nbStripes,
5686 secret, state->secretLimit,
5687 f_acc, f_scramble);
5688 XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
5689
5690 }
5691 /* Some remaining input (always) : buffer it */
5692 XXH_ASSERT(input < bEnd);
5693 XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
5694 XXH_ASSERT(state->bufferedSize == 0);
5695 XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
5696 state->bufferedSize = (XXH32_hash_t)(bEnd-input);
5697#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
5698 /* save stack accumulators into state */
5699 XXH_memcpy(state->acc, acc, sizeof(acc));
5700#endif
5701 }
5702
5703 return XXH_OK;
5704}
5705
5707XXH_PUBLIC_API XXH_errorcode
5708XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
5709{
5710 return XXH3_update(state, (const xxh_u8*)input, len,
5711 XXH3_accumulate, XXH3_scrambleAcc);
5712}
5713
5714
5715XXH_FORCE_INLINE void
5716XXH3_digest_long (XXH64_hash_t* acc,
5717 const XXH3_state_t* state,
5718 const unsigned char* secret)
5719{
5720 xxh_u8 lastStripe[XXH_STRIPE_LEN];
5721 const xxh_u8* lastStripePtr;
5722
5723 /*
5724 * Digest on a local copy. This way, the state remains unaltered, and it can
5725 * continue ingesting more input afterwards.
5726 */
5727 XXH_memcpy(acc, state->acc, sizeof(state->acc));
5728 if (state->bufferedSize >= XXH_STRIPE_LEN) {
5729 /* Consume remaining stripes then point to remaining data in buffer */
5730 size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
5731 size_t nbStripesSoFar = state->nbStripesSoFar;
5732 XXH3_consumeStripes(acc,
5733 &nbStripesSoFar, state->nbStripesPerBlock,
5734 state->buffer, nbStripes,
5735 secret, state->secretLimit,
5736 XXH3_accumulate, XXH3_scrambleAcc);
5737 lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
5738 } else { /* bufferedSize < XXH_STRIPE_LEN */
5739 /* Copy to temp buffer */
5740 size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
5741 XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
5742 XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
5743 XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
5744 lastStripePtr = lastStripe;
5745 }
5746 /* Last stripe */
5747 XXH3_accumulate_512(acc,
5748 lastStripePtr,
5749 secret + state->secretLimit - XXH_SECRET_LASTACC_START);
5750}
5751
5753XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(XXH_NOESCAPE const XXH3_state_t* state)
5754{
5755 const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5756 if (state->totalLen > XXH3_MIDSIZE_MAX) {
5757 XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
5758 XXH3_digest_long(acc, state, secret);
5759 return XXH3_mergeAccs(acc,
5760 secret + XXH_SECRET_MERGEACCS_START,
5761 (xxh_u64)state->totalLen * XXH_PRIME64_1);
5762 }
5763 /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
5764 if (state->useSeed)
5765 return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
5766 return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
5767 secret, state->secretLimit + XXH_STRIPE_LEN);
5768}
5769#endif /* !XXH_NO_STREAM */
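/*
 * Minimal streaming sketch for the 64-bit variant (error handling omitted;
 * chunk/chunkSize stand for the caller's data):
 *
 *   XXH3_state_t* st = XXH3_createState();
 *   XXH3_64bits_reset(st);
 *   XXH3_64bits_update(st, chunk, chunkSize);   // repeat for each chunk
 *   XXH64_hash_t h = XXH3_64bits_digest(st);    // state remains usable
 *   XXH3_freeState(st);
 */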
5770
5771
5772/* ==========================================
5773 * XXH3 128 bits (a.k.a XXH128)
5774 * ==========================================
5775 * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
5776 * even without counting the significantly larger output size.
5777 *
5778 * For example, extra steps are taken to avoid the seed-dependent collisions
5779 * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
5780 *
5781 * This strength naturally comes at the cost of some speed, especially on short
5782 * lengths. Note that longer hashes are about as fast as the 64-bit version
5783 * due to it using only a slight modification of the 64-bit loop.
5784 *
5785 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
5786 * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
5787 */
5788
5789XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5790XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5791{
5792 /* A doubled version of 1to3_64b with different constants. */
5793 XXH_ASSERT(input != NULL);
5794 XXH_ASSERT(1 <= len && len <= 3);
5795 XXH_ASSERT(secret != NULL);
5796 /*
5797 * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
5798 * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
5799 * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
5800 */
5801 { xxh_u8 const c1 = input[0];
5802 xxh_u8 const c2 = input[len >> 1];
5803 xxh_u8 const c3 = input[len - 1];
5804 xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
5805 | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
5806 xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
5807 xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
5808 xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
5809 xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
5810 xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
5811 XXH128_hash_t h128;
5812 h128.low64 = XXH64_avalanche(keyed_lo);
5813 h128.high64 = XXH64_avalanche(keyed_hi);
5814 return h128;
5815 }
5816}
5817
5818XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5819XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5820{
5821 XXH_ASSERT(input != NULL);
5822 XXH_ASSERT(secret != NULL);
5823 XXH_ASSERT(4 <= len && len <= 8);
5824 seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
5825 { xxh_u32 const input_lo = XXH_readLE32(input);
5826 xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
5827 xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
5828 xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
5829 xxh_u64 const keyed = input_64 ^ bitflip;
5830
5831 /* Shift len left so that the multiplier XXH_PRIME64_1 + (len << 2) stays odd; an even multiplier would lose information in the low bits. */
5832 XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
5833
5834 m128.high64 += (m128.low64 << 1);
5835 m128.low64 ^= (m128.high64 >> 3);
5836
5837 m128.low64 = XXH_xorshift64(m128.low64, 35);
5838 m128.low64 *= PRIME_MX2;
5839 m128.low64 = XXH_xorshift64(m128.low64, 28);
5840 m128.high64 = XXH3_avalanche(m128.high64);
5841 return m128;
5842 }
5843}
5844
5845XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5846XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5847{
5848 XXH_ASSERT(input != NULL);
5849 XXH_ASSERT(secret != NULL);
5850 XXH_ASSERT(9 <= len && len <= 16);
5851 { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
5852 xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
5853 xxh_u64 const input_lo = XXH_readLE64(input);
5854 xxh_u64 input_hi = XXH_readLE64(input + len - 8);
5855 XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
5856 /*
5857 * Put len in the middle of m128 to ensure that the length gets mixed to
5858 * both the low and high bits in the 128x64 multiply below.
5859 */
5860 m128.low64 += (xxh_u64)(len - 1) << 54;
5861 input_hi ^= bitfliph;
5862 /*
5863 * Add the high 32 bits of input_hi to the high 32 bits of m128, then
5864 * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
5865 * the high 64 bits of m128.
5866 *
5867 * The best approach to this operation is different on 32-bit and 64-bit.
5868 */
5869 if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
5870 /*
5871 * 32-bit optimized version, which is more readable.
5872 *
5873 * On 32-bit, it removes an ADC and delays a dependency between the two
5874 * halves of m128.high64, but it generates an extra mask on 64-bit.
5875 */
5876 m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
5877 } else {
5878 /*
5879 * 64-bit optimized (albeit more confusing) version.
5880 *
5881 * Uses some properties of addition and multiplication to remove the mask:
5882 *
5883 * Let:
5884 * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
5885 * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
5886 * c = XXH_PRIME32_2
5887 *
5888 * a + (b * c)
5889 * Inverse Property: x + y - x == y
5890 * a + (b * (1 + c - 1))
5891 * Distributive Property: x * (y + z) == (x * y) + (x * z)
5892 * a + (b * 1) + (b * (c - 1))
5893 * Identity Property: x * 1 == x
5894 * a + b + (b * (c - 1))
5895 *
5896 * Substitute a, b, and c:
5897 * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
5898 *
5899 * Since input_hi.hi + input_hi.lo == input_hi, we get this:
5900 * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
5901 */
5902 m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
5903 }
5904 /* m128 ^= XXH_swap64(m128 >> 64); */
5905 m128.low64 ^= XXH_swap64(m128.high64);
5906
5907 { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
5908 XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
5909 h128.high64 += m128.high64 * XXH_PRIME64_2;
5910
5911 h128.low64 = XXH3_avalanche(h128.low64);
5912 h128.high64 = XXH3_avalanche(h128.high64);
5913 return h128;
5914 } }
5915}
5916
5917/*
5918 * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
5919 */
5920XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5921XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
5922{
5923 XXH_ASSERT(len <= 16);
5924 { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
5925 if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
5926 if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
5927 { XXH128_hash_t h128;
5928 xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
5929 xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
5930 h128.low64 = XXH64_avalanche(seed ^ bitflipl);
5931 h128.high64 = XXH64_avalanche( seed ^ bitfliph);
5932 return h128;
5933 } }
5934}
5935
5936/*
5937 * A bit slower than XXH3_mix16B, but handles multiply by zero better.
5938 */
5939XXH_FORCE_INLINE XXH128_hash_t
5940XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
5941 const xxh_u8* secret, XXH64_hash_t seed)
5942{
5943 acc.low64 += XXH3_mix16B (input_1, secret+0, seed);
5944 acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
5945 acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
5946 acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
5947 return acc;
5948}
5949
5950
5951XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
5952XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
5953 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5954 XXH64_hash_t seed)
5955{
5956 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
5957 XXH_ASSERT(16 < len && len <= 128);
5958
5959 { XXH128_hash_t acc;
5960 acc.low64 = len * XXH_PRIME64_1;
5961 acc.high64 = 0;
5962
5963#if XXH_SIZE_OPT >= 1
5964 {
5965 /* Smaller, but slightly slower. */
5966 unsigned int i = (unsigned int)(len - 1) / 32;
5967 do {
5968 acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
5969 } while (i-- != 0);
5970 }
5971#else
5972 if (len > 32) {
5973 if (len > 64) {
5974 if (len > 96) {
5975 acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
5976 }
5977 acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
5978 }
5979 acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
5980 }
5981 acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
5982#endif
5983 { XXH128_hash_t h128;
5984 h128.low64 = acc.low64 + acc.high64;
5985 h128.high64 = (acc.low64 * XXH_PRIME64_1)
5986 + (acc.high64 * XXH_PRIME64_4)
5987 + ((len - seed) * XXH_PRIME64_2);
5988 h128.low64 = XXH3_avalanche(h128.low64);
5989 h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
5990 return h128;
5991 }
5992 }
5993}
5994
5995XXH_NO_INLINE XXH_PUREF XXH128_hash_t
5996XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
5997 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5998 XXH64_hash_t seed)
5999{
6000 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
6001 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
6002
6003 { XXH128_hash_t acc;
6004 unsigned i;
6005 acc.low64 = len * XXH_PRIME64_1;
6006 acc.high64 = 0;
6007 /*
6008 * We set `i` to offset + 32, so that the unchanged `len` can be used as
6009 * the upper bound. This reaches a sweet spot where both x86 and aarch64
6010 * get simple address generation (agen) and good codegen
6011 * for the loop.
6012 */
6013 for (i = 32; i < 160; i += 32) {
6014 acc = XXH128_mix32B(acc,
6015 input + i - 32,
6016 input + i - 16,
6017 secret + i - 32,
6018 seed);
6019 }
6020 acc.low64 = XXH3_avalanche(acc.low64);
6021 acc.high64 = XXH3_avalanche(acc.high64);
6022 /*
6023 * NB: `i <= len` will duplicate the last 32 bytes if
6024 * len % 32 is zero. This is an unfortunate necessity to keep
6025 * the hash result stable.
6026 */
6027 for (i=160; i <= len; i += 32) {
6028 acc = XXH128_mix32B(acc,
6029 input + i - 32,
6030 input + i - 16,
6031 secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
6032 seed);
6033 }
6034 /* last bytes */
6035 acc = XXH128_mix32B(acc,
6036 input + len - 16,
6037 input + len - 32,
6038 secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
6039 (XXH64_hash_t)0 - seed);
6040
6041 { XXH128_hash_t h128;
6042 h128.low64 = acc.low64 + acc.high64;
6043 h128.high64 = (acc.low64 * XXH_PRIME64_1)
6044 + (acc.high64 * XXH_PRIME64_4)
6045 + ((len - seed) * XXH_PRIME64_2);
6046 h128.low64 = XXH3_avalanche(h128.low64);
6047 h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
6048 return h128;
6049 }
6050 }
6051}
6052
6053XXH_FORCE_INLINE XXH128_hash_t
6054XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
6055 const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
6056 XXH3_f_accumulate f_acc,
6057 XXH3_f_scrambleAcc f_scramble)
6058{
6059 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
6060
6061 XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
6062
6063 /* converge into final hash */
6064 XXH_STATIC_ASSERT(sizeof(acc) == 64);
6065 XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
6066 { XXH128_hash_t h128;
6067 h128.low64 = XXH3_mergeAccs(acc,
6068 secret + XXH_SECRET_MERGEACCS_START,
6069 (xxh_u64)len * XXH_PRIME64_1);
6070 h128.high64 = XXH3_mergeAccs(acc,
6071 secret + secretSize
6072 - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
6073 ~((xxh_u64)len * XXH_PRIME64_2));
6074 return h128;
6075 }
6076}
6077
6078/*
6079 * It's important for performance that XXH3_hashLong() is not inlined.
6080 */
6081XXH_NO_INLINE XXH_PUREF XXH128_hash_t
6082XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
6083 XXH64_hash_t seed64,
6084 const void* XXH_RESTRICT secret, size_t secretLen)
6085{
6086 (void)seed64; (void)secret; (void)secretLen;
6087 return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
6088 XXH3_accumulate, XXH3_scrambleAcc);
6089}
6090
6091/*
6092 * It's important for performance to pass @p secretLen (when it's static)
6093 * to the compiler, so that it can properly optimize the vectorized loop.
6094 *
6095 * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
6096 * breaks -Og, this is XXH_NO_INLINE.
6097 */
6098XXH3_WITH_SECRET_INLINE XXH128_hash_t
6099XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
6100 XXH64_hash_t seed64,
6101 const void* XXH_RESTRICT secret, size_t secretLen)
6102{
6103 (void)seed64;
6104 return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
6105 XXH3_accumulate, XXH3_scrambleAcc);
6106}
6107
6108XXH_FORCE_INLINE XXH128_hash_t
6109XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
6110 XXH64_hash_t seed64,
6111 XXH3_f_accumulate f_acc,
6112 XXH3_f_scrambleAcc f_scramble,
6113 XXH3_f_initCustomSecret f_initSec)
6114{
6115 if (seed64 == 0)
6116 return XXH3_hashLong_128b_internal(input, len,
6117 XXH3_kSecret, sizeof(XXH3_kSecret),
6118 f_acc, f_scramble);
6119 { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6120 f_initSec(secret, seed64);
6121 return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
6122 f_acc, f_scramble);
6123 }
6124}
6125
6126/*
6127 * It's important for performance that XXH3_hashLong is not inlined.
6128 */
6129XXH_NO_INLINE XXH128_hash_t
6130XXH3_hashLong_128b_withSeed(const void* input, size_t len,
6131 XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
6132{
6133 (void)secret; (void)secretLen;
6134 return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
6135 XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
6136}
6137
6138typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
6139 XXH64_hash_t, const void* XXH_RESTRICT, size_t);
6140
6141XXH_FORCE_INLINE XXH128_hash_t
6142XXH3_128bits_internal(const void* input, size_t len,
6143 XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
6144 XXH3_hashLong128_f f_hl128)
6145{
6146 XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
6147 /*
6148 * If any action needs to be taken when the `secret` conditions are not
6149 * respected, it should be done here.
6150 * For now, it's a contract pre-condition.
6151 * Adding a check and a branch here would cost performance on every hash.
6152 */
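 /*
 * Dispatch on input length: 0-16 and 17-128 bytes use dedicated short-input
 * kernels, 129 bytes up to XXH3_MIDSIZE_MAX use the mid-size kernel, and
 * anything longer goes through the (non-inlined) long-hash function f_hl128.
 */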
6153 if (len <= 16)
6154 return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
6155 if (len <= 128)
6156 return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
6157 if (len <= XXH3_MIDSIZE_MAX)
6158 return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
6159 return f_hl128(input, len, seed64, secret, secretLen);
6160}
6161
6162
6163/* === Public XXH128 API === */
6164
6166XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
6167{
6168 return XXH3_128bits_internal(input, len, 0,
6169 XXH3_kSecret, sizeof(XXH3_kSecret),
6170 XXH3_hashLong_128b_default);
6171}
6172
6174XXH_PUBLIC_API XXH128_hash_t
6175XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
6176{
6177 return XXH3_128bits_internal(input, len, 0,
6178 (const xxh_u8*)secret, secretSize,
6179 XXH3_hashLong_128b_withSecret);
6180}
6181
6183XXH_PUBLIC_API XXH128_hash_t
6184XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
6185{
6186 return XXH3_128bits_internal(input, len, seed,
6187 XXH3_kSecret, sizeof(XXH3_kSecret),
6188 XXH3_hashLong_128b_withSeed);
6189}
6190
6192XXH_PUBLIC_API XXH128_hash_t
6193XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
6194{
6195 if (len <= XXH3_MIDSIZE_MAX)
6196 return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
6197 return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
6198}
6199
6201XXH_PUBLIC_API XXH128_hash_t
6202XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
6203{
6204 return XXH3_128bits_withSeed(input, len, seed);
6205}
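/*
 * Illustrative usage sketch for the one-shot 128-bit API defined above.
 * Names such as example_oneshot_128, data and size are invented for the
 * example; output formatting and error handling are up to the caller.
 *
 *   #include <stdio.h>
 *   #include "xxhash.h"
 *
 *   static void example_oneshot_128(const void* data, size_t size)
 *   {
 *       XXH128_hash_t const h1 = XXH3_128bits(data, size);              // unseeded
 *       XXH128_hash_t const h2 = XXH3_128bits_withSeed(data, size, 42); // seeded
 *       XXH128_hash_t const h3 = XXH128(data, size, 42);                // same result as h2
 *       (void)h3;
 *       printf("%016llx%016llx\n",
 *              (unsigned long long)h1.high64, (unsigned long long)h1.low64);
 *       printf("%016llx%016llx\n",
 *              (unsigned long long)h2.high64, (unsigned long long)h2.low64);
 *   }
 */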
6206
6207
6208/* === XXH3 128-bit streaming === */
6209#ifndef XXH_NO_STREAM
6210/*
6211 * All initialization and update functions are identical to the 64-bit streaming variant.
6212 * The only difference is the finalization routine.
6213 */
6214
6216XXH_PUBLIC_API XXH_errorcode
6217XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
6218{
6219 return XXH3_64bits_reset(statePtr);
6220}
6221
6223XXH_PUBLIC_API XXH_errorcode
6224XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
6225{
6226 return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
6227}
6228
6230XXH_PUBLIC_API XXH_errorcode
6231XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
6232{
6233 return XXH3_64bits_reset_withSeed(statePtr, seed);
6234}
6235
6237XXH_PUBLIC_API XXH_errorcode
6238XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
6239{
6240 return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
6241}
6242
6244XXH_PUBLIC_API XXH_errorcode
6245XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
6246{
6247 return XXH3_64bits_update(state, input, len);
6248}
6249
6251XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(XXH_NOESCAPE const XXH3_state_t* state)
6252{
6253 const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
6254 if (state->totalLen > XXH3_MIDSIZE_MAX) {
6255 XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
6256 XXH3_digest_long(acc, state, secret);
6257 XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
6258 { XXH128_hash_t h128;
6259 h128.low64 = XXH3_mergeAccs(acc,
6260 secret + XXH_SECRET_MERGEACCS_START,
6261 (xxh_u64)state->totalLen * XXH_PRIME64_1);
6262 h128.high64 = XXH3_mergeAccs(acc,
6263 secret + state->secretLimit + XXH_STRIPE_LEN
6264 - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
6265 ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
6266 return h128;
6267 }
6268 }
6269 /* len <= XXH3_MIDSIZE_MAX : short code */
6270 if (state->seed)
6271 return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
6272 return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
6273 secret, state->secretLimit + XXH_STRIPE_LEN);
6274}
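/*
 * Illustrative streaming sketch using the reset/update/digest functions
 * above. The function name and chunking are invented for the example;
 * production code should check XXH3_createState() for NULL and inspect the
 * XXH_errorcode return values.
 *
 *   static XXH128_hash_t example_stream_128(const void* part1, size_t len1,
 *                                           const void* part2, size_t len2)
 *   {
 *       XXH128_hash_t result;
 *       XXH3_state_t* const state = XXH3_createState();
 *       (void)XXH3_128bits_reset_withSeed(state, 42); // or XXH3_128bits_reset(state) for seed 0
 *       (void)XXH3_128bits_update(state, part1, len1);
 *       (void)XXH3_128bits_update(state, part2, len2);
 *       result = XXH3_128bits_digest(state);          // digest() does not modify the state
 *       XXH3_freeState(state);
 *       return result;
 *   }
 */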
6275#endif /* !XXH_NO_STREAM */
6276/* 128-bit utility functions */
6277
6278#include <string.h> /* memcmp, memcpy */
6279
6280/* return : 1 if equal, 0 if different */
6282XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
6283{
6284 /* note : XXH128_hash_t is compact, it has no padding byte */
6285 return !(memcmp(&h1, &h2, sizeof(h1)));
6286}
6287
6288/* This prototype is compatible with stdlib's qsort().
6289 * @return : >0 if *h128_1 > *h128_2
6290 * <0 if *h128_1 < *h128_2
6291 * =0 if *h128_1 == *h128_2 */
6293XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
6294{
6295 XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
6296 XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
6297 int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
6298 /* note : this bets that, in most cases, hash values are different */
6299 if (hcmp) return hcmp;
6300 return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
6301}
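/*
 * Illustrative sketch: XXH128_cmp() can be passed directly to qsort() or
 * bsearch(), and XXH128_isEqual() gives a plain equality test. The helper
 * names below are invented for the example.
 *
 *   #include <stdlib.h>  // qsort
 *
 *   static void example_sort_hashes(XXH128_hash_t* hashes, size_t count)
 *   {
 *       qsort(hashes, count, sizeof(XXH128_hash_t), XXH128_cmp);
 *   }
 *
 *   static int example_same_hash(const void* a, size_t lenA,
 *                                const void* b, size_t lenB)
 *   {
 *       // equal hashes strongly suggest, but do not prove, equal content
 *       return XXH128_isEqual(XXH3_128bits(a, lenA), XXH3_128bits(b, lenB));
 *   }
 */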
6302
6303
6304/*====== Canonical representation ======*/
6306XXH_PUBLIC_API void
6307XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
6308{
6309 XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
6310 if (XXH_CPU_LITTLE_ENDIAN) {
6311 hash.high64 = XXH_swap64(hash.high64);
6312 hash.low64 = XXH_swap64(hash.low64);
6313 }
6314 XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
6315 XXH_memcpy(reinterpret_cast<char*>(dst) + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
6316}
6317
6320XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
6321{
6322 XXH128_hash_t h;
6323 h.high64 = XXH_readBE64(src);
6324 h.low64 = XXH_readBE64(src->digest + 8);
6325 return h;
6326}
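/*
 * Illustrative sketch: the canonical form is a fixed big-endian byte layout,
 * suitable for storing or transmitting a hash independently of host
 * endianness, and it round-trips exactly. The helper name is invented for
 * the example.
 *
 *   #include <assert.h>
 *   #include <string.h>
 *
 *   static void example_canonical_roundtrip(XXH128_hash_t h, unsigned char out[16])
 *   {
 *       XXH128_canonical_t canon;
 *       XXH128_hash_t back;
 *       XXH128_canonicalFromHash(&canon, h);               // big-endian serialization
 *       memcpy(out, canon.digest, sizeof(canon.digest));   // 16 portable bytes
 *       back = XXH128_hashFromCanonical(&canon);
 *       assert(XXH128_isEqual(h, back));                   // exact round-trip
 *       (void)back;
 *   }
 */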
6327
6328
6329
6330/* ==========================================
6331 * Secret generators
6332 * ==========================================
6333 */
6334#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
6335
6336XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
6337{
6338 XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
6339 XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
6340}
6341
6343XXH_PUBLIC_API XXH_errorcode
6344XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
6345{
6346#if (XXH_DEBUGLEVEL >= 1)
6347 XXH_ASSERT(secretBuffer != NULL);
6348 XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
6349#else
6350 /* production mode, assert() is disabled */
6351 if (secretBuffer == NULL) return XXH_ERROR;
6352 if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
6353#endif
6354
6355 if (customSeedSize == 0) {
6356 customSeed = XXH3_kSecret;
6357 customSeedSize = XXH_SECRET_DEFAULT_SIZE;
6358 }
6359#if (XXH_DEBUGLEVEL >= 1)
6360 XXH_ASSERT(customSeed != NULL);
6361#else
6362 if (customSeed == NULL) return XXH_ERROR;
6363#endif
6364
6365 /* Fill secretBuffer with a copy of customSeed - repeat as needed */
6366 { size_t pos = 0;
6367 while (pos < secretSize) {
6368 size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
6369 memcpy((char*)secretBuffer + pos, customSeed, toCopy);
6370 pos += toCopy;
6371 } }
6372
6373 { size_t const nbSeg16 = secretSize / 16;
6374 size_t n;
6375 XXH128_canonical_t scrambler;
6376 XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
6377 for (n=0; n<nbSeg16; n++) {
6378 XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
6379 XXH3_combine16((char*)secretBuffer + n*16, h128);
6380 }
6381 /* last segment */
6382 XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
6383 }
6384 return XXH_OK;
6385}
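/*
 * Illustrative sketch: derive a full-size secret from arbitrary (possibly
 * low-entropy) seed material, then hash with it. The helper and parameter
 * names are invented for the example; the secret buffer must be at least
 * XXH3_SECRET_SIZE_MIN bytes.
 *
 *   static XXH128_hash_t example_custom_secret_hash(const void* data, size_t size,
 *                                                   const void* seedMaterial, size_t seedSize)
 *   {
 *       unsigned char secret[XXH3_SECRET_SIZE_MIN];
 *       if (XXH3_generateSecret(secret, sizeof(secret), seedMaterial, seedSize) != XXH_OK) {
 *           // only fails on NULL or undersized arguments; handle as appropriate
 *       }
 *       return XXH3_128bits_withSecret(data, size, secret, sizeof(secret));
 *   }
 */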
6386
6388XXH_PUBLIC_API void
6389XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
6390{
6391 XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6392 XXH3_initCustomSecret(secret, seed);
6393 XXH_ASSERT(secretBuffer != NULL);
6394 memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
6395}
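/*
 * Illustrative sketch: XXH3_generateSecret_fromSeed() fills a
 * XXH_SECRET_DEFAULT_SIZE-byte buffer with the same seed-derived secret that
 * the implementation above builds internally. Combined with
 * XXH3_128bits_withSecretandSeed(), the result is intended to match
 * XXH3_128bits_withSeed() for both short and long inputs. The helper name is
 * invented for the example.
 *
 *   static XXH128_hash_t example_seed_derived_secret(const void* data, size_t size,
 *                                                    XXH64_hash_t seed)
 *   {
 *       unsigned char secret[XXH_SECRET_DEFAULT_SIZE];
 *       XXH3_generateSecret_fromSeed(secret, seed);
 *       return XXH3_128bits_withSecretandSeed(data, size, secret, sizeof(secret), seed);
 *   }
 */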
6396
6397
6398
6399/* Pop our optimization override from above */
6400#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
6401 && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
6402 && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
6403# pragma GCC pop_options
6404#endif
6405
6406#endif /* XXH_NO_LONG_LONG */
6407
6408#endif /* XXH_NO_XXH3 */
6409
6413#endif /* XXH_IMPLEMENTATION */
6414
6415
6416#if defined (__cplusplus)
6417} /* extern "C" */
6418#endif