172 #if defined (__cplusplus)
203 # define XXH_INLINE_ALL
204 # undef XXH_INLINE_ALL
208 # define XXH_PRIVATE_API
209 # undef XXH_PRIVATE_API
223 # define XXH_NAMESPACE
224 # undef XXH_NAMESPACE
227 #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
228 && !defined(XXH_INLINE_ALL_31684351384)
230 # define XXH_INLINE_ALL_31684351384
232 # undef XXH_STATIC_LINKING_ONLY
233 # define XXH_STATIC_LINKING_ONLY
235 # undef XXH_PUBLIC_API
236 # if defined(__GNUC__)
237 # define XXH_PUBLIC_API static __inline __attribute__((unused))
238 # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) )
239 # define XXH_PUBLIC_API static inline
240 # elif defined(_MSC_VER)
241 # define XXH_PUBLIC_API static __inline
244 # define XXH_PUBLIC_API static
260 # undef XXH_versionNumber
263 # undef XXH32_createState
264 # undef XXH32_freeState
268 # undef XXH32_copyState
269 # undef XXH32_canonicalFromHash
270 # undef XXH32_hashFromCanonical
273 # undef XXH64_createState
274 # undef XXH64_freeState
278 # undef XXH64_copyState
279 # undef XXH64_canonicalFromHash
280 # undef XXH64_hashFromCanonical
283 # undef XXH3_64bits_withSecret
284 # undef XXH3_64bits_withSeed
285 # undef XXH3_64bits_withSecretandSeed
286 # undef XXH3_createState
287 # undef XXH3_freeState
288 # undef XXH3_copyState
289 # undef XXH3_64bits_reset
290 # undef XXH3_64bits_reset_withSeed
291 # undef XXH3_64bits_reset_withSecret
292 # undef XXH3_64bits_update
293 # undef XXH3_64bits_digest
294 # undef XXH3_generateSecret
298 # undef XXH3_128bits_withSeed
299 # undef XXH3_128bits_withSecret
300 # undef XXH3_128bits_reset
301 # undef XXH3_128bits_reset_withSeed
302 # undef XXH3_128bits_reset_withSecret
303 # undef XXH3_128bits_reset_withSecretandSeed
304 # undef XXH3_128bits_update
305 # undef XXH3_128bits_digest
306 # undef XXH128_isEqual
308 # undef XXH128_canonicalFromHash
309 # undef XXH128_hashFromCanonical
311 # undef XXH_NAMESPACE
314 # define XXH_NAMESPACE XXH_INLINE_
322 # define XXH_IPREF(Id) XXH_NAMESPACE ## Id
323 # define XXH_OK XXH_IPREF(XXH_OK)
324 # define XXH_ERROR XXH_IPREF(XXH_ERROR)
325 # define XXH_errorcode XXH_IPREF(XXH_errorcode)
326 # define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
327 # define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
328 # define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
329 # define XXH32_state_s XXH_IPREF(XXH32_state_s)
330 # define XXH32_state_t XXH_IPREF(XXH32_state_t)
331 # define XXH64_state_s XXH_IPREF(XXH64_state_s)
332 # define XXH64_state_t XXH_IPREF(XXH64_state_t)
333 # define XXH3_state_s XXH_IPREF(XXH3_state_s)
334 # define XXH3_state_t XXH_IPREF(XXH3_state_t)
335 # define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
337 # undef XXHASH_H_5627135585666179
338 # undef XXHASH_H_STATIC_13879238742
344 #ifndef XXHASH_H_5627135585666179
345 #define XXHASH_H_5627135585666179 1
348 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
349 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
351 # define XXH_PUBLIC_API __declspec(dllexport)
353 # define XXH_PUBLIC_API __declspec(dllimport)
356 # define XXH_PUBLIC_API
361 # define XXH_CAT(A,B) A##B
362 # define XXH_NAME2(A,B) XXH_CAT(A,B)
363 # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
365 # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
366 # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
367 # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
368 # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
369 # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
370 # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
371 # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
372 # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
373 # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
375 # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
376 # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
377 # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
378 # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
379 # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
380 # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
381 # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
382 # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
383 # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
385 # define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
386 # define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
387 # define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
388 # define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
389 # define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
390 # define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
391 # define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
392 # define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
393 # define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
394 # define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
395 # define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
396 # define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
397 # define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
398 # define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
399 # define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
401 # define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
402 # define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
403 # define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
404 # define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
405 # define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
406 # define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
407 # define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
408 # define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
409 # define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
410 # define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
411 # define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
412 # define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
413 # define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
414 # define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
415 # define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
424 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
425 # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
427 # define XXH_PUBLIC_API __declspec(dllexport)
429 # define XXH_PUBLIC_API __declspec(dllimport)
432 # define XXH_PUBLIC_API
436 #if defined (__GNUC__)
437 # define XXH_CONSTF __attribute__((const))
438 # define XXH_PUREF __attribute__((pure))
439 # define XXH_MALLOCF __attribute__((malloc))
/* Library version, expressed as MAJOR.MINOR.RELEASE components. */
449 #define XXH_VERSION_MAJOR 0
450 #define XXH_VERSION_MINOR 8
451 #define XXH_VERSION_RELEASE 2
/* Single monotonically comparable integer: MAJOR*10000 + MINOR*100 + RELEASE. */
453 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
482 #if defined(XXH_DOXYGEN)
490 #elif !defined (__VMS) \
491 && (defined (__cplusplus) \
492 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ) )
498 # if UINT_MAX == 0xFFFFFFFFUL
499 typedef unsigned int XXH32_hash_t;
500 # elif ULONG_MAX == 0xFFFFFFFFUL
501 typedef unsigned long XXH32_hash_t;
503 # error "unsupported platform: need a 32-bit type"
550 #ifndef XXH_NO_STREAM
686 unsigned char digest[4];
713 #ifdef __has_attribute
714 # define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
716 # define XXH_HAS_ATTRIBUTE(x) 0
724 #define XXH_C23_VN 201711L
727 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
728 # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
730 # define XXH_HAS_C_ATTRIBUTE(x) 0
733 #if defined(__cplusplus) && defined(__has_cpp_attribute)
734 # define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
736 # define XXH_HAS_CPP_ATTRIBUTE(x) 0
745 #if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
746 # define XXH_FALLTHROUGH [[fallthrough]]
747 #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
748 # define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
750 # define XXH_FALLTHROUGH
758 #if XXH_HAS_ATTRIBUTE(noescape)
759 # define XXH_NOESCAPE __attribute__((noescape))
761 # define XXH_NOESCAPE
771 #ifndef XXH_NO_LONG_LONG
775 #if defined(XXH_DOXYGEN)
782 #elif !defined (__VMS) \
783 && (defined (__cplusplus) \
784 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ) )
789 # if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
838 #ifndef XXH_NO_STREAM
/* NOTE(review): presumably the minimum byte length accepted for a
 * caller-supplied XXH3 secret — confirm against the _withSecret API docs. */
950 #define XXH3_SECRET_SIZE_MIN 136
974 #ifndef XXH_NO_STREAM
1062 #ifndef XXH_NO_STREAM
1121 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
1122 #define XXHASH_H_STATIC_13879238742
1149 struct XXH32_state_s {
1150 XXH32_hash_t total_len_32;
1151 XXH32_hash_t large_len;
1153 XXH32_hash_t mem32[4];
1154 XXH32_hash_t memsize;
1155 XXH32_hash_t reserved;
1159 #ifndef XXH_NO_LONG_LONG
1173 struct XXH64_state_s {
1177 XXH32_hash_t memsize;
1178 XXH32_hash_t reserved32;
1184 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
1185 # include <stdalign.h>
1186 # define XXH_ALIGN(n) alignas(n)
1187 #elif defined(__cplusplus) && (__cplusplus >= 201103L)
1189 # define XXH_ALIGN(n) alignas(n)
1190 #elif defined(__GNUC__)
1191 # define XXH_ALIGN(n) __attribute__ ((aligned(n)))
1192 #elif defined(_MSC_VER)
1193 # define XXH_ALIGN(n) __declspec(align(n))
1195 # define XXH_ALIGN(n)
1199 #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) \
1200 && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) \
1201 && defined(__GNUC__)
1202 # define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
1204 # define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
/* Byte size of the streaming input buffer embedded in XXH3_state_s. */
1214 #define XXH3_INTERNALBUFFER_SIZE 256
/* Byte size of the customSecret array embedded in XXH3_state_s. */
1223 #define XXH3_SECRET_DEFAULT_SIZE 192
1247 struct XXH3_state_s {
1250 XXH_ALIGN_MEMBER(64,
unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
1252 XXH_ALIGN_MEMBER(64,
unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
1254 XXH32_hash_t bufferedSize;
1256 XXH32_hash_t useSeed;
1258 size_t nbStripesSoFar;
1262 size_t nbStripesPerBlock;
1270 const unsigned char* extSecret;
1276 #undef XXH_ALIGN_MEMBER
1289 #define XXH3_INITSTATE(XXH3_state_ptr) \
1291 XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
1292 tmp_xxh3_state_ptr->seed = 0; \
1293 tmp_xxh3_state_ptr->extSecret = NULL; \
1433 #ifndef XXH_NO_STREAM
1448 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
1449 # define XXH_IMPLEMENTATION
1482 #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
1483 || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
1484 # define XXH_IMPLEM_13a8737387
1502 # define XXH_NO_LONG_LONG
1503 # undef XXH_NO_LONG_LONG
1554 # define XXH_FORCE_MEMORY_ACCESS 0
1582 # define XXH_SIZE_OPT 0
1612 # define XXH_FORCE_ALIGN_CHECK 0
1634 # define XXH_NO_INLINE_HINTS 0
1651 # define XXH3_INLINE_SECRET 0
1663 # define XXH32_ENDJMP 0
1672 # define XXH_OLD_NAMES
1673 # undef XXH_OLD_NAMES
1683 # define XXH_NO_STREAM
1684 # undef XXH_NO_STREAM
1690 #ifndef XXH_FORCE_MEMORY_ACCESS
1694 # if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
1695 # define XXH_FORCE_MEMORY_ACCESS 1
1699 #ifndef XXH_SIZE_OPT
1701 # if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
1702 # define XXH_SIZE_OPT 1
1704 # define XXH_SIZE_OPT 0
1708 #ifndef XXH_FORCE_ALIGN_CHECK
1710 # if XXH_SIZE_OPT >= 1 || \
1711 defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
1712 || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM)
1713 # define XXH_FORCE_ALIGN_CHECK 0
1715 # define XXH_FORCE_ALIGN_CHECK 1
1719 #ifndef XXH_NO_INLINE_HINTS
1720 # if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__)
1721 # define XXH_NO_INLINE_HINTS 1
1723 # define XXH_NO_INLINE_HINTS 0
1727 #ifndef XXH3_INLINE_SECRET
1728 # if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
1729 || !defined(XXH_INLINE_ALL)
1730 # define XXH3_INLINE_SECRET 0
1732 # define XXH3_INLINE_SECRET 1
1736 #ifndef XXH32_ENDJMP
1738 # define XXH32_ENDJMP 0
1750 #if defined(XXH_NO_STREAM)
1752 #elif defined(XXH_NO_STDLIB)
1763 static XXH_CONSTF void* XXH_malloc(
size_t s) { (void)
s;
return NULL; }
/*
 * No-op deallocator paired with the stubbed XXH_malloc: since that stub
 * never hands out memory, there is never anything to release here.
 */
static void XXH_free(void* p)
{
    (void)p;  /* intentionally ignored */
}
/*
 * Default deallocator: forwards straight to the C standard library.
 * No NULL guard is needed — free(NULL) is defined as a no-op by the
 * C standard.
 */
static void XXH_free(void* p) { free(p); }
1794 static void* XXH_memcpy(
void*
dest,
const void*
src,
size_t size)
1806 # pragma warning(disable : 4127)
1809 #if XXH_NO_INLINE_HINTS
1810 # if defined(__GNUC__) || defined(__clang__)
1811 # define XXH_FORCE_INLINE static __attribute__((unused))
1813 # define XXH_FORCE_INLINE static
1815 # define XXH_NO_INLINE static
1817 #elif defined(__GNUC__) || defined(__clang__)
1818 # define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
1819 # define XXH_NO_INLINE static __attribute__((noinline))
1820 #elif defined(_MSC_VER)
1821 # define XXH_FORCE_INLINE static __forceinline
1822 # define XXH_NO_INLINE static __declspec(noinline)
1823 #elif defined (__cplusplus) \
1824 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))
1825 # define XXH_FORCE_INLINE static inline
1826 # define XXH_NO_INLINE static
1828 # define XXH_FORCE_INLINE static
1829 # define XXH_NO_INLINE static
1832 #if XXH3_INLINE_SECRET
1833 # define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
1835 # define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
1850 #ifndef XXH_DEBUGLEVEL
1852 # define XXH_DEBUGLEVEL DEBUGLEVEL
1854 # define XXH_DEBUGLEVEL 0
1858 #if (XXH_DEBUGLEVEL>=1) || __CPPCHECK__
1859 # include <assert.h>
1860 # define XXH_ASSERT(c) assert(c)
1862 # define XXH_ASSERT(c) XXH_ASSUME(c)
1866 #ifndef XXH_STATIC_ASSERT
1867 # if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
1868 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
1869 # elif defined(__cplusplus) && (__cplusplus >= 201103L)
1870 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
1872 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
1874 # define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
1893 #if defined(__GNUC__) || defined(__clang__)
1894 # define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
1896 # define XXH_COMPILER_GUARD(var) ((void)0)
1899 #if defined(__clang__)
1900 # define XXH_COMPILER_GUARD_W(var) __asm__("" : "+w" (var))
1902 # define XXH_COMPILER_GUARD_W(var) ((void)0)
1908 #if !defined (__VMS) \
1909 && (defined (__cplusplus) \
1910 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ) )
1911 # include <stdint.h>
1914 typedef unsigned char xxh_u8;
1916 typedef XXH32_hash_t xxh_u32;
1918 #ifdef XXH_OLD_NAMES
1919 # define BYTE xxh_u8
1921 # define U32 xxh_u32
1976 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
1981 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
1987 static xxh_u32 XXH_read32(
const void* memPtr) {
return *(
const xxh_u32*) memPtr; }
1989 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1998 #ifdef XXH_OLD_NAMES
2001 static xxh_u32 XXH_read32(
const void*
ptr)
2004 return *((
const xxh_unalign32*)
ptr);
2013 static xxh_u32 XXH_read32(
const void* memPtr)
2016 XXH_memcpy(&
val, memPtr,
sizeof(
val));
2041 #ifndef XXH_CPU_LITTLE_ENDIAN
2046 # if defined(_WIN32) \
2047 || defined(__LITTLE_ENDIAN__) \
2048 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
2049 # define XXH_CPU_LITTLE_ENDIAN 1
2050 # elif defined(__BIG_ENDIAN__) \
2051 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2052 # define XXH_CPU_LITTLE_ENDIAN 0
2060 static int XXH_isLittleEndian(
void)
2066 const union { xxh_u32
u; xxh_u8
c[4]; }
one = { 1 };
2069 # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
2079 #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
2081 #ifdef __has_builtin
2082 # define XXH_HAS_BUILTIN(x) __has_builtin(x)
2084 # define XXH_HAS_BUILTIN(x) 0
2116 #if XXH_HAS_BUILTIN(__builtin_unreachable)
2117 # define XXH_UNREACHABLE() __builtin_unreachable()
2119 #elif defined(_MSC_VER)
2120 # define XXH_UNREACHABLE() __assume(0)
2123 # define XXH_UNREACHABLE()
2126 #if XXH_HAS_BUILTIN(__builtin_assume)
2127 # define XXH_ASSUME(c) __builtin_assume(c)
2129 # define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
2145 #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
2146 && XXH_HAS_BUILTIN(__builtin_rotateleft64)
2147 # define XXH_rotl32 __builtin_rotateleft32
2148 # define XXH_rotl64 __builtin_rotateleft64
2150 #elif defined(_MSC_VER)
2151 # define XXH_rotl32(x,r) _rotl(x,r)
2152 # define XXH_rotl64(x,r) _rotl64(x,r)
2154 # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
2155 # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
2166 #if defined(_MSC_VER)
2167 # define XXH_swap32 _byteswap_ulong
2168 #elif XXH_GCC_VERSION >= 403
2169 # define XXH_swap32 __builtin_bswap32
2171 static xxh_u32 XXH_swap32 (xxh_u32
x)
2173 return ((
x << 24) & 0xff000000 ) |
2174 ((
x << 8) & 0x00ff0000 ) |
2175 ((
x >> 8) & 0x0000ff00 ) |
2176 ((
x >> 24) & 0x000000ff );
2199 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2201 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(
const void* memPtr)
2203 const xxh_u8* bytePtr = (
const xxh_u8 *)memPtr;
2205 | ((xxh_u32)bytePtr[1] << 8)
2206 | ((xxh_u32)bytePtr[2] << 16)
2207 | ((xxh_u32)bytePtr[3] << 24);
2210 XXH_FORCE_INLINE xxh_u32 XXH_readBE32(
const void* memPtr)
2212 const xxh_u8* bytePtr = (
const xxh_u8 *)memPtr;
2214 | ((xxh_u32)bytePtr[2] << 8)
2215 | ((xxh_u32)bytePtr[1] << 16)
2216 | ((xxh_u32)bytePtr[0] << 24);
2220 XXH_FORCE_INLINE xxh_u32 XXH_readLE32(
const void*
ptr)
2222 return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(
ptr) : XXH_swap32(XXH_read32(
ptr));
2225 static xxh_u32 XXH_readBE32(
const void*
ptr)
2227 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(
ptr)) : XXH_read32(
ptr);
2231 XXH_FORCE_INLINE xxh_u32
2232 XXH_readLE32_align(
const void*
ptr, XXH_alignment align)
2234 if (align==XXH_unaligned) {
2235 return XXH_readLE32(
ptr);
2237 return XXH_CPU_LITTLE_ENDIAN ? *(
const xxh_u32*)
ptr : XXH_swap32(*(
const xxh_u32*)
ptr);
/*
 * XXH32 mixing constants. They are used as multipliers and additive
 * constants throughout the 32-bit rounds, avalanche and finalization
 * steps below (see XXH32_round / XXH32_avalanche / XXH32_finalize).
 */
2261 #define XXH_PRIME32_1 0x9E3779B1U
2262 #define XXH_PRIME32_2 0x85EBCA77U
2263 #define XXH_PRIME32_3 0xC2B2AE3DU
2264 #define XXH_PRIME32_4 0x27D4EB2FU
2265 #define XXH_PRIME32_5 0x165667B1U
2267 #ifdef XXH_OLD_NAMES
2268 # define PRIME32_1 XXH_PRIME32_1
2269 # define PRIME32_2 XXH_PRIME32_2
2270 # define PRIME32_3 XXH_PRIME32_3
2271 # define PRIME32_4 XXH_PRIME32_4
2272 # define PRIME32_5 XXH_PRIME32_5
2286 static xxh_u32 XXH32_round(xxh_u32
acc, xxh_u32
input)
2289 acc = XXH_rotl32(
acc, 13);
2290 acc *= XXH_PRIME32_1;
2291 #if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
2325 XXH_COMPILER_GUARD(
acc);
2340 static xxh_u32 XXH32_avalanche(xxh_u32
hash)
2343 hash *= XXH_PRIME32_2;
2345 hash *= XXH_PRIME32_3;
2350 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
2368 XXH32_finalize(xxh_u32
hash,
const xxh_u8*
ptr,
size_t len, XXH_alignment align)
2370 #define XXH_PROCESS1 do { \
2371 hash += (*ptr++) * XXH_PRIME32_5; \
2372 hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
2375 #define XXH_PROCESS4 do { \
2376 hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
2378 hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
2381 if (
ptr==NULL) XXH_ASSERT(len == 0);
2384 if (!XXH32_ENDJMP) {
2394 return XXH32_avalanche(
hash);
2397 case 12: XXH_PROCESS4;
2399 case 8: XXH_PROCESS4;
2401 case 4: XXH_PROCESS4;
2402 return XXH32_avalanche(
hash);
2404 case 13: XXH_PROCESS4;
2406 case 9: XXH_PROCESS4;
2408 case 5: XXH_PROCESS4;
2410 return XXH32_avalanche(
hash);
2412 case 14: XXH_PROCESS4;
2414 case 10: XXH_PROCESS4;
2416 case 6: XXH_PROCESS4;
2419 return XXH32_avalanche(
hash);
2421 case 15: XXH_PROCESS4;
2423 case 11: XXH_PROCESS4;
2425 case 7: XXH_PROCESS4;
2427 case 3: XXH_PROCESS1;
2429 case 2: XXH_PROCESS1;
2431 case 1: XXH_PROCESS1;
2433 case 0:
return XXH32_avalanche(
hash);
2440 #ifdef XXH_OLD_NAMES
2441 # define PROCESS1 XXH_PROCESS1
2442 # define PROCESS4 XXH_PROCESS4
2444 # undef XXH_PROCESS1
2445 # undef XXH_PROCESS4
2457 XXH32_endian_align(
const xxh_u8*
input,
size_t len, xxh_u32 seed, XXH_alignment align)
2461 if (
input==NULL) XXH_ASSERT(len == 0);
2464 const xxh_u8*
const bEnd =
input + len;
2465 const xxh_u8*
const limit = bEnd - 15;
2466 xxh_u32 v1 =
seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2467 xxh_u32
v2 =
seed + XXH_PRIME32_2;
2468 xxh_u32 v3 =
seed + 0;
2469 xxh_u32 v4 =
seed - XXH_PRIME32_1;
2472 v1 = XXH32_round(v1, XXH_get32bits(
input));
input += 4;
2474 v3 = XXH32_round(v3, XXH_get32bits(
input));
input += 4;
2475 v4 = XXH32_round(v4, XXH_get32bits(
input));
input += 4;
2478 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(
v2, 7)
2479 + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
2481 h32 =
seed + XXH_PRIME32_5;
2484 h32 += (xxh_u32)len;
2486 return XXH32_finalize(h32,
input, len&15, align);
2492 #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2499 if (XXH_FORCE_ALIGN_CHECK) {
2500 if ((((
size_t)
input) & 3) == 0) {
2501 return XXH32_endian_align((
const xxh_u8*)
input, len, seed, XXH_aligned);
2504 return XXH32_endian_align((
const xxh_u8*)
input, len, seed, XXH_unaligned);
2511 #ifndef XXH_NO_STREAM
2527 XXH_memcpy(dstState, srcState,
sizeof(*dstState));
2533 XXH_ASSERT(statePtr != NULL);
2535 memset(statePtr, 0,
sizeof(*statePtr));
2537 statePtr->v[0] =
seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2539 statePtr->v[1] =
seed + XXH_PRIME32_2;
2541 statePtr->v[2] =
seed + 0;
2543 statePtr->v[3] =
seed - XXH_PRIME32_1;
2553 XXH_ASSERT(len == 0);
2557 {
const xxh_u8*
p = (
const xxh_u8*)
input;
2558 const xxh_u8*
const bEnd =
p + len;
2560 state->total_len_32 += (XXH32_hash_t)len;
2561 state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
2563 if (state->memsize + len < 16) {
2564 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize,
input, len);
2565 state->memsize += (XXH32_hash_t)len;
2569 if (state->memsize) {
2570 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize,
input, 16-state->memsize);
2571 {
const xxh_u32* p32 = state->mem32;
2572 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
2573 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
2574 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
2575 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
2577 p += 16-state->memsize;
2582 const xxh_u8*
const limit = bEnd - 16;
2585 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(
p));
p+=4;
2586 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(
p));
p+=4;
2587 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(
p));
p+=4;
2588 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(
p));
p+=4;
2594 XXH_memcpy(state->mem32,
p, (
size_t)(bEnd-
p));
2608 if (state->large_len) {
2609 h32 = XXH_rotl32(state->v[0], 1)
2610 + XXH_rotl32(state->v[1], 7)
2611 + XXH_rotl32(state->v[2], 12)
2612 + XXH_rotl32(state->v[3], 18);
2614 h32 = state->v[2] + XXH_PRIME32_5;
2617 h32 += state->total_len_32;
2619 return XXH32_finalize(h32, (
const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
2642 if (XXH_CPU_LITTLE_ENDIAN)
hash = XXH_swap32(
hash);
2643 XXH_memcpy(dst, &
hash,
sizeof(*dst));
2648 return XXH_readBE32(
src);
2652 #ifndef XXH_NO_LONG_LONG
2666 #ifdef XXH_OLD_NAMES
2667 # define U64 xxh_u64
2670 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2675 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
2678 static xxh_u64 XXH_read64(
const void* memPtr)
2680 return *(
const xxh_u64*) memPtr;
2683 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
2692 #ifdef XXH_OLD_NAMES
2695 static xxh_u64 XXH_read64(
const void*
ptr)
2698 return *((
const xxh_unalign64*)
ptr);
2707 static xxh_u64 XXH_read64(
const void* memPtr)
2710 XXH_memcpy(&
val, memPtr,
sizeof(
val));
2716 #if defined(_MSC_VER)
2717 # define XXH_swap64 _byteswap_uint64
2718 #elif XXH_GCC_VERSION >= 403
2719 # define XXH_swap64 __builtin_bswap64
2721 static xxh_u64 XXH_swap64(xxh_u64
x)
2723 return ((
x << 56) & 0xff00000000000000ULL) |
2724 ((
x << 40) & 0x00ff000000000000ULL) |
2725 ((
x << 24) & 0x0000ff0000000000ULL) |
2726 ((
x << 8) & 0x000000ff00000000ULL) |
2727 ((
x >> 8) & 0x00000000ff000000ULL) |
2728 ((
x >> 24) & 0x0000000000ff0000ULL) |
2729 ((
x >> 40) & 0x000000000000ff00ULL) |
2730 ((
x >> 56) & 0x00000000000000ffULL);
2736 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2738 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(
const void* memPtr)
2740 const xxh_u8* bytePtr = (
const xxh_u8 *)memPtr;
2742 | ((xxh_u64)bytePtr[1] << 8)
2743 | ((xxh_u64)bytePtr[2] << 16)
2744 | ((xxh_u64)bytePtr[3] << 24)
2745 | ((xxh_u64)bytePtr[4] << 32)
2746 | ((xxh_u64)bytePtr[5] << 40)
2747 | ((xxh_u64)bytePtr[6] << 48)
2748 | ((xxh_u64)bytePtr[7] << 56);
2751 XXH_FORCE_INLINE xxh_u64 XXH_readBE64(
const void* memPtr)
2753 const xxh_u8* bytePtr = (
const xxh_u8 *)memPtr;
2755 | ((xxh_u64)bytePtr[6] << 8)
2756 | ((xxh_u64)bytePtr[5] << 16)
2757 | ((xxh_u64)bytePtr[4] << 24)
2758 | ((xxh_u64)bytePtr[3] << 32)
2759 | ((xxh_u64)bytePtr[2] << 40)
2760 | ((xxh_u64)bytePtr[1] << 48)
2761 | ((xxh_u64)bytePtr[0] << 56);
2765 XXH_FORCE_INLINE xxh_u64 XXH_readLE64(
const void*
ptr)
2767 return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(
ptr) : XXH_swap64(XXH_read64(
ptr));
2770 static xxh_u64 XXH_readBE64(
const void*
ptr)
2772 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(
ptr)) : XXH_read64(
ptr);
2776 XXH_FORCE_INLINE xxh_u64
2777 XXH_readLE64_align(
const void*
ptr, XXH_alignment align)
2779 if (align==XXH_unaligned)
2780 return XXH_readLE64(
ptr);
2782 return XXH_CPU_LITTLE_ENDIAN ? *(
const xxh_u64*)
ptr : XXH_swap64(*(
const xxh_u64*)
ptr);
/*
 * XXH64 mixing constants, the 64-bit counterparts of XXH_PRIME32_*.
 * Used as multipliers and additive constants in the 64-bit rounds,
 * merge and avalanche steps below (see XXH64_round / XXH64_mergeRound /
 * XXH64_avalanche).
 */
2796 #define XXH_PRIME64_1 0x9E3779B185EBCA87ULL
2797 #define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL
2798 #define XXH_PRIME64_3 0x165667B19E3779F9ULL
2799 #define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL
2800 #define XXH_PRIME64_5 0x27D4EB2F165667C5ULL
2802 #ifdef XXH_OLD_NAMES
2803 # define PRIME64_1 XXH_PRIME64_1
2804 # define PRIME64_2 XXH_PRIME64_2
2805 # define PRIME64_3 XXH_PRIME64_3
2806 # define PRIME64_4 XXH_PRIME64_4
2807 # define PRIME64_5 XXH_PRIME64_5
2811 static xxh_u64 XXH64_round(xxh_u64
acc, xxh_u64
input)
2814 acc = XXH_rotl64(
acc, 31);
2815 acc *= XXH_PRIME64_1;
2819 static xxh_u64 XXH64_mergeRound(xxh_u64
acc, xxh_u64
val)
2821 val = XXH64_round(0,
val);
2823 acc =
acc * XXH_PRIME64_1 + XXH_PRIME64_4;
2828 static xxh_u64 XXH64_avalanche(xxh_u64
hash)
2831 hash *= XXH_PRIME64_2;
2833 hash *= XXH_PRIME64_3;
2839 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
2857 XXH64_finalize(xxh_u64
hash,
const xxh_u8*
ptr,
size_t len, XXH_alignment align)
2859 if (
ptr==NULL) XXH_ASSERT(len == 0);
2862 xxh_u64
const k1 = XXH64_round(0, XXH_get64bits(
ptr));
2865 hash = XXH_rotl64(
hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
2869 hash ^= (xxh_u64)(XXH_get32bits(
ptr)) * XXH_PRIME64_1;
2871 hash = XXH_rotl64(
hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
2875 hash ^= (*
ptr++) * XXH_PRIME64_5;
2876 hash = XXH_rotl64(
hash, 11) * XXH_PRIME64_1;
2879 return XXH64_avalanche(
hash);
2882 #ifdef XXH_OLD_NAMES
2883 # define PROCESS1_64 XXH_PROCESS1_64
2884 # define PROCESS4_64 XXH_PROCESS4_64
2885 # define PROCESS8_64 XXH_PROCESS8_64
2887 # undef XXH_PROCESS1_64
2888 # undef XXH_PROCESS4_64
2889 # undef XXH_PROCESS8_64
2901 XXH64_endian_align(
const xxh_u8*
input,
size_t len, xxh_u64 seed, XXH_alignment align)
2904 if (
input==NULL) XXH_ASSERT(len == 0);
2907 const xxh_u8*
const bEnd =
input + len;
2908 const xxh_u8*
const limit = bEnd - 31;
2909 xxh_u64 v1 =
seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2910 xxh_u64
v2 =
seed + XXH_PRIME64_2;
2911 xxh_u64 v3 =
seed + 0;
2912 xxh_u64 v4 =
seed - XXH_PRIME64_1;
2915 v1 = XXH64_round(v1, XXH_get64bits(
input));
input+=8;
2917 v3 = XXH64_round(v3, XXH_get64bits(
input));
input+=8;
2918 v4 = XXH64_round(v4, XXH_get64bits(
input));
input+=8;
2921 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(
v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
2922 h64 = XXH64_mergeRound(h64, v1);
2923 h64 = XXH64_mergeRound(h64,
v2);
2924 h64 = XXH64_mergeRound(h64, v3);
2925 h64 = XXH64_mergeRound(h64, v4);
2928 h64 =
seed + XXH_PRIME64_5;
2931 h64 += (xxh_u64) len;
2933 return XXH64_finalize(h64,
input, len, align);
2940 #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2947 if (XXH_FORCE_ALIGN_CHECK) {
2948 if ((((
size_t)
input) & 7)==0) {
2949 return XXH64_endian_align((
const xxh_u8*)
input, len, seed, XXH_aligned);
2952 return XXH64_endian_align((
const xxh_u8*)
input, len, seed, XXH_unaligned);
2958 #ifndef XXH_NO_STREAM
2974 XXH_memcpy(dstState, srcState,
sizeof(*dstState));
2980 XXH_ASSERT(statePtr != NULL);
2982 memset(statePtr, 0,
sizeof(*statePtr));
2984 statePtr->v[0] =
seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2986 statePtr->v[1] =
seed + XXH_PRIME64_2;
2988 statePtr->v[2] =
seed + 0;
2990 statePtr->v[3] =
seed - XXH_PRIME64_1;
2999 XXH_ASSERT(len == 0);
3003 {
const xxh_u8*
p = (
const xxh_u8*)
input;
3004 const xxh_u8*
const bEnd =
p + len;
3006 state->total_len += len;
3008 if (state->memsize + len < 32) {
3009 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize,
input, len);
3010 state->memsize += (xxh_u32)len;
3014 if (state->memsize) {
3015 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize,
input, 32-state->memsize);
3016 state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
3017 state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
3018 state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
3019 state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
3020 p += 32 - state->memsize;
3025 const xxh_u8*
const limit = bEnd - 32;
3028 state->v[0] = XXH64_round(state->v[0], XXH_readLE64(
p));
p+=8;
3029 state->v[1] = XXH64_round(state->v[1], XXH_readLE64(
p));
p+=8;
3030 state->v[2] = XXH64_round(state->v[2], XXH_readLE64(
p));
p+=8;
3031 state->v[3] = XXH64_round(state->v[3], XXH_readLE64(
p));
p+=8;
3037 XXH_memcpy(state->mem64,
p, (
size_t)(bEnd-
p));
3051 if (state->total_len >= 32) {
3052 h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
3053 h64 = XXH64_mergeRound(h64, state->v[0]);
3054 h64 = XXH64_mergeRound(h64, state->v[1]);
3055 h64 = XXH64_mergeRound(h64, state->v[2]);
3056 h64 = XXH64_mergeRound(h64, state->v[3]);
3058 h64 = state->v[2] + XXH_PRIME64_5;
3061 h64 += (xxh_u64) state->total_len;
3063 return XXH64_finalize(h64, (
const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
3073 if (XXH_CPU_LITTLE_ENDIAN)
hash = XXH_swap64(
hash);
3074 XXH_memcpy(dst, &
hash,
sizeof(*dst));
3080 return XXH_readBE64(
src);
3098 #if ((defined(sun) || defined(__sun)) && __cplusplus)
3099 # define XXH_RESTRICT
3100 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
3101 # define XXH_RESTRICT restrict
3102 #elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
3103 || (defined (__clang__)) \
3104 || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
3105 || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
3110 # define XXH_RESTRICT __restrict
3112 # define XXH_RESTRICT
3115 #if (defined(__GNUC__) && (__GNUC__ >= 3)) \
3116 || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
3117 || defined(__clang__)
3118 # define XXH_likely(x) __builtin_expect(x, 1)
3119 # define XXH_unlikely(x) __builtin_expect(x, 0)
3121 # define XXH_likely(x) (x)
3122 # define XXH_unlikely(x) (x)
3125 #if defined(__GNUC__) || defined(__clang__)
3126 # if defined(__ARM_FEATURE_SVE)
3127 # include <arm_sve.h>
3129 # if defined(__ARM_NEON__) || defined(__ARM_NEON) \
3130 || (defined(_M_ARM) && _M_ARM >= 7) \
3131 || defined(_M_ARM64) || defined(_M_ARM64EC)
3132 # define inline __inline__
3133 # include <arm_neon.h>
3135 # elif defined(__AVX2__)
3136 # include <immintrin.h>
3137 # elif defined(__SSE2__)
3138 # include <emmintrin.h>
3142 #if defined(_MSC_VER)
3143 # include <intrin.h>
3215 #if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
3216 # warning "XXH3 is highly inefficient without ARM or Thumb-2."
3234 # define XXH_VECTOR XXH_SCALAR
3244 enum XXH_VECTOR_TYPE {
3267 # define XXH_ACC_ALIGN 8
3272 # define XXH_SCALAR 0
3275 # define XXH_AVX512 3
3282 # if defined(__ARM_FEATURE_SVE)
3283 # define XXH_VECTOR XXH_SVE
3285 defined(__ARM_NEON__) || defined(__ARM_NEON) \
3286 || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) \
3288 defined(_WIN32) || defined(__LITTLE_ENDIAN__) \
3289 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
3291 # define XXH_VECTOR XXH_NEON
3292 # elif defined(__AVX512F__)
3293 # define XXH_VECTOR XXH_AVX512
3294 # elif defined(__AVX2__)
3295 # define XXH_VECTOR XXH_AVX2
3296 # elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
3297 # define XXH_VECTOR XXH_SSE2
3298 # elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
3299 || (defined(__s390x__) && defined(__VEC__)) \
3300 && defined(__GNUC__)
3301 # define XXH_VECTOR XXH_VSX
3303 # define XXH_VECTOR XXH_SCALAR
3308 #if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
3310 # pragma warning(once : 4606)
3312 # warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
3315 # define XXH_VECTOR XXH_SCALAR
3322 #ifndef XXH_ACC_ALIGN
3323 # if defined(XXH_X86DISPATCH)
3324 # define XXH_ACC_ALIGN 64
3325 # elif XXH_VECTOR == XXH_SCALAR
3326 # define XXH_ACC_ALIGN 8
3327 # elif XXH_VECTOR == XXH_SSE2
3328 # define XXH_ACC_ALIGN 16
3329 # elif XXH_VECTOR == XXH_AVX2
3330 # define XXH_ACC_ALIGN 32
3331 # elif XXH_VECTOR == XXH_NEON
3332 # define XXH_ACC_ALIGN 16
3333 # elif XXH_VECTOR == XXH_VSX
3334 # define XXH_ACC_ALIGN 16
3335 # elif XXH_VECTOR == XXH_AVX512
3336 # define XXH_ACC_ALIGN 64
3337 # elif XXH_VECTOR == XXH_SVE
3338 # define XXH_ACC_ALIGN 64
3342 #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
3343 || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
3344 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
3345 #elif XXH_VECTOR == XXH_SVE
3346 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
3348 # define XXH_SEC_ALIGN 8
3351 #if defined(__GNUC__) || defined(__clang__)
3352 # define XXH_ALIASING __attribute__((may_alias))
3354 # define XXH_ALIASING
3378 #if XXH_VECTOR == XXH_AVX2 \
3379 && defined(__GNUC__) && !defined(__clang__) \
3380 && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0
3381 # pragma GCC push_options
3382 # pragma GCC optimize("-O2")
3385 #if XXH_VECTOR == XXH_NEON
3394 typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
3409 #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
/*!
 * Aligned-agnostic 16-byte NEON load.
 * Dereferences through the may_alias-qualified vector typedef so GCC does
 * not apply strict-aliasing assumptions to this access.
 */
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
{
    xxh_aliasing_uint64x2_t const* const aliased = (xxh_aliasing_uint64x2_t const*)ptr;
    return *aliased;
}
/*!
 * Aligned-agnostic 16-byte NEON load (portable fallback).
 * Loads as bytes, then reinterprets as two 64-bit lanes.
 */
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
{
    uint8x16_t const raw = vld1q_u8((uint8_t const*)ptr);
    return vreinterpretq_u64_u8(raw);
}
3429 #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
/*!
 * acc += widening multiply of the low 32-bit halves of lhs and rhs.
 * Inline assembly forces a single UMLAL instruction on GCC < 11, which
 * otherwise generates a poor sequence for this pattern.
 * Fix: the visible text dropped `return acc;`, so this non-void function
 * fell off the end (undefined behavior) — restored.
 */
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    /* "+w": acc is read and written; "w": lhs/rhs are vector-register inputs. */
    __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
    return acc;
}
/*! acc += widening multiply of the high 32-bit halves of lhs and rhs. */
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    /* The high-half intrinsic compiles cleanly, so no asm is needed here. */
    return vmlal_high_u32(acc, lhs, rhs);
}
/*! Portable low-half multiply-accumulate: acc += lo(lhs) * lo(rhs). */
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    uint32x2_t const lhs_lo = vget_low_u32(lhs);
    uint32x2_t const rhs_lo = vget_low_u32(rhs);
    return vmlal_u32(acc, lhs_lo, rhs_lo);
}
/*! Portable high-half multiply-accumulate: acc += hi(lhs) * hi(rhs). */
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    uint32x2_t const lhs_hi = vget_high_u32(lhs);
    uint32x2_t const rhs_hi = vget_high_u32(rhs);
    return vmlal_u32(acc, lhs_hi, rhs_hi);
}
3494 # ifndef XXH3_NEON_LANES
3495 # if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
3496 && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
3497 # define XXH3_NEON_LANES 6
3499 # define XXH3_NEON_LANES XXH_ACC_NB
3512 #if XXH_VECTOR == XXH_VSX
3521 # pragma push_macro("bool")
3522 # pragma push_macro("vector")
3523 # pragma push_macro("pixel")
3529 # if defined(__s390x__)
3530 # include <s390intrin.h>
3532 # include <altivec.h>
3536 # pragma pop_macro("pixel")
3537 # pragma pop_macro("vector")
3538 # pragma pop_macro("bool")
3540 typedef __vector
unsigned long long xxh_u64x2;
3541 typedef __vector
unsigned char xxh_u8x16;
3542 typedef __vector
unsigned xxh_u32x4;
3547 typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
3550 # if defined(__BIG_ENDIAN__) \
3551 || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
3552 # define XXH_VSX_BE 1
3553 # elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
3554 # warning "-maltivec=be is not recommended. Please use native endianness."
3555 # define XXH_VSX_BE 1
3557 # define XXH_VSX_BE 0
3562 # if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
3563 # define XXH_vec_revb vec_revb
/*!
 * Byte-swaps each 64-bit lane of @p val (fallback for targets without a
 * native vec_revb). The permute table reverses bytes 0-7 and 8-15
 * independently.
 */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
{
    xxh_u8x16 const laneReverse = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
                                    0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
    return vec_perm(val, val, laneReverse);
}
/*!
 * Performs an unaligned vector load.
 * Fix: the visible text dropped the `ret` declaration and `return ret;`
 * (the memcpy targeted an undeclared object and nothing was returned) —
 * both restored.
 */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
{
    xxh_u64x2 ret;
    /* memcpy is the portable, UB-free way to express an unaligned load. */
    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
    return ret;
}
3596 # if defined(__s390x__)
3598 # define XXH_vec_mulo vec_mulo
3599 # define XXH_vec_mule vec_mule
3600 # elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
3603 # define XXH_vec_mulo __builtin_altivec_vmulouw
3604 # define XXH_vec_mule __builtin_altivec_vmuleuw
/*!
 * Multiplies the odd 32-bit lanes of @p a and @p b into 64-bit products
 * (inline-asm fallback for compilers without a usable vec_mulo).
 * Fix: the visible text dropped the `result` declaration and
 * `return result;` — restored; "=v" writes result, "v" reads a and b.
 */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
/*!
 * Multiplies the even 32-bit lanes of @p a and @p b into 64-bit products
 * (inline-asm fallback for compilers without a usable vec_mule).
 * Fix: the visible text dropped the `result` declaration and
 * `return result;` — restored.
 */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
3623 #if XXH_VECTOR == XXH_SVE
3624 #define ACCRND(acc, offset) \
3626 svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
3627 svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
3628 svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
3629 svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
3630 svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
3631 svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
3632 svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
3633 acc = svadd_u64_x(mask, acc, mul); \
3640 #if defined(XXH_NO_PREFETCH)
3641 # define XXH_PREFETCH(ptr) (void)(ptr)
3643 # if XXH_SIZE_OPT >= 1
3644 # define XXH_PREFETCH(ptr) (void)(ptr)
3645 # elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
3646 # include <mmintrin.h>
3647 # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
3648 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
3649 # define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 , 3 )
3651 # define XXH_PREFETCH(ptr) (void)(ptr)
3660 #define XXH_SECRET_DEFAULT_SIZE 192
3662 #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
3663 # error "default keyset is not large enough"
3667 XXH_ALIGN(64) static
const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
3668 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
3669 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
3670 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
3671 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
3672 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
3673 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
3674 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
3675 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
3676 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
3677 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
3678 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
3679 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
3682 static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL;
3683 static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL;
3685 #ifdef XXH_OLD_NAMES
3686 # define kSecret XXH3_kSecret
3706 XXH_FORCE_INLINE xxh_u64
3707 XXH_mult32to64(xxh_u64
x, xxh_u64
y)
3711 #elif defined(_MSC_VER) && defined(_M_IX86)
3712 # define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
3721 # define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
3734 XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
3751 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
3752 && defined(__SIZEOF_INT128__) \
3753 || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
3755 __uint128_t
const product = (__uint128_t)lhs * (__uint128_t)rhs;
3757 r128.
low64 = (xxh_u64)(product);
3758 r128.
high64 = (xxh_u64)(product >> 64);
3768 #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
3771 # pragma intrinsic(_umul128)
3773 xxh_u64 product_high;
3774 xxh_u64
const product_low = _umul128(lhs, rhs, &product_high);
3776 r128.
low64 = product_low;
3777 r128.
high64 = product_high;
3785 #elif defined(_M_ARM64) || defined(_M_ARM64EC)
3788 # pragma intrinsic(__umulh)
3791 r128.
low64 = lhs * rhs;
3792 r128.
high64 = __umulh(lhs, rhs);
3841 xxh_u64
const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0
xFFFFFFFF);
3842 xxh_u64
const lo_hi = XXH_mult32to64(lhs & 0
xFFFFFFFF, rhs >> 32);
3843 xxh_u64
const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);
3846 xxh_u64
const cross = (lo_lo >> 32) + (hi_lo & 0
xFFFFFFFF) + lo_hi;
3847 xxh_u64
const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
3848 xxh_u64
const lower = (cross << 32) | (lo_lo & 0
xFFFFFFFF);
3868 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
3875 XXH_FORCE_INLINE
XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64,
int shift)
3877 XXH_ASSERT(0 <= shift && shift < 64);
3878 return v64 ^ (v64 >> shift);
3887 h64 = XXH_xorshift64(h64, 37);
3889 h64 = XXH_xorshift64(h64, 32);
3898 static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
3901 h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
3903 h64 ^= (h64 >> 35) + len ;
3905 return XXH_xorshift64(h64, 28);
3943 XXH3_len_1to3_64b(
const xxh_u8*
input,
size_t len,
const xxh_u8* secret,
XXH64_hash_t seed)
3945 XXH_ASSERT(
input != NULL);
3946 XXH_ASSERT(1 <= len && len <= 3);
3947 XXH_ASSERT(secret != NULL);
3954 xxh_u8
const c2 =
input[len >> 1];
3955 xxh_u8
const c3 =
input[len - 1];
3956 xxh_u32
const combined = ((xxh_u32)
c1 << 16) | ((xxh_u32)
c2 << 24)
3957 | ((xxh_u32)
c3 << 0) | ((xxh_u32)len << 8);
3959 xxh_u64
const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
3960 xxh_u64
const keyed = (xxh_u64)combined ^ bitflip;
3961 return XXH64_avalanche(keyed);
3966 XXH3_len_4to8_64b(
const xxh_u8*
input,
size_t len,
const xxh_u8* secret,
XXH64_hash_t seed)
3968 XXH_ASSERT(
input != NULL);
3969 XXH_ASSERT(secret != NULL);
3970 XXH_ASSERT(4 <= len && len <= 8);
3971 seed ^= (xxh_u64)XXH_swap32((xxh_u32)
seed) << 32;
3973 xxh_u32
const input2 = XXH_readLE32(
input + len - 4);
3974 xxh_u64
const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
3975 xxh_u64
const input64 = input2 + (((xxh_u64)
input1) << 32);
3976 xxh_u64
const keyed = input64 ^ bitflip;
3977 return XXH3_rrmxmx(keyed, len);
3982 XXH3_len_9to16_64b(
const xxh_u8*
input,
size_t len,
const xxh_u8* secret,
XXH64_hash_t seed)
3984 XXH_ASSERT(
input != NULL);
3985 XXH_ASSERT(secret != NULL);
3986 XXH_ASSERT(9 <= len && len <= 16);
3987 { xxh_u64
const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
3988 xxh_u64
const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
3989 xxh_u64
const input_lo = XXH_readLE64(
input) ^ bitflip1;
3990 xxh_u64
const input_hi = XXH_readLE64(
input + len - 8) ^ bitflip2;
3991 xxh_u64
const acc = len
3992 + XXH_swap64(input_lo) + input_hi
3993 + XXH3_mul128_fold64(input_lo, input_hi);
3994 return XXH3_avalanche(
acc);
3999 XXH3_len_0to16_64b(
const xxh_u8*
input,
size_t len,
const xxh_u8* secret,
XXH64_hash_t seed)
4001 XXH_ASSERT(len <= 16);
4002 {
if (XXH_likely(len > 8))
return XXH3_len_9to16_64b(
input, len, secret, seed);
4003 if (XXH_likely(len >= 4))
return XXH3_len_4to8_64b(
input, len, secret, seed);
4004 if (len)
return XXH3_len_1to3_64b(
input, len, secret, seed);
4005 return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
4035 XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(
const xxh_u8* XXH_RESTRICT
input,
4036 const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
4038 #if defined(__GNUC__) && !defined(__clang__) \
4039 && defined(__i386__) && defined(__SSE2__) \
4040 && !defined(XXH_ENABLE_AUTOVECTORIZE)
4056 XXH_COMPILER_GUARD(seed64);
4058 { xxh_u64
const input_lo = XXH_readLE64(
input);
4059 xxh_u64
const input_hi = XXH_readLE64(
input+8);
4060 return XXH3_mul128_fold64(
4061 input_lo ^ (XXH_readLE64(secret) + seed64),
4062 input_hi ^ (XXH_readLE64(secret+8) - seed64)
4069 XXH3_len_17to128_64b(
const xxh_u8* XXH_RESTRICT
input,
size_t len,
4070 const xxh_u8* XXH_RESTRICT secret,
size_t secretSize,
4074 XXH_ASSERT(16 < len && len <= 128);
4076 { xxh_u64
acc = len * XXH_PRIME64_1, acc_end;
4077 #if XXH_SIZE_OPT >= 1
4079 unsigned int i = (
unsigned int)(len - 1) / 32;
4081 acc += XXH3_mix16B(
input+16 *
i, secret+32*
i, seed);
4082 acc += XXH3_mix16B(
input+len-16*(
i+1), secret+32*
i+16, seed);
4086 acc += XXH3_mix16B(
input+0, secret+0, seed);
4087 acc_end = XXH3_mix16B(
input+len-16, secret+16, seed);
4089 acc += XXH3_mix16B(
input+16, secret+32, seed);
4090 acc_end += XXH3_mix16B(
input+len-32, secret+48, seed);
4092 acc += XXH3_mix16B(
input+32, secret+64, seed);
4093 acc_end += XXH3_mix16B(
input+len-48, secret+80, seed);
4096 acc += XXH3_mix16B(
input+48, secret+96, seed);
4097 acc_end += XXH3_mix16B(
input+len-64, secret+112, seed);
4102 return XXH3_avalanche(
acc + acc_end);
4106 #define XXH3_MIDSIZE_MAX 240
4109 XXH3_len_129to240_64b(
const xxh_u8* XXH_RESTRICT
input,
size_t len,
4110 const xxh_u8* XXH_RESTRICT secret,
size_t secretSize,
4114 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4116 #define XXH3_MIDSIZE_STARTOFFSET 3
4117 #define XXH3_MIDSIZE_LASTOFFSET 17
4119 { xxh_u64
acc = len * XXH_PRIME64_1;
4121 unsigned int const nbRounds = (
unsigned int)len / 16;
4123 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4124 for (
i=0;
i<8;
i++) {
4125 acc += XXH3_mix16B(
input+(16*
i), secret+(16*
i), seed);
4129 XXH_ASSERT(nbRounds >= 8);
4130 acc = XXH3_avalanche(
acc);
4131 #if defined(__clang__) \
4132 && (defined(__ARM_NEON) || defined(__ARM_NEON__)) \
4133 && !defined(XXH_ENABLE_AUTOVECTORIZE)
4154 #pragma clang loop vectorize(disable)
4156 for (
i=8 ;
i < nbRounds;
i++) {
4160 XXH_COMPILER_GUARD(
acc);
4161 acc_end += XXH3_mix16B(
input+(16*
i), secret+(16*(
i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
4163 return XXH3_avalanche(
acc + acc_end);
4170 #define XXH_STRIPE_LEN 64
4171 #define XXH_SECRET_CONSUME_RATE 8
4172 #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
4174 #ifdef XXH_OLD_NAMES
4175 # define STRIPE_LEN XXH_STRIPE_LEN
4176 # define ACC_NB XXH_ACC_NB
4179 #ifndef XXH_PREFETCH_DIST
4181 # define XXH_PREFETCH_DIST 320
4183 # if (XXH_VECTOR == XXH_AVX512)
4184 # define XXH_PREFETCH_DIST 512
4186 # define XXH_PREFETCH_DIST 384
4201 #define XXH3_ACCUMULATE_TEMPLATE(name) \
4203 XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \
4204 const xxh_u8* XXH_RESTRICT input, \
4205 const xxh_u8* XXH_RESTRICT secret, \
4209 for (n = 0; n < nbStripes; n++ ) { \
4210 const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \
4211 XXH_PREFETCH(in + XXH_PREFETCH_DIST); \
4212 XXH3_accumulate_512_##name( \
4215 secret + n*XXH_SECRET_CONSUME_RATE); \
4220 XXH_FORCE_INLINE
void XXH_writeLE64(
void* dst, xxh_u64 v64)
4222 if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
4223 XXH_memcpy(dst, &v64,
sizeof(v64));
4231 #if !defined (__VMS) \
4232 && (defined (__cplusplus) \
4233 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) ) )
4234 typedef int64_t xxh_i64;
4237 typedef long long xxh_i64;
4264 #if (XXH_VECTOR == XXH_AVX512) \
4265 || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
4267 #ifndef XXH_TARGET_AVX512
4268 # define XXH_TARGET_AVX512
4271 XXH_FORCE_INLINE XXH_TARGET_AVX512
void
4272 XXH3_accumulate_512_avx512(
void* XXH_RESTRICT
acc,
4273 const void* XXH_RESTRICT
input,
4274 const void* XXH_RESTRICT secret)
4276 __m512i*
const xacc = (__m512i *)
acc;
4277 XXH_ASSERT((((
size_t)
acc) & 63) == 0);
4278 XXH_STATIC_ASSERT(XXH_STRIPE_LEN ==
sizeof(__m512i));
4282 __m512i
const data_vec = _mm512_loadu_si512 (
input);
4284 __m512i
const key_vec = _mm512_loadu_si512 (secret);
4286 __m512i
const data_key = _mm512_xor_si512 (data_vec, key_vec);
4288 __m512i
const data_key_lo = _mm512_srli_epi64 (data_key, 32);
4290 __m512i
const product = _mm512_mul_epu32 (data_key, data_key_lo);
4292 __m512i
const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
4293 __m512i
const sum = _mm512_add_epi64(*xacc, data_swap);
4295 *xacc = _mm512_add_epi64(product,
sum);
4298 XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
4321 XXH_FORCE_INLINE XXH_TARGET_AVX512
void
4322 XXH3_scrambleAcc_avx512(
void* XXH_RESTRICT
acc,
const void* XXH_RESTRICT secret)
4324 XXH_ASSERT((((
size_t)
acc) & 63) == 0);
4325 XXH_STATIC_ASSERT(XXH_STRIPE_LEN ==
sizeof(__m512i));
4326 { __m512i*
const xacc = (__m512i*)
acc;
4327 const __m512i prime32 = _mm512_set1_epi32((
int)XXH_PRIME32_1);
4330 __m512i
const acc_vec = *xacc;
4331 __m512i
const shifted = _mm512_srli_epi64 (acc_vec, 47);
4333 __m512i
const key_vec = _mm512_loadu_si512 (secret);
4334 __m512i
const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 );
4337 __m512i
const data_key_hi = _mm512_srli_epi64 (data_key, 32);
4338 __m512i
const prod_lo = _mm512_mul_epu32 (data_key, prime32);
4339 __m512i
const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
4340 *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
4344 XXH_FORCE_INLINE XXH_TARGET_AVX512
void
4345 XXH3_initCustomSecret_avx512(
void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4347 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
4348 XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
4349 XXH_ASSERT(((
size_t)customSecret & 63) == 0);
4350 (void)(&XXH_writeLE64);
4351 {
int const nbRounds = XXH_SECRET_DEFAULT_SIZE /
sizeof(__m512i);
4352 __m512i
const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
4353 __m512i
const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
4355 const __m512i*
const src = (
const __m512i*) ((
const void*) XXH3_kSecret);
4356 __m512i*
const dest = ( __m512i*) customSecret;
4358 XXH_ASSERT(((
size_t)
src & 63) == 0);
4359 XXH_ASSERT(((
size_t)
dest & 63) == 0);
4360 for (
i=0;
i < nbRounds; ++
i) {
4361 dest[
i] = _mm512_add_epi64(_mm512_load_si512(
src +
i), seed);
4367 #if (XXH_VECTOR == XXH_AVX2) \
4368 || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
4370 #ifndef XXH_TARGET_AVX2
4371 # define XXH_TARGET_AVX2
4374 XXH_FORCE_INLINE XXH_TARGET_AVX2
void
4375 XXH3_accumulate_512_avx2(
void* XXH_RESTRICT
acc,
4376 const void* XXH_RESTRICT
input,
4377 const void* XXH_RESTRICT secret)
4379 XXH_ASSERT((((
size_t)
acc) & 31) == 0);
4380 { __m256i*
const xacc = (__m256i *)
acc;
4383 const __m256i*
const xinput = (
const __m256i *)
input;
4386 const __m256i*
const xsecret = (
const __m256i *) secret;
4389 for (
i=0;
i < XXH_STRIPE_LEN/
sizeof(__m256i);
i++) {
4391 __m256i
const data_vec = _mm256_loadu_si256 (xinput+
i);
4393 __m256i
const key_vec = _mm256_loadu_si256 (xsecret+
i);
4395 __m256i
const data_key = _mm256_xor_si256 (data_vec, key_vec);
4397 __m256i
const data_key_lo = _mm256_srli_epi64 (data_key, 32);
4399 __m256i
const product = _mm256_mul_epu32 (data_key, data_key_lo);
4401 __m256i
const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
4402 __m256i
const sum = _mm256_add_epi64(xacc[
i], data_swap);
4404 xacc[
i] = _mm256_add_epi64(product,
sum);
4407 XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
4409 XXH_FORCE_INLINE XXH_TARGET_AVX2
void
4410 XXH3_scrambleAcc_avx2(
void* XXH_RESTRICT
acc,
const void* XXH_RESTRICT secret)
4412 XXH_ASSERT((((
size_t)
acc) & 31) == 0);
4413 { __m256i*
const xacc = (__m256i*)
acc;
4416 const __m256i*
const xsecret = (
const __m256i *) secret;
4417 const __m256i prime32 = _mm256_set1_epi32((
int)XXH_PRIME32_1);
4420 for (
i=0;
i < XXH_STRIPE_LEN/
sizeof(__m256i);
i++) {
4422 __m256i
const acc_vec = xacc[
i];
4423 __m256i
const shifted = _mm256_srli_epi64 (acc_vec, 47);
4424 __m256i
const data_vec = _mm256_xor_si256 (acc_vec, shifted);
4426 __m256i
const key_vec = _mm256_loadu_si256 (xsecret+
i);
4427 __m256i
const data_key = _mm256_xor_si256 (data_vec, key_vec);
4430 __m256i
const data_key_hi = _mm256_srli_epi64 (data_key, 32);
4431 __m256i
const prod_lo = _mm256_mul_epu32 (data_key, prime32);
4432 __m256i
const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
4433 xacc[
i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
4438 XXH_FORCE_INLINE XXH_TARGET_AVX2
void XXH3_initCustomSecret_avx2(
void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4440 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
4441 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE /
sizeof(__m256i)) == 6);
4442 XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
4443 (void)(&XXH_writeLE64);
4444 XXH_PREFETCH(customSecret);
4445 { __m256i
const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
4447 const __m256i*
const src = (
const __m256i*) ((
const void*) XXH3_kSecret);
4448 __m256i*
dest = ( __m256i*) customSecret;
4450 # if defined(__GNUC__) || defined(__clang__)
4456 XXH_COMPILER_GUARD(
dest);
4458 XXH_ASSERT(((
size_t)
src & 31) == 0);
4459 XXH_ASSERT(((
size_t)
dest & 31) == 0);
4462 dest[0] = _mm256_add_epi64(_mm256_load_si256(
src+0), seed);
4463 dest[1] = _mm256_add_epi64(_mm256_load_si256(
src+1), seed);
4464 dest[2] = _mm256_add_epi64(_mm256_load_si256(
src+2), seed);
4465 dest[3] = _mm256_add_epi64(_mm256_load_si256(
src+3), seed);
4466 dest[4] = _mm256_add_epi64(_mm256_load_si256(
src+4), seed);
4467 dest[5] = _mm256_add_epi64(_mm256_load_si256(
src+5), seed);
4474 #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
4476 #ifndef XXH_TARGET_SSE2
4477 # define XXH_TARGET_SSE2
4480 XXH_FORCE_INLINE XXH_TARGET_SSE2
void
4481 XXH3_accumulate_512_sse2(
void* XXH_RESTRICT
acc,
4482 const void* XXH_RESTRICT
input,
4483 const void* XXH_RESTRICT secret)
4486 XXH_ASSERT((((
size_t)
acc) & 15) == 0);
4487 { __m128i*
const xacc = (__m128i *)
acc;
4490 const __m128i*
const xinput = (
const __m128i *)
input;
4493 const __m128i*
const xsecret = (
const __m128i *) secret;
4496 for (
i=0;
i < XXH_STRIPE_LEN/
sizeof(__m128i);
i++) {
4498 __m128i
const data_vec = _mm_loadu_si128 (xinput+
i);
4500 __m128i
const key_vec = _mm_loadu_si128 (xsecret+
i);
4502 __m128i
const data_key = _mm_xor_si128 (data_vec, key_vec);
4504 __m128i
const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4506 __m128i
const product = _mm_mul_epu32 (data_key, data_key_lo);
4508 __m128i
const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
4509 __m128i
const sum = _mm_add_epi64(xacc[
i], data_swap);
4511 xacc[
i] = _mm_add_epi64(product,
sum);
4514 XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
4516 XXH_FORCE_INLINE XXH_TARGET_SSE2
void
4517 XXH3_scrambleAcc_sse2(
void* XXH_RESTRICT
acc,
const void* XXH_RESTRICT secret)
4519 XXH_ASSERT((((
size_t)
acc) & 15) == 0);
4520 { __m128i*
const xacc = (__m128i*)
acc;
4523 const __m128i*
const xsecret = (
const __m128i *) secret;
4524 const __m128i prime32 = _mm_set1_epi32((
int)XXH_PRIME32_1);
4527 for (
i=0;
i < XXH_STRIPE_LEN/
sizeof(__m128i);
i++) {
4529 __m128i
const acc_vec = xacc[
i];
4530 __m128i
const shifted = _mm_srli_epi64 (acc_vec, 47);
4531 __m128i
const data_vec = _mm_xor_si128 (acc_vec, shifted);
4533 __m128i
const key_vec = _mm_loadu_si128 (xsecret+
i);
4534 __m128i
const data_key = _mm_xor_si128 (data_vec, key_vec);
4537 __m128i
const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4538 __m128i
const prod_lo = _mm_mul_epu32 (data_key, prime32);
4539 __m128i
const prod_hi = _mm_mul_epu32 (data_key_hi, prime32);
4540 xacc[
i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
4545 XXH_FORCE_INLINE XXH_TARGET_SSE2
void XXH3_initCustomSecret_sse2(
void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4547 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
4548 (void)(&XXH_writeLE64);
4549 {
int const nbRounds = XXH_SECRET_DEFAULT_SIZE /
sizeof(__m128i);
4551 # if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
4553 XXH_ALIGN(16)
const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
4554 __m128i
const seed = _mm_load_si128((__m128i
const*)seed64x2);
4556 __m128i
const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
4560 const void*
const src16 = XXH3_kSecret;
4561 __m128i* dst16 = (__m128i*) customSecret;
4562 # if defined(__GNUC__) || defined(__clang__)
4568 XXH_COMPILER_GUARD(dst16);
4570 XXH_ASSERT(((
size_t)src16 & 15) == 0);
4571 XXH_ASSERT(((
size_t)dst16 & 15) == 0);
4573 for (
i=0;
i < nbRounds; ++
i) {
4574 dst16[
i] = _mm_add_epi64(_mm_load_si128((
const __m128i *)src16+
i), seed);
4580 #if (XXH_VECTOR == XXH_NEON)
4583 XXH_FORCE_INLINE
void
4584 XXH3_scalarRound(
void* XXH_RESTRICT
acc,
void const* XXH_RESTRICT
input,
4585 void const* XXH_RESTRICT secret,
size_t lane);
4587 XXH_FORCE_INLINE
void
4588 XXH3_scalarScrambleRound(
void* XXH_RESTRICT
acc,
4589 void const* XXH_RESTRICT secret,
size_t lane);
4611 XXH_FORCE_INLINE
void
4612 XXH3_accumulate_512_neon(
void* XXH_RESTRICT
acc,
4613 const void* XXH_RESTRICT
input,
4614 const void* XXH_RESTRICT secret)
4616 XXH_ASSERT((((
size_t)
acc) & 15) == 0);
4617 XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
4619 xxh_aliasing_uint64x2_t*
const xacc = (xxh_aliasing_uint64x2_t*)
acc;
4626 for (
i = XXH3_NEON_LANES;
i < XXH_ACC_NB;
i++) {
4627 XXH3_scalarRound(
acc,
input, secret,
i);
4631 for (;
i+1 < XXH3_NEON_LANES / 2;
i+=2) {
4633 uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (
i * 16));
4634 uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((
i+1) * 16));
4636 uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (
i * 16));
4637 uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((
i+1) * 16));
4639 uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
4640 uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
4642 uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
4643 uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
4658 uint32x4x2_t unzipped = vuzpq_u32(
4659 vreinterpretq_u32_u64(data_key_1),
4660 vreinterpretq_u32_u64(data_key_2)
4663 uint32x4_t data_key_lo = unzipped.val[0];
4665 uint32x4_t data_key_hi = unzipped.val[1];
4673 uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
4674 uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
4687 XXH_COMPILER_GUARD_W(sum_1);
4688 XXH_COMPILER_GUARD_W(sum_2);
4690 xacc[
i] = vaddq_u64(xacc[
i], sum_1);
4691 xacc[
i+1] = vaddq_u64(xacc[
i+1], sum_2);
4694 for (;
i < XXH3_NEON_LANES / 2;
i++) {
4696 uint64x2_t data_vec = XXH_vld1q_u64(xinput + (
i * 16));
4698 uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (
i * 16));
4700 uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
4702 uint64x2_t data_key = veorq_u64(data_vec, key_vec);
4705 uint32x2_t data_key_lo = vmovn_u64(data_key);
4707 uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
4709 uint64x2_t
sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
4711 XXH_COMPILER_GUARD_W(
sum);
4713 xacc[
i] = vaddq_u64 (xacc[
i],
sum);
4717 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
4719 XXH_FORCE_INLINE
void
4720 XXH3_scrambleAcc_neon(
void* XXH_RESTRICT
acc,
const void* XXH_RESTRICT secret)
4722 XXH_ASSERT((((
size_t)
acc) & 15) == 0);
4724 { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*)
acc;
4726 uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);
4730 for (
i = XXH3_NEON_LANES;
i < XXH_ACC_NB;
i++) {
4731 XXH3_scalarScrambleRound(
acc, secret,
i);
4733 for (
i=0;
i < XXH3_NEON_LANES / 2;
i++) {
4735 uint64x2_t acc_vec = xacc[
i];
4736 uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
4737 uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
4740 uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (
i * 16));
4741 uint64x2_t data_key = veorq_u64(data_vec, key_vec);
4744 uint32x2_t data_key_lo = vmovn_u64(data_key);
4745 uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
4764 uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
4766 prod_hi = vshlq_n_u64(prod_hi, 32);
4768 xacc[
i] = vmlal_u32(prod_hi, data_key_lo, prime);
4774 #if (XXH_VECTOR == XXH_VSX)
4776 XXH_FORCE_INLINE
void
4777 XXH3_accumulate_512_vsx(
void* XXH_RESTRICT
acc,
4778 const void* XXH_RESTRICT
input,
4779 const void* XXH_RESTRICT secret)
4782 xxh_aliasing_u64x2*
const xacc = (xxh_aliasing_u64x2*)
acc;
4783 xxh_u8
const*
const xinput = (xxh_u8
const*)
input;
4784 xxh_u8
const*
const xsecret = (xxh_u8
const*) secret;
4785 xxh_u64x2
const v32 = { 32, 32 };
4787 for (
i = 0;
i < XXH_STRIPE_LEN /
sizeof(xxh_u64x2);
i++) {
4789 xxh_u64x2
const data_vec = XXH_vec_loadu(xinput + 16*
i);
4791 xxh_u64x2
const key_vec = XXH_vec_loadu(xsecret + 16*
i);
4792 xxh_u64x2
const data_key = data_vec ^ key_vec;
4794 xxh_u32x4
const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
4796 xxh_u64x2
const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
4798 xxh_u64x2 acc_vec = xacc[
i];
4803 acc_vec += vec_permi(data_vec, data_vec, 2);
4805 acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
4810 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
4812 XXH_FORCE_INLINE
void
4813 XXH3_scrambleAcc_vsx(
void* XXH_RESTRICT
acc,
const void* XXH_RESTRICT secret)
4815 XXH_ASSERT((((
size_t)
acc) & 15) == 0);
4817 { xxh_aliasing_u64x2*
const xacc = (xxh_aliasing_u64x2*)
acc;
4818 const xxh_u8*
const xsecret = (
const xxh_u8*) secret;
4820 xxh_u64x2
const v32 = { 32, 32 };
4821 xxh_u64x2
const v47 = { 47, 47 };
4822 xxh_u32x4
const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
4824 for (
i = 0;
i < XXH_STRIPE_LEN /
sizeof(xxh_u64x2);
i++) {
4826 xxh_u64x2
const acc_vec = xacc[
i];
4827 xxh_u64x2
const data_vec = acc_vec ^ (acc_vec >> v47);
4830 xxh_u64x2
const key_vec = XXH_vec_loadu(xsecret + 16*
i);
4831 xxh_u64x2
const data_key = data_vec ^ key_vec;
4835 xxh_u64x2
const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
4837 xxh_u64x2
const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime);
4838 xacc[
i] = prod_odd + (prod_even << v32);
4844 #if (XXH_VECTOR == XXH_SVE)
4846 XXH_FORCE_INLINE
void
4847 XXH3_accumulate_512_sve(
void* XXH_RESTRICT
acc,
4848 const void* XXH_RESTRICT
input,
4849 const void* XXH_RESTRICT secret)
4854 svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
4856 if (element_count >= 8) {
4857 svbool_t
mask = svptrue_pat_b64(SV_VL8);
4858 svuint64_t vacc = svld1_u64(
mask, xacc);
4860 svst1_u64(
mask, xacc, vacc);
4861 }
else if (element_count == 2) {
4862 svbool_t
mask = svptrue_pat_b64(SV_VL2);
4863 svuint64_t acc0 = svld1_u64(
mask, xacc + 0);
4864 svuint64_t
acc1 = svld1_u64(
mask, xacc + 2);
4865 svuint64_t
acc2 = svld1_u64(
mask, xacc + 4);
4866 svuint64_t
acc3 = svld1_u64(
mask, xacc + 6);
4871 svst1_u64(
mask, xacc + 0, acc0);
4876 svbool_t
mask = svptrue_pat_b64(SV_VL4);
4877 svuint64_t acc0 = svld1_u64(
mask, xacc + 0);
4878 svuint64_t
acc1 = svld1_u64(
mask, xacc + 4);
4881 svst1_u64(
mask, xacc + 0, acc0);
4886 XXH_FORCE_INLINE
void
4887 XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT
acc,
4888 const xxh_u8* XXH_RESTRICT
input,
4889 const xxh_u8* XXH_RESTRICT secret,
4892 if (nbStripes != 0) {
4896 svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
4898 if (element_count >= 8) {
4899 svbool_t
mask = svptrue_pat_b64(SV_VL8);
4900 svuint64_t vacc = svld1_u64(
mask, xacc + 0);
4903 svprfd(
mask, xinput + 128, SV_PLDL1STRM);
4908 }
while (nbStripes != 0);
4910 svst1_u64(
mask, xacc + 0, vacc);
4911 }
else if (element_count == 2) {
4912 svbool_t
mask = svptrue_pat_b64(SV_VL2);
4913 svuint64_t acc0 = svld1_u64(
mask, xacc + 0);
4914 svuint64_t
acc1 = svld1_u64(
mask, xacc + 2);
4915 svuint64_t
acc2 = svld1_u64(
mask, xacc + 4);
4916 svuint64_t
acc3 = svld1_u64(
mask, xacc + 6);
4918 svprfd(
mask, xinput + 128, SV_PLDL1STRM);
4926 }
while (nbStripes != 0);
4928 svst1_u64(
mask, xacc + 0, acc0);
4933 svbool_t
mask = svptrue_pat_b64(SV_VL4);
4934 svuint64_t acc0 = svld1_u64(
mask, xacc + 0);
4935 svuint64_t
acc1 = svld1_u64(
mask, xacc + 4);
4937 svprfd(
mask, xinput + 128, SV_PLDL1STRM);
4943 }
while (nbStripes != 0);
4945 svst1_u64(
mask, xacc + 0, acc0);
4955 #if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
4970 XXH_FORCE_INLINE xxh_u64
4971 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64
acc)
4975 __asm__(
"umaddl %x0, %w1, %w2, %x3" :
"=r" (
ret) :
"r" (lhs),
"r" (rhs),
"r" (
acc));
4979 XXH_FORCE_INLINE xxh_u64
4980 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64
acc)
4982 return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) +
acc;
4993 XXH_FORCE_INLINE
void
4994 XXH3_scalarRound(
void* XXH_RESTRICT
acc,
4995 void const* XXH_RESTRICT
input,
4996 void const* XXH_RESTRICT secret,
4999 xxh_u64* xacc = (xxh_u64*)
acc;
5000 xxh_u8
const* xinput = (xxh_u8
const*)
input;
5001 xxh_u8
const* xsecret = (xxh_u8
const*) secret;
5002 XXH_ASSERT(lane < XXH_ACC_NB);
5003 XXH_ASSERT(((
size_t)
acc & (XXH_ACC_ALIGN-1)) == 0);
5005 xxh_u64
const data_val = XXH_readLE64(xinput + lane * 8);
5006 xxh_u64
const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
5007 xacc[lane ^ 1] += data_val;
5008 xacc[lane] = XXH_mult32to64_add64(data_key , data_key >> 32, xacc[lane]);
5016 XXH_FORCE_INLINE
void
5017 XXH3_accumulate_512_scalar(
void* XXH_RESTRICT
acc,
5018 const void* XXH_RESTRICT
input,
5019 const void* XXH_RESTRICT secret)
5023 #if defined(__GNUC__) && !defined(__clang__) \
5024 && (defined(__arm__) || defined(__thumb2__)) \
5025 && defined(__ARM_FEATURE_UNALIGNED) \
5026 && XXH_SIZE_OPT <= 0
5027 # pragma GCC unroll 8
5029 for (
i=0;
i < XXH_ACC_NB;
i++) {
5030 XXH3_scalarRound(
acc,
input, secret,
i);
5033 XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
5042 XXH_FORCE_INLINE
void
5043 XXH3_scalarScrambleRound(
void* XXH_RESTRICT
acc,
5044 void const* XXH_RESTRICT secret,
5047 xxh_u64*
const xacc = (xxh_u64*)
acc;
5048 const xxh_u8*
const xsecret = (
const xxh_u8*) secret;
5049 XXH_ASSERT((((
size_t)
acc) & (XXH_ACC_ALIGN-1)) == 0);
5050 XXH_ASSERT(lane < XXH_ACC_NB);
5052 xxh_u64
const key64 = XXH_readLE64(xsecret + lane * 8);
5053 xxh_u64 acc64 = xacc[lane];
5054 acc64 = XXH_xorshift64(acc64, 47);
5056 acc64 *= XXH_PRIME32_1;
5065 XXH_FORCE_INLINE
void
5066 XXH3_scrambleAcc_scalar(
void* XXH_RESTRICT
acc,
const void* XXH_RESTRICT secret)
5069 for (
i=0;
i < XXH_ACC_NB;
i++) {
5070 XXH3_scalarScrambleRound(
acc, secret,
i);
5074 XXH_FORCE_INLINE
void
5075 XXH3_initCustomSecret_scalar(
void* XXH_RESTRICT customSecret, xxh_u64 seed64)
5082 const xxh_u8* kSecretPtr = XXH3_kSecret;
5083 XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
5085 #if defined(__GNUC__) && defined(__aarch64__)
5118 XXH_COMPILER_GUARD(kSecretPtr);
5120 {
int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
5122 for (
i=0;
i < nbRounds;
i++) {
5129 xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*
i) + seed64;
5130 xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*
i + 8) - seed64;
5131 XXH_writeLE64((xxh_u8*)customSecret + 16*
i, lo);
5132 XXH_writeLE64((xxh_u8*)customSecret + 16*
i + 8, hi);
5137 typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT,
const xxh_u8* XXH_RESTRICT,
const xxh_u8* XXH_RESTRICT, size_t);
5138 typedef void (*XXH3_f_scrambleAcc)(
void* XXH_RESTRICT,
const void*);
5139 typedef void (*XXH3_f_initCustomSecret)(
void* XXH_RESTRICT, xxh_u64);
/* Select the implementation set matching the detected vector ISA. */
#if (XXH_VECTOR == XXH_AVX512)

#define XXH3_accumulate_512   XXH3_accumulate_512_avx512
#define XXH3_accumulate       XXH3_accumulate_avx512
#define XXH3_scrambleAcc      XXH3_scrambleAcc_avx512
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512

#elif (XXH_VECTOR == XXH_AVX2)

#define XXH3_accumulate_512   XXH3_accumulate_512_avx2
#define XXH3_accumulate       XXH3_accumulate_avx2
#define XXH3_scrambleAcc      XXH3_scrambleAcc_avx2
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2

#elif (XXH_VECTOR == XXH_SSE2)

#define XXH3_accumulate_512   XXH3_accumulate_512_sse2
#define XXH3_accumulate       XXH3_accumulate_sse2
#define XXH3_scrambleAcc      XXH3_scrambleAcc_sse2
#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2

#elif (XXH_VECTOR == XXH_NEON)

#define XXH3_accumulate_512   XXH3_accumulate_512_neon
#define XXH3_accumulate       XXH3_accumulate_neon
#define XXH3_scrambleAcc      XXH3_scrambleAcc_neon
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#elif (XXH_VECTOR == XXH_VSX)

#define XXH3_accumulate_512   XXH3_accumulate_512_vsx
#define XXH3_accumulate       XXH3_accumulate_vsx
#define XXH3_scrambleAcc      XXH3_scrambleAcc_vsx
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#elif (XXH_VECTOR == XXH_SVE)
#define XXH3_accumulate_512   XXH3_accumulate_512_sve
#define XXH3_accumulate       XXH3_accumulate_sve
#define XXH3_scrambleAcc      XXH3_scrambleAcc_scalar
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#else /* scalar */

#define XXH3_accumulate_512   XXH3_accumulate_512_scalar
#define XXH3_accumulate       XXH3_accumulate_scalar
#define XXH3_scrambleAcc      XXH3_scrambleAcc_scalar
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#endif

#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
#  undef XXH3_initCustomSecret
#  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#endif
5197 XXH_FORCE_INLINE
void
5198 XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT
acc,
5199 const xxh_u8* XXH_RESTRICT
input,
size_t len,
5200 const xxh_u8* XXH_RESTRICT secret,
size_t secretSize,
5201 XXH3_f_accumulate f_acc,
5202 XXH3_f_scrambleAcc f_scramble)
5204 size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
5205 size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
5206 size_t const nb_blocks = (len - 1) / block_len;
5212 for (
n = 0;
n < nb_blocks;
n++) {
5213 f_acc(
acc,
input +
n*block_len, secret, nbStripesPerBlock);
5214 f_scramble(
acc, secret + secretSize - XXH_STRIPE_LEN);
5218 XXH_ASSERT(len > XXH_STRIPE_LEN);
5219 {
size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
5220 XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
5221 f_acc(
acc,
input + nb_blocks*block_len, secret, nbStripes);
5224 {
const xxh_u8*
const p =
input + len - XXH_STRIPE_LEN;
5225 #define XXH_SECRET_LASTACC_START 7
5226 XXH3_accumulate_512(
acc,
p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
5230 XXH_FORCE_INLINE xxh_u64
5231 XXH3_mix2Accs(
const xxh_u64* XXH_RESTRICT
acc,
const xxh_u8* XXH_RESTRICT secret)
5233 return XXH3_mul128_fold64(
5234 acc[0] ^ XXH_readLE64(secret),
5235 acc[1] ^ XXH_readLE64(secret+8) );
5239 XXH3_mergeAccs(
const xxh_u64* XXH_RESTRICT
acc,
const xxh_u8* XXH_RESTRICT secret, xxh_u64
start)
5241 xxh_u64 result64 =
start;
5244 for (
i = 0;
i < 4;
i++) {
5245 result64 += XXH3_mix2Accs(
acc+2*
i, secret + 16*
i);
5246 #if defined(__clang__) \
5247 && (defined(__arm__) || defined(__thumb__)) \
5248 && (defined(__ARM_NEON) || defined(__ARM_NEON__)) \
5249 && !defined(XXH_ENABLE_AUTOVECTORIZE)
5258 XXH_COMPILER_GUARD(result64);
5262 return XXH3_avalanche(result64);
/* Initial accumulator values: a mix of 32- and 64-bit primes. */
#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
5269 XXH3_hashLong_64b_internal(
const void* XXH_RESTRICT
input,
size_t len,
5270 const void* XXH_RESTRICT secret,
size_t secretSize,
5271 XXH3_f_accumulate f_acc,
5272 XXH3_f_scrambleAcc f_scramble)
5274 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64
acc[XXH_ACC_NB] = XXH3_INIT_ACC;
5276 XXH3_hashLong_internal_loop(
acc, (
const xxh_u8*)
input, len, (
const xxh_u8*)secret, secretSize, f_acc, f_scramble);
5279 XXH_STATIC_ASSERT(
sizeof(
acc) == 64);
5281 #define XXH_SECRET_MERGEACCS_START 11
5282 XXH_ASSERT(secretSize >=
sizeof(
acc) + XXH_SECRET_MERGEACCS_START);
5283 return XXH3_mergeAccs(
acc, (
const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
5294 XXH3_hashLong_64b_withSecret(
const void* XXH_RESTRICT
input,
size_t len,
5295 XXH64_hash_t seed64,
const xxh_u8* XXH_RESTRICT secret,
size_t secretLen)
5298 return XXH3_hashLong_64b_internal(
input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
5308 XXH3_hashLong_64b_default(
const void* XXH_RESTRICT
input,
size_t len,
5309 XXH64_hash_t seed64,
const xxh_u8* XXH_RESTRICT secret,
size_t secretLen)
5311 (void)seed64; (void)secret; (void)secretLen;
5312 return XXH3_hashLong_64b_internal(
input, len, XXH3_kSecret,
sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
5327 XXH3_hashLong_64b_withSeed_internal(
const void*
input,
size_t len,
5329 XXH3_f_accumulate f_acc,
5330 XXH3_f_scrambleAcc f_scramble,
5331 XXH3_f_initCustomSecret f_initSec)
5333 #if XXH_SIZE_OPT <= 0
5335 return XXH3_hashLong_64b_internal(
input, len,
5336 XXH3_kSecret,
sizeof(XXH3_kSecret),
5339 { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
5340 f_initSec(secret, seed);
5341 return XXH3_hashLong_64b_internal(
input, len, secret,
sizeof(secret),
5350 XXH3_hashLong_64b_withSeed(
const void* XXH_RESTRICT
input,
size_t len,
5351 XXH64_hash_t seed,
const xxh_u8* XXH_RESTRICT secret,
size_t secretLen)
5353 (void)secret; (void)secretLen;
5354 return XXH3_hashLong_64b_withSeed_internal(
input, len, seed,
5355 XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
5359 typedef XXH64_hash_t (*XXH3_hashLong64_f)(
const void* XXH_RESTRICT, size_t,
5363 XXH3_64bits_internal(
const void* XXH_RESTRICT
input,
size_t len,
5364 XXH64_hash_t seed64,
const void* XXH_RESTRICT secret,
size_t secretLen,
5365 XXH3_hashLong64_f f_hashLong)
5376 return XXH3_len_0to16_64b((
const xxh_u8*)
input, len, (
const xxh_u8*)secret, seed64);
5378 return XXH3_len_17to128_64b((
const xxh_u8*)
input, len, (
const xxh_u8*)secret, secretLen, seed64);
5379 if (len <= XXH3_MIDSIZE_MAX)
5380 return XXH3_len_129to240_64b((
const xxh_u8*)
input, len, (
const xxh_u8*)secret, secretLen, seed64);
5381 return f_hashLong(
input, len, seed64, (
const xxh_u8*)secret, secretLen);
5390 return XXH3_64bits_internal(
input,
length, 0, XXH3_kSecret,
sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
5397 return XXH3_64bits_internal(
input,
length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
5404 return XXH3_64bits_internal(
input,
length, seed, XXH3_kSecret,
sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
5410 if (
length <= XXH3_MIDSIZE_MAX)
5411 return XXH3_64bits_internal(
input,
length, seed, XXH3_kSecret,
sizeof(XXH3_kSecret), NULL);
5412 return XXH3_hashLong_64b_withSecret(
input,
length, seed, (
const xxh_u8*)secret, secretSize);
5417 #ifndef XXH_NO_STREAM
5441 static XXH_MALLOCF void* XXH_alignedMalloc(
size_t s,
size_t align)
5443 XXH_ASSERT(align <= 128 && align >= 8);
5444 XXH_ASSERT((align & (align-1)) == 0);
5445 XXH_ASSERT(
s != 0 &&
s < (
s + align));
5447 xxh_u8*
base = (xxh_u8*)XXH_malloc(
s + align);
5455 size_t offset = align - ((size_t)
base & (align - 1));
5459 XXH_ASSERT((
size_t)
ptr % align == 0);
5472 static void XXH_alignedFree(
void*
p)
5475 xxh_u8*
ptr = (xxh_u8*)
p;
5487 if (state==NULL)
return NULL;
5488 XXH3_INITSTATE(state);
5495 XXH_alignedFree(statePtr);
5503 XXH_memcpy(dst_state, src_state,
sizeof(*dst_state));
5509 const void* secret,
size_t secretSize)
5511 size_t const initStart = offsetof(
XXH3_state_t, bufferedSize);
5512 size_t const initLength = offsetof(
XXH3_state_t, nbStripesPerBlock) - initStart;
5513 XXH_ASSERT(offsetof(
XXH3_state_t, nbStripesPerBlock) > initStart);
5514 XXH_ASSERT(statePtr != NULL);
5517 memset((
char*)statePtr + initStart, 0, initLength);
5519 statePtr->acc[0] = XXH_PRIME32_3;
5521 statePtr->acc[1] = XXH_PRIME64_1;
5523 statePtr->acc[2] = XXH_PRIME64_2;
5525 statePtr->acc[3] = XXH_PRIME64_3;
5527 statePtr->acc[4] = XXH_PRIME64_4;
5529 statePtr->acc[5] = XXH_PRIME32_2;
5531 statePtr->acc[6] = XXH_PRIME64_5;
5533 statePtr->acc[7] = XXH_PRIME32_1;
5535 statePtr->seed =
seed;
5537 statePtr->useSeed = (
seed != 0);
5539 statePtr->extSecret = (
const unsigned char*)secret;
5541 statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
5542 statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
5550 XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
5559 XXH3_reset_internal(statePtr, 0, secret, secretSize);
5571 if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
5572 XXH3_initCustomSecret(statePtr->customSecret, seed);
5573 XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
5584 XXH3_reset_internal(statePtr, seed64, secret, secretSize);
5585 statePtr->useSeed = 1;
5606 XXH_FORCE_INLINE
const xxh_u8 *
5607 XXH3_consumeStripes(xxh_u64* XXH_RESTRICT
acc,
5608 size_t* XXH_RESTRICT nbStripesSoFarPtr,
size_t nbStripesPerBlock,
5609 const xxh_u8* XXH_RESTRICT
input,
size_t nbStripes,
5610 const xxh_u8* XXH_RESTRICT secret,
size_t secretLimit,
5611 XXH3_f_accumulate f_acc,
5612 XXH3_f_scrambleAcc f_scramble)
5614 const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
5616 if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
5618 size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
5622 f_acc(
acc,
input, initialSecret, nbStripesThisIter);
5623 f_scramble(
acc, secret + secretLimit);
5624 input += nbStripesThisIter * XXH_STRIPE_LEN;
5625 nbStripes -= nbStripesThisIter;
5627 nbStripesThisIter = nbStripesPerBlock;
5628 initialSecret = secret;
5629 }
while (nbStripes >= nbStripesPerBlock);
5630 *nbStripesSoFarPtr = 0;
5633 if (nbStripes > 0) {
5634 f_acc(
acc,
input, initialSecret, nbStripes);
5635 input += nbStripes * XXH_STRIPE_LEN;
5636 *nbStripesSoFarPtr += nbStripes;
/* When enabled, XXH3_update() works on a stack copy of the accumulators,
 * which improves codegen on gcc and MSVC; clang does not need the copy. */
#ifndef XXH3_STREAM_USE_STACK
# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need that trick */
#   define XXH3_STREAM_USE_STACK 1
# endif
#endif
5652 const xxh_u8* XXH_RESTRICT
input,
size_t len,
5653 XXH3_f_accumulate f_acc,
5654 XXH3_f_scrambleAcc f_scramble)
5657 XXH_ASSERT(len == 0);
5661 XXH_ASSERT(state != NULL);
5662 {
const xxh_u8*
const bEnd =
input + len;
5664 const unsigned char*
const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5665 #
if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
5670 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64
acc[8];
5671 XXH_memcpy(
acc, state->acc,
sizeof(
acc));
5673 xxh_u64* XXH_RESTRICT
const acc = state->acc;
5675 state->totalLen += len;
5676 XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
5679 if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
5680 XXH_memcpy(state->buffer + state->bufferedSize,
input, len);
5681 state->bufferedSize += (XXH32_hash_t)len;
5686 #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
5687 XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);
5693 if (state->bufferedSize) {
5694 size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
5695 XXH_memcpy(state->buffer + state->bufferedSize,
input, loadSize);
5697 XXH3_consumeStripes(
acc,
5698 &state->nbStripesSoFar, state->nbStripesPerBlock,
5699 state->buffer, XXH3_INTERNALBUFFER_STRIPES,
5700 secret, state->secretLimit,
5702 state->bufferedSize = 0;
5704 XXH_ASSERT(
input < bEnd);
5705 if (bEnd -
input > XXH3_INTERNALBUFFER_SIZE) {
5706 size_t nbStripes = (size_t)(bEnd - 1 -
input) / XXH_STRIPE_LEN;
5708 &state->nbStripesSoFar, state->nbStripesPerBlock,
5710 secret, state->secretLimit,
5712 XXH_memcpy(state->buffer +
sizeof(state->buffer) - XXH_STRIPE_LEN,
input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
5716 XXH_ASSERT(
input < bEnd);
5717 XXH_ASSERT(bEnd -
input <= XXH3_INTERNALBUFFER_SIZE);
5718 XXH_ASSERT(state->bufferedSize == 0);
5719 XXH_memcpy(state->buffer,
input, (
size_t)(bEnd-
input));
5720 state->bufferedSize = (XXH32_hash_t)(bEnd-
input);
5721 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
5723 XXH_memcpy(state->acc,
acc,
sizeof(
acc));
5734 return XXH3_update(state, (
const xxh_u8*)
input, len,
5735 XXH3_accumulate, XXH3_scrambleAcc);
5739 XXH_FORCE_INLINE
void
5742 const unsigned char* secret)
5744 xxh_u8 lastStripe[XXH_STRIPE_LEN];
5745 const xxh_u8* lastStripePtr;
5751 XXH_memcpy(
acc, state->acc,
sizeof(state->acc));
5752 if (state->bufferedSize >= XXH_STRIPE_LEN) {
5754 size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
5755 size_t nbStripesSoFar = state->nbStripesSoFar;
5756 XXH3_consumeStripes(
acc,
5757 &nbStripesSoFar, state->nbStripesPerBlock,
5758 state->buffer, nbStripes,
5759 secret, state->secretLimit,
5760 XXH3_accumulate, XXH3_scrambleAcc);
5761 lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
5764 size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
5765 XXH_ASSERT(state->bufferedSize > 0);
5766 XXH_memcpy(lastStripe, state->buffer +
sizeof(state->buffer) - catchupSize, catchupSize);
5767 XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
5768 lastStripePtr = lastStripe;
5771 XXH3_accumulate_512(
acc,
5773 secret + state->secretLimit - XXH_SECRET_LASTACC_START);
5779 const unsigned char*
const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5780 if (state->totalLen > XXH3_MIDSIZE_MAX) {
5782 XXH3_digest_long(
acc, state, secret);
5783 return XXH3_mergeAccs(
acc,
5784 secret + XXH_SECRET_MERGEACCS_START,
5785 (xxh_u64)state->totalLen * XXH_PRIME64_1);
5791 secret, state->secretLimit + XXH_STRIPE_LEN);
5814 XXH3_len_1to3_128b(
const xxh_u8*
input,
size_t len,
const xxh_u8* secret,
XXH64_hash_t seed)
5817 XXH_ASSERT(
input != NULL);
5818 XXH_ASSERT(1 <= len && len <= 3);
5819 XXH_ASSERT(secret != NULL);
5826 xxh_u8
const c2 =
input[len >> 1];
5827 xxh_u8
const c3 =
input[len - 1];
5828 xxh_u32
const combinedl = ((xxh_u32)
c1 <<16) | ((xxh_u32)
c2 << 24)
5829 | ((xxh_u32)
c3 << 0) | ((xxh_u32)len << 8);
5830 xxh_u32
const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
5832 xxh_u64
const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
5834 xxh_u64
const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
5835 xxh_u64
const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
5836 xxh_u64
const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
5838 h128.
low64 = XXH64_avalanche(keyed_lo);
5839 h128.
high64 = XXH64_avalanche(keyed_hi);
5845 XXH3_len_4to8_128b(
const xxh_u8*
input,
size_t len,
const xxh_u8* secret,
XXH64_hash_t seed)
5847 XXH_ASSERT(
input != NULL);
5848 XXH_ASSERT(secret != NULL);
5849 XXH_ASSERT(4 <= len && len <= 8);
5850 seed ^= (xxh_u64)XXH_swap32((xxh_u32)
seed) << 32;
5851 { xxh_u32
const input_lo = XXH_readLE32(
input);
5852 xxh_u32
const input_hi = XXH_readLE32(
input + len - 4);
5853 xxh_u64
const input_64 = input_lo + ((xxh_u64)input_hi << 32);
5854 xxh_u64
const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
5855 xxh_u64
const keyed = input_64 ^ bitflip;
5858 XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
5864 m128.
low64 *= PRIME_MX2;
5872 XXH3_len_9to16_128b(
const xxh_u8*
input,
size_t len,
const xxh_u8* secret,
XXH64_hash_t seed)
5874 XXH_ASSERT(
input != NULL);
5875 XXH_ASSERT(secret != NULL);
5876 XXH_ASSERT(9 <= len && len <= 16);
5877 { xxh_u64
const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
5878 xxh_u64
const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
5879 xxh_u64
const input_lo = XXH_readLE64(
input);
5880 xxh_u64 input_hi = XXH_readLE64(
input + len - 8);
5881 XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
5886 m128.
low64 += (xxh_u64)(len - 1) << 54;
5887 input_hi ^= bitfliph;
5895 if (
sizeof(
void *) <
sizeof(xxh_u64)) {
5902 m128.
high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
5928 m128.
high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
5947 XXH3_len_0to16_128b(
const xxh_u8*
input,
size_t len,
const xxh_u8* secret,
XXH64_hash_t seed)
5949 XXH_ASSERT(len <= 16);
5950 {
if (len > 8)
return XXH3_len_9to16_128b(
input, len, secret, seed);
5951 if (len >= 4)
return XXH3_len_4to8_128b(
input, len, secret, seed);
5952 if (len)
return XXH3_len_1to3_128b(
input, len, secret, seed);
5954 xxh_u64
const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
5955 xxh_u64
const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
5956 h128.
low64 = XXH64_avalanche(seed ^ bitflipl);
5957 h128.
high64 = XXH64_avalanche( seed ^ bitfliph);
5966 XXH128_mix32B(
XXH128_hash_t acc,
const xxh_u8* input_1,
const xxh_u8* input_2,
5969 acc.low64 += XXH3_mix16B (input_1, secret+0, seed);
5970 acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
5971 acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
5972 acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
5978 XXH3_len_17to128_128b(
const xxh_u8* XXH_RESTRICT
input,
size_t len,
5979 const xxh_u8* XXH_RESTRICT secret,
size_t secretSize,
5983 XXH_ASSERT(16 < len && len <= 128);
5986 acc.low64 = len * XXH_PRIME64_1;
5989 #if XXH_SIZE_OPT >= 1
5992 unsigned int i = (
unsigned int)(len - 1) / 32;
6011 h128.
high64 = (
acc.low64 * XXH_PRIME64_1)
6012 + (
acc.high64 * XXH_PRIME64_4)
6013 + ((len -
seed) * XXH_PRIME64_2);
6022 XXH3_len_129to240_128b(
const xxh_u8* XXH_RESTRICT
input,
size_t len,
6023 const xxh_u8* XXH_RESTRICT secret,
size_t secretSize,
6027 XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
6031 acc.low64 = len * XXH_PRIME64_1;
6039 for (
i = 32;
i < 160;
i += 32) {
6046 acc.low64 = XXH3_avalanche(
acc.low64);
6047 acc.high64 = XXH3_avalanche(
acc.high64);
6053 for (
i=160;
i <= len;
i += 32) {
6057 secret + XXH3_MIDSIZE_STARTOFFSET +
i - 160,
6069 h128.
high64 = (
acc.low64 * XXH_PRIME64_1)
6070 + (
acc.high64 * XXH_PRIME64_4)
6071 + ((len -
seed) * XXH_PRIME64_2);
6080 XXH3_hashLong_128b_internal(
const void* XXH_RESTRICT
input,
size_t len,
6081 const xxh_u8* XXH_RESTRICT secret,
size_t secretSize,
6082 XXH3_f_accumulate f_acc,
6083 XXH3_f_scrambleAcc f_scramble)
6085 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64
acc[XXH_ACC_NB] = XXH3_INIT_ACC;
6087 XXH3_hashLong_internal_loop(
acc, (
const xxh_u8*)
input, len, secret, secretSize, f_acc, f_scramble);
6090 XXH_STATIC_ASSERT(
sizeof(
acc) == 64);
6091 XXH_ASSERT(secretSize >=
sizeof(
acc) + XXH_SECRET_MERGEACCS_START);
6094 secret + XXH_SECRET_MERGEACCS_START,
6095 (xxh_u64)len * XXH_PRIME64_1);
6098 -
sizeof(
acc) - XXH_SECRET_MERGEACCS_START,
6099 ~((xxh_u64)len * XXH_PRIME64_2));
6108 XXH3_hashLong_128b_default(
const void* XXH_RESTRICT
input,
size_t len,
6110 const void* XXH_RESTRICT secret,
size_t secretLen)
6112 (void)seed64; (void)secret; (void)secretLen;
6113 return XXH3_hashLong_128b_internal(
input, len, XXH3_kSecret,
sizeof(XXH3_kSecret),
6114 XXH3_accumulate, XXH3_scrambleAcc);
6125 XXH3_hashLong_128b_withSecret(
const void* XXH_RESTRICT
input,
size_t len,
6127 const void* XXH_RESTRICT secret,
size_t secretLen)
6130 return XXH3_hashLong_128b_internal(
input, len, (
const xxh_u8*)secret, secretLen,
6131 XXH3_accumulate, XXH3_scrambleAcc);
6135 XXH3_hashLong_128b_withSeed_internal(
const void* XXH_RESTRICT
input,
size_t len,
6137 XXH3_f_accumulate f_acc,
6138 XXH3_f_scrambleAcc f_scramble,
6139 XXH3_f_initCustomSecret f_initSec)
6142 return XXH3_hashLong_128b_internal(
input, len,
6143 XXH3_kSecret,
sizeof(XXH3_kSecret),
6145 { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6146 f_initSec(secret, seed64);
6147 return XXH3_hashLong_128b_internal(
input, len, (
const xxh_u8*)secret,
sizeof(secret),
6156 XXH3_hashLong_128b_withSeed(
const void*
input,
size_t len,
6157 XXH64_hash_t seed64,
const void* XXH_RESTRICT secret,
size_t secretLen)
6159 (void)secret; (void)secretLen;
6160 return XXH3_hashLong_128b_withSeed_internal(
input, len, seed64,
6161 XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
6164 typedef XXH128_hash_t (*XXH3_hashLong128_f)(
const void* XXH_RESTRICT, size_t,
6168 XXH3_128bits_internal(
const void*
input,
size_t len,
6169 XXH64_hash_t seed64,
const void* XXH_RESTRICT secret,
size_t secretLen,
6170 XXH3_hashLong128_f f_hl128)
6180 return XXH3_len_0to16_128b((
const xxh_u8*)
input, len, (
const xxh_u8*)secret, seed64);
6182 return XXH3_len_17to128_128b((
const xxh_u8*)
input, len, (
const xxh_u8*)secret, secretLen, seed64);
6183 if (len <= XXH3_MIDSIZE_MAX)
6184 return XXH3_len_129to240_128b((
const xxh_u8*)
input, len, (
const xxh_u8*)secret, secretLen, seed64);
6185 return f_hl128(
input, len, seed64, secret, secretLen);
6194 return XXH3_128bits_internal(
input, len, 0,
6195 XXH3_kSecret,
sizeof(XXH3_kSecret),
6196 XXH3_hashLong_128b_default);
6203 return XXH3_128bits_internal(
input, len, 0,
6204 (
const xxh_u8*)secret, secretSize,
6205 XXH3_hashLong_128b_withSecret);
6212 return XXH3_128bits_internal(
input, len, seed,
6213 XXH3_kSecret,
sizeof(XXH3_kSecret),
6214 XXH3_hashLong_128b_withSeed);
6221 if (len <= XXH3_MIDSIZE_MAX)
6222 return XXH3_128bits_internal(
input, len, seed, XXH3_kSecret,
sizeof(XXH3_kSecret), NULL);
6223 return XXH3_hashLong_128b_withSecret(
input, len, seed, secret, secretSize);
6235 #ifndef XXH_NO_STREAM
6266 return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
6279 const unsigned char*
const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
6280 if (state->totalLen > XXH3_MIDSIZE_MAX) {
6282 XXH3_digest_long(
acc, state, secret);
6283 XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >=
sizeof(
acc) + XXH_SECRET_MERGEACCS_START);
6286 secret + XXH_SECRET_MERGEACCS_START,
6287 (xxh_u64)state->totalLen * XXH_PRIME64_1);
6289 secret + state->secretLimit + XXH_STRIPE_LEN
6290 -
sizeof(
acc) - XXH_SECRET_MERGEACCS_START,
6291 ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
6299 secret, state->secretLimit + XXH_STRIPE_LEN);
6311 return !(memcmp(&
h1, &h2,
sizeof(
h1)));
6325 if (hcmp)
return hcmp;
6336 if (XXH_CPU_LITTLE_ENDIAN) {
6337 hash.high64 = XXH_swap64(
hash.high64);
6338 hash.low64 = XXH_swap64(
hash.low64);
6340 XXH_memcpy(dst, &
hash.high64,
sizeof(
hash.high64));
6341 XXH_memcpy((
char*)dst +
sizeof(
hash.high64), &
hash.low64,
sizeof(
hash.low64));
6349 h.high64 = XXH_readBE64(
src);
6350 h.low64 = XXH_readBE64(
src->digest + 8);
6360 #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
6362 XXH_FORCE_INLINE
void XXH3_combine16(
void* dst,
XXH128_hash_t h128)
6364 XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.
low64 );
6365 XXH_writeLE64( (
char*)dst+8, XXH_readLE64((
char*)dst+8) ^ h128.
high64 );
6370 XXH3_generateSecret(
XXH_NOESCAPE void* secretBuffer,
size_t secretSize,
XXH_NOESCAPE const void* customSeed,
size_t customSeedSize)
6372 #if (XXH_DEBUGLEVEL >= 1)
6373 XXH_ASSERT(secretBuffer != NULL);
6377 if (secretBuffer == NULL)
return XXH_ERROR;
6381 if (customSeedSize == 0) {
6382 customSeed = XXH3_kSecret;
6383 customSeedSize = XXH_SECRET_DEFAULT_SIZE;
6385 #if (XXH_DEBUGLEVEL >= 1)
6386 XXH_ASSERT(customSeed != NULL);
6388 if (customSeed == NULL)
return XXH_ERROR;
6393 while (
pos < secretSize) {
6394 size_t const toCopy = XXH_MIN((secretSize -
pos), customSeedSize);
6395 memcpy((
char*)secretBuffer +
pos, customSeed, toCopy);
6399 {
size_t const nbSeg16 = secretSize / 16;
6403 for (
n=0;
n<nbSeg16;
n++) {
6404 XXH128_hash_t const h128 = XXH128(&scrambler,
sizeof(scrambler),
n);
6405 XXH3_combine16((
char*)secretBuffer +
n*16, h128);
6417 XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6418 XXH3_initCustomSecret(secret, seed);
6419 XXH_ASSERT(secretBuffer != NULL);
6421 memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
6427 #if XXH_VECTOR == XXH_AVX2 \
6428 && defined(__GNUC__) && !defined(__clang__) \
6429 && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0
6430 # pragma GCC pop_options
6443 #if defined (__cplusplus)