#if defined (__cplusplus)
extern "C" {
#endif

/* Doxygen-only documentation stubs: each macro is shown, then undefined again */
#ifdef XXH_DOXYGEN
#  define XXH_INLINE_ALL
#  undef XXH_INLINE_ALL
#  define XXH_PRIVATE_API
#  undef XXH_PRIVATE_API
#  define XXH_NAMESPACE /* YOUR NAME HERE */
#  undef XXH_NAMESPACE
#endif
#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
   && !defined(XXH_INLINE_ALL_31684351384)
   /* this section should be traversed only once */
#  define XXH_INLINE_ALL_31684351384
   /* give access to the advanced API, required to compile implementations */
#  undef XXH_STATIC_LINKING_ONLY   /* avoid macro redef */
#  define XXH_STATIC_LINKING_ONLY
   /* make all functions private */
#  undef XXH_PUBLIC_API
#  if defined(__GNUC__)
#    define XXH_PUBLIC_API static __inline __attribute__((unused))
#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#    define XXH_PUBLIC_API static inline
#  elif defined(_MSC_VER)
#    define XXH_PUBLIC_API static __inline
#  else
     /* note: this version may generate warnings for unused static functions */
#    define XXH_PUBLIC_API static
#  endif
   /* Unconditionally #undef all public symbols, in case they were already
    * defined with XXH_NAMESPACE. They will be redefined for XXH_INLINE_ALL. */
#  undef XXH_versionNumber
   /* XXH32 */
#  undef XXH32
#  undef XXH32_createState
#  undef XXH32_freeState
#  undef XXH32_reset
#  undef XXH32_update
#  undef XXH32_digest
#  undef XXH32_copyState
#  undef XXH32_canonicalFromHash
#  undef XXH32_hashFromCanonical
   /* XXH64 */
#  undef XXH64
#  undef XXH64_createState
#  undef XXH64_freeState
#  undef XXH64_reset
#  undef XXH64_update
#  undef XXH64_digest
#  undef XXH64_copyState
#  undef XXH64_canonicalFromHash
#  undef XXH64_hashFromCanonical
   /* XXH3_64bits */
#  undef XXH3_64bits
#  undef XXH3_64bits_withSecret
#  undef XXH3_64bits_withSeed
#  undef XXH3_64bits_withSecretandSeed
#  undef XXH3_createState
#  undef XXH3_freeState
#  undef XXH3_copyState
#  undef XXH3_64bits_reset
#  undef XXH3_64bits_reset_withSeed
#  undef XXH3_64bits_reset_withSecret
#  undef XXH3_64bits_update
#  undef XXH3_64bits_digest
#  undef XXH3_generateSecret
#  undef XXH3_generateSecret_fromSeed
   /* XXH3_128bits */
#  undef XXH128
#  undef XXH3_128bits
#  undef XXH3_128bits_withSeed
#  undef XXH3_128bits_withSecret
#  undef XXH3_128bits_reset
#  undef XXH3_128bits_reset_withSeed
#  undef XXH3_128bits_reset_withSecret
#  undef XXH3_128bits_reset_withSecretandSeed
#  undef XXH3_128bits_update
#  undef XXH3_128bits_digest
#  undef XXH128_isEqual
#  undef XXH128_cmp
#  undef XXH128_canonicalFromHash
#  undef XXH128_hashFromCanonical
   /* finally, free the namespace itself */
#  undef XXH_NAMESPACE

   /* employ the namespace for XXH_INLINE_ALL */
#  define XXH_NAMESPACE XXH_INLINE_
   /* internal implementation requires prefixed names for internal symbols */
#  define XXH_IPREF(Id)   XXH_NAMESPACE ## Id
#  define XXH_OK XXH_IPREF(XXH_OK)
#  define XXH_ERROR XXH_IPREF(XXH_ERROR)
#  define XXH_errorcode XXH_IPREF(XXH_errorcode)
#  define XXH32_canonical_t  XXH_IPREF(XXH32_canonical_t)
#  define XXH64_canonical_t  XXH_IPREF(XXH64_canonical_t)
#  define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
#  define XXH32_state_s XXH_IPREF(XXH32_state_s)
#  define XXH32_state_t XXH_IPREF(XXH32_state_t)
#  define XXH64_state_s XXH_IPREF(XXH64_state_s)
#  define XXH64_state_t XXH_IPREF(XXH64_state_t)
#  define XXH3_state_s  XXH_IPREF(XXH3_state_s)
#  define XXH3_state_t  XXH_IPREF(XXH3_state_t)
#  define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
   /* Ensure the header is parsed again, even if it was previously included */
#  undef XXHASH_H_5627135585666179
#  undef XXHASH_H_STATIC_13879238742
#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */

#ifndef XXHASH_H_5627135585666179
#define XXHASH_H_5627135585666179 1
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
#    ifdef XXH_EXPORT
#      define XXH_PUBLIC_API __declspec(dllexport)
#    elif XXH_IMPORT
#      define XXH_PUBLIC_API __declspec(dllimport)
#    endif
#  else
#    define XXH_PUBLIC_API   /* do nothing */
#  endif
#endif
#ifdef XXH_NAMESPACE
#  define XXH_CAT(A,B) A##B
#  define XXH_NAME2(A,B) XXH_CAT(A,B)
#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
/* XXH32 */
#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
/* XXH64 */
#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
/* XXH3_64bits */
#  define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
#  define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
#  define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
#  define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
#  define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
#  define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
#  define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
#  define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
#  define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
#  define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
#  define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
#  define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
#  define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
#  define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
#  define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
/* XXH3_128bits */
#  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
#  define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
#  define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
#  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
#  define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
#  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
#  define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
#  define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
#  define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
#  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
#  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
#  define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
#  define XXH128_cmp     XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
#  define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
#  define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
#endif
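
/*
 * Illustration (added note, not part of upstream xxhash.h): with
 * XXH_NAMESPACE defined, every public symbol is renamed at the object level
 * while source code keeps using the canonical names. A sketch, assuming a
 * hypothetical build with -DXXH_NAMESPACE=mylib_:
 *
 *     XXH32_hash_t h = XXH32(data, size, 0);
 *     // preprocesses to: mylib_XXH32(data, size, 0)
 *     // so the linker sees mylib_XXH32, avoiding clashes with another
 *     // statically linked copy of xxHash elsewhere in the program.
 */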
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
#    ifdef XXH_EXPORT
#      define XXH_PUBLIC_API __declspec(dllexport)
#    elif XXH_IMPORT
#      define XXH_PUBLIC_API __declspec(dllimport)
#    endif
#  else
#    define XXH_PUBLIC_API   /* do nothing */
#  endif
#endif
#if defined (__GNUC__)
# define XXH_CONSTF  __attribute__((const))
# define XXH_PUREF   __attribute__((pure))
# define XXH_MALLOCF __attribute__((malloc))
#else
# define XXH_CONSTF  /* disable */
# define XXH_PUREF
# define XXH_MALLOCF
#endif
#define XXH_VERSION_MAJOR    0
#define XXH_VERSION_MINOR    8
#define XXH_VERSION_RELEASE  2
/*! @brief Version number, encoded as two digits each */
#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
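
/*
 * Worked example (added note): version 0.8.2 encodes as
 * 0*100*100 + 8*100 + 2 == 802, so version checks reduce to plain integer
 * comparisons:
 *
 *     #if XXH_VERSION_NUMBER >= 802
 *         // safe to rely on 0.8.2+ behavior
 *     #endif
 */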
#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
/*! @brief An unsigned 32-bit integer. */
typedef uint32_t XXH32_hash_t;

#elif !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
#   include <stdint.h>
    typedef uint32_t XXH32_hash_t;

#else
#   include <limits.h>
#   if UINT_MAX == 0xFFFFFFFFUL
      typedef unsigned int XXH32_hash_t;
#   elif ULONG_MAX == 0xFFFFFFFFUL
      typedef unsigned long XXH32_hash_t;
#   else
#     error "unsupported platform: need a 32-bit type"
#   endif
#endif
/*! @brief Exit code for the streaming API. */
typedef enum {
    XXH_OK = 0, /*!< OK */
    XXH_ERROR   /*!< Error */
} XXH_errorcode;

#ifndef XXH_NO_STREAM
/* XXH32 streaming API */
typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
#endif /* !XXH_NO_STREAM */

/*! @brief Canonical (big endian) representation of @ref XXH32_hash_t. */
typedef struct {
    unsigned char digest[4]; /*!< Hash bytes, big endian */
} XXH32_canonical_t;
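
/*
 * Illustration (added note): the canonical form stores the hash big endian,
 * making it byte-order independent on disk or on the wire. A sketch using
 * the XXH32 helpers declared in this header; `buf`, `bufSize` and `f` are
 * hypothetical caller variables:
 *
 *     XXH32_canonical_t c;
 *     XXH32_canonicalFromHash(&c, XXH32(buf, bufSize, 0));
 *     fwrite(c.digest, 1, sizeof(c.digest), f);      // portable bytes
 *     // later: XXH32_hashFromCanonical(&c) recovers the numeric value
 */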
#ifdef __has_attribute
# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
#else
# define XXH_HAS_ATTRIBUTE(x) 0
#endif

/*
 * C23's __STDC_VERSION__ number hasn't been specified yet.
 * Limiting the check to C17+ (201711L) keeps it conservative.
 */
#define XXH_C23_VN 201711L

/* C-language attributes are added in C23. */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
#else
# define XXH_HAS_C_ATTRIBUTE(x) 0
#endif

#if defined(__cplusplus) && defined(__has_cpp_attribute)
# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
# define XXH_HAS_CPP_ATTRIBUTE(x) 0
#endif

/* XXH_FALLTHROUGH annotates deliberate switch fall-through. */
#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
# define XXH_FALLTHROUGH [[fallthrough]]
#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
#else
# define XXH_FALLTHROUGH /* fallthrough */
#endif

/* XXH_NOESCAPE marks pointer arguments that do not escape the callee. */
#if XXH_HAS_ATTRIBUTE(noescape)
# define XXH_NOESCAPE __attribute__((noescape))
#else
# define XXH_NOESCAPE
#endif
#ifndef XXH_NO_LONG_LONG
/*-**********************************************************************
*  64-bit hash
************************************************************************/
#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
/*! @brief An unsigned 64-bit integer. */
typedef uint64_t XXH64_hash_t;
#elif !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
#  include <stdint.h>
   typedef uint64_t XXH64_hash_t;
#else
#  include <limits.h>
#  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
     /* LP64 ABI says uint64_t is unsigned long */
     typedef unsigned long XXH64_hash_t;
#  else
     /* the following type must have a width of 64-bit */
     typedef unsigned long long XXH64_hash_t;
#  endif
#endif
#ifndef XXH_NO_STREAM
/* XXH64 streaming API */
typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
#endif /* !XXH_NO_STREAM */

/*-**********************************************************************
*  XXH3 64-bit variant
************************************************************************/

/*! The bare minimum size for a custom secret, see XXH3_64bits_withSecret(). */
#define XXH3_SECRET_SIZE_MIN 136

#ifndef XXH_NO_STREAM
/* XXH3 64-bit streaming API */
typedef struct XXH3_state_s XXH3_state_t;   /* incomplete type */
XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);
#endif /* !XXH_NO_STREAM */

/*! @brief The return value from 128-bit hashes. */
typedef struct {
    XXH64_hash_t low64;   /*!< value & 0xFFFFFFFFFFFFFFFF */
    XXH64_hash_t high64;  /*!< value >> 64 */
} XXH128_hash_t;

#ifndef XXH_NO_STREAM
/* XXH3 128-bit streaming API, mirroring the 64-bit variant above */
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
#endif /* !XXH_NO_STREAM */
#endif /* XXHASH_H_5627135585666179 */

#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
#define XXHASH_H_STATIC_13879238742

/*!
 * @internal
 * @brief Structure for XXH32 streaming API.
 * Do not access members directly: the layout may change between versions.
 */
struct XXH32_state_s {
   XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
   XXH32_hash_t large_len;    /*!< Whether the hash is >= 16 (handles total_len_32 overflow) */
   XXH32_hash_t v[4];         /*!< Accumulator lanes */
   XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads */
   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */
   XXH32_hash_t reserved;     /*!< Reserved field. Do not read or write to it. */
};   /* typedef'd to XXH32_state_t */

#ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */

/*!
 * @internal
 * @brief Structure for XXH64 streaming API.
 */
struct XXH64_state_s {
   XXH64_hash_t total_len;    /*!< Total length hashed (unbounded) */
   XXH64_hash_t v[4];         /*!< Accumulator lanes */
   XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads */
   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */
   XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyways */
   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it. */
};   /* typedef'd to XXH64_state_t */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)   /* >= C11 */
#  include <stdalign.h>
#  define XXH_ALIGN(n)      alignas(n)
#elif defined(__cplusplus) && (__cplusplus >= 201103L)           /* >= C++11 */
/* In C++ alignas() is a keyword */
#  define XXH_ALIGN(n)      alignas(n)
#elif defined(__GNUC__)
#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
#elif defined(_MSC_VER)
#  define XXH_ALIGN(n)      __declspec(align(n))
#else
#  define XXH_ALIGN(n)   /* disabled */
#endif

/* Old GCC versions only accept the attribute after the type in structures. */
#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
    && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
    && defined(__GNUC__)
#   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
#else
#   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
#endif
/*!
 * @brief The size of the internal XXH3 buffer.
 * This is the optimal update size for incremental hashing.
 */
#define XXH3_INTERNALBUFFER_SIZE 256

/*!
 * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
 */
#define XXH3_SECRET_DEFAULT_SIZE 192

/*!
 * @internal
 * @brief Structure for XXH3 streaming API.
 * Do not access members directly.
 */
struct XXH3_state_s {
   XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
       /*!< The 8 accumulators. @see XXH32_state_s::v */
   XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
       /*!< Used to store a custom secret generated from a seed. */
   XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
       /*!< The internal buffer. @see XXH32_state_s::mem32 */
   XXH32_hash_t bufferedSize;
       /*!< The amount of memory in @ref buffer. @see XXH32_state_s::memsize */
   XXH32_hash_t useSeed;
       /*!< Reserved field. Needed for padding on 64-bit. */
   size_t nbStripesSoFar;
       /*!< Number of stripes processed. */
   XXH64_hash_t totalLen;
       /*!< Total length hashed. 64-bit even on 32-bit targets. */
   size_t nbStripesPerBlock;
       /*!< Number of stripes per block. */
   size_t secretLimit;
   XXH64_hash_t seed;
       /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
   XXH64_hash_t reserved64;
       /*!< Reserved field. */
   const unsigned char* extSecret;
       /*!< Reference to an external secret for the _withSecret variants, NULL
        *   for other variants. */
   /* note: there may be some padding at the end due to alignment on 64 bytes */
}; /* typedef'd to XXH3_state_t */

#undef XXH_ALIGN_MEMBER
/*!
 * @brief Initializes a stack-allocated XXH3_state_s.
 * When an @ref XXH3_state_t is merely emplaced on the stack, it should be
 * initialized with XXH3_INITSTATE() (or a memset()) before its first reset.
 */
#define XXH3_INITSTATE(XXH3_state_ptr)                       \
    do {                                                     \
        XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
        tmp_xxh3_state_ptr->seed = 0;                        \
        tmp_xxh3_state_ptr->extSecret = NULL;                \
    } while(0)
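
/*
 * Usage sketch (added note, requires XXH_STATIC_LINKING_ONLY): a stack
 * allocated state avoids the malloc() behind XXH3_createState(). `buf` and
 * `bufSize` are hypothetical caller variables:
 *
 *     XXH3_state_t st;                 // on stack, uninitialized
 *     XXH3_INITSTATE(&st);             // make seed/extSecret well-defined
 *     XXH3_64bits_reset(&st);
 *     XXH3_64bits_update(&st, buf, bufSize);
 *     XXH64_hash_t h = XXH3_64bits_digest(&st);
 */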
#ifndef XXH_NO_STREAM
/* Streaming declarations for the _withSecretandSeed variants appear here. */
#endif /* !XXH_NO_STREAM */

#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
#  define XXH_IMPLEMENTATION
#endif

#endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */

/* ======================================================================== */
/* ==============   Implementation (inlined with XXH_INLINE_ALL)  ======== */
/* ======================================================================== */

#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
#  define XXH_IMPLEM_13a8737387
/* *************************************
*  Tuning parameters (Doxygen-only documentation stubs)
***************************************/
#ifdef XXH_DOXYGEN
/*! @brief Define to disable 64-bit hashes on targets lacking 64-bit support. */
#  define XXH_NO_LONG_LONG
#  undef XXH_NO_LONG_LONG /* don't actually */
/*! @brief Controls how unaligned memory is accessed. */
#  define XXH_FORCE_MEMORY_ACCESS 0
/*! @brief Controls how much xxHash optimizes for size. */
#  define XXH_SIZE_OPT 0
/*! @brief Use a faster direct read path when the input pointer is aligned. */
#  define XXH_FORCE_ALIGN_CHECK 0
/*! @brief When non-zero, don't emit inline hints to the compiler. */
#  define XXH_NO_INLINE_HINTS 0
/*! @brief Determines whether to inline the secret-consuming XXH3 variants. */
#  define XXH3_INLINE_SECRET 0
/*! @brief Whether to use a jump for the XXH32 finalizer instead of a loop. */
#  define XXH32_ENDJMP 0
/*! @brief Exposes obsolete, pre-0.8 identifiers. */
#  define XXH_OLD_NAMES
#  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
/*! @brief Disables the streaming API, saving code size. */
#  define XXH_NO_STREAM
#  undef XXH_NO_STREAM /* don't actually */
#endif /* XXH_DOXYGEN */
#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
   /* prefer __packed__ structures (method 1) for GCC
    * on ARMv7+ and MIPS, unaligned access is supported */
#  if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
#    define XXH_FORCE_MEMORY_ACCESS 1
#  endif
#endif

#ifndef XXH_SIZE_OPT
   /* default to 1 for -Os or -Oz */
#  if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
#    define XXH_SIZE_OPT 1
#  else
#    define XXH_SIZE_OPT 0
#  endif
#endif

#ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
   /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
#  if XXH_SIZE_OPT >= 1 || \
      defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64)    || defined(_M_ARM) /* visual */
#    define XXH_FORCE_ALIGN_CHECK 0
#  else
#    define XXH_FORCE_ALIGN_CHECK 1
#  endif
#endif

#ifndef XXH_NO_INLINE_HINTS
#  if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__)  /* -O0, -fno-inline */
#    define XXH_NO_INLINE_HINTS 1
#  else
#    define XXH_NO_INLINE_HINTS 0
#  endif
#endif

#ifndef XXH3_INLINE_SECRET
#  if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
     || !defined(XXH_INLINE_ALL)
#    define XXH3_INLINE_SECRET 0
#  else
#    define XXH3_INLINE_SECRET 1
#  endif
#endif

#ifndef XXH32_ENDJMP
/* generally preferable for performance */
#  define XXH32_ENDJMP 0
#endif
/* *************************************
*  Includes & Memory related functions
***************************************/
#if defined(XXH_NO_STREAM)
/* nothing */
#elif defined(XXH_NO_STDLIB)

/* When requesting to disable any mention of stdlib,
 * the library loses the ability to invoke malloc() / free().
 * In practice, it means that functions like `XXH*_createState()`
 * will always fail, and return NULL.
 * This flavor is useful in secure and controlled environments.
 */
static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
static void XXH_free(void* p) { (void)p; }

#else

/*
 * Modify the local functions below should you wish to use
 * different memory routines for malloc() and free()
 */
#include <stdlib.h>

/*! @internal Modify this function to use a different routine than malloc(). */
static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }

/*! @internal Modify this function to use a different routine than free(). */
static void XXH_free(void* p) { free(p); }

#endif  /* XXH_NO_STDLIB */

#include <string.h>

/*! @internal Modify this function to use a different routine than memcpy(). */
static void* XXH_memcpy(void* dest, const void* src, size_t size)
{
    return memcpy(dest,src,size);
}
/* *************************************
*  Compiler Specific Options
***************************************/
#ifdef _MSC_VER /* Visual Studio warning fix */
#  pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif

#if XXH_NO_INLINE_HINTS  /* disable inlining hints */
#  if defined(__GNUC__) || defined(__clang__)
#    define XXH_FORCE_INLINE static __attribute__((unused))
#  else
#    define XXH_FORCE_INLINE static
#  endif
#  define XXH_NO_INLINE static
/* enable inlining hints */
#elif defined(__GNUC__) || defined(__clang__)
#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
#  define XXH_NO_INLINE static __attribute__((noinline))
#elif defined(_MSC_VER)  /* Visual Studio */
#  define XXH_FORCE_INLINE static __forceinline
#  define XXH_NO_INLINE static __declspec(noinline)
#elif defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
#  define XXH_FORCE_INLINE static inline
#  define XXH_NO_INLINE static
#else
#  define XXH_FORCE_INLINE static
#  define XXH_NO_INLINE static
#endif

#if XXH3_INLINE_SECRET
#  define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
#else
#  define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
#endif
/* *************************************
*  Debug
***************************************/
#ifndef XXH_DEBUGLEVEL
#  ifdef DEBUGLEVEL /* backwards compat */
#    define XXH_DEBUGLEVEL DEBUGLEVEL
#  else
#    define XXH_DEBUGLEVEL 0
#  endif
#endif

#if (XXH_DEBUGLEVEL>=1) || __CPPCHECK__
#  include <assert.h>   /* note: can still be disabled with NDEBUG */
#  define XXH_ASSERT(c)   assert(c)
#else
#  define XXH_ASSERT(c)   XXH_ASSUME(c)
#endif

/* note: use after variable declarations */
#ifndef XXH_STATIC_ASSERT
#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
#  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
#  else
#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
#  endif
#  define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
#endif
/* note: use after variable declarations */
#if defined(__GNUC__) || defined(__clang__)
#  define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
#else
#  define XXH_COMPILER_GUARD(var) ((void)0)
#endif

/* Specifically for NEON vectors, which use the "w" constraint. */
#if defined(__clang__)
#  define XXH_COMPILER_GUARD_W(var) __asm__("" : "+w" (var))
#else
#  define XXH_COMPILER_GUARD_W(var) ((void)0)
#endif
/* *************************************
*  Basic Types
***************************************/
#if !defined (__VMS) \
 && (defined (__cplusplus) \
 || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
  typedef uint8_t xxh_u8;
#else
  typedef unsigned char xxh_u8;
#endif
typedef XXH32_hash_t xxh_u32;

#ifdef XXH_OLD_NAMES
#  define BYTE xxh_u8
#  define U8   xxh_u8
#  define U32  xxh_u32
#endif
/* ***   Memory access   *** */

#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
/*
 * Manual byteshift. Best for old compilers which don't inline memcpy.
 * We actually directly use XXH_readLE32 and XXH_readBE32.
 */
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/*
 * Force direct memory access. Only works on CPUs which support unaligned
 * memory access in hardware.
 */
static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/*
 * __attribute__((aligned(1))) is supported by gcc and clang.
 */
#ifdef XXH_OLD_NAMES
typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
#endif
static xxh_u32 XXH_read32(const void* ptr)
{
    typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
    return *((const xxh_unalign32*)ptr);
}

#else

/*
 * Portable and safe solution. Generally efficient.
 * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
 */
static xxh_u32 XXH_read32(const void* memPtr)
{
    xxh_u32 val;
    XXH_memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_MEMORY_ACCESS */
/* ***   Endianness   *** */

/*!
 * @def XXH_CPU_LITTLE_ENDIAN
 * Defined to 1 if the target is little endian, or 0 if it is big endian.
 */
#ifndef XXH_CPU_LITTLE_ENDIAN
/*
 * Try to detect endianness automatically, to avoid the nonstandard behavior
 * in `XXH_isLittleEndian()`
 */
#  if defined(_WIN32) /* Windows is always little endian */ \
     || defined(__LITTLE_ENDIAN__) \
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#    define XXH_CPU_LITTLE_ENDIAN 1
#  elif defined(__BIG_ENDIAN__) \
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#    define XXH_CPU_LITTLE_ENDIAN 0
#  else
/*
 * Runtime test, presumed to simplify to a constant by the compiler.
 */
static int XXH_isLittleEndian(void)
{
    /*
     * Portable and well-defined behavior.
     * Don't use static: it is detrimental to performance.
     */
    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
    return one.c[0];
}
#    define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
#  endif
#endif
/* ****************************************
*  Compiler-specific Functions and Macros
******************************************/
#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef __has_builtin
#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
#else
#  define XXH_HAS_BUILTIN(x) 0
#endif

/* Unreachable / assume hints, used to help the compiler prune dead branches. */
#if XXH_HAS_BUILTIN(__builtin_unreachable)
#  define XXH_UNREACHABLE() __builtin_unreachable()

#elif defined(_MSC_VER)
#  define XXH_UNREACHABLE() __assume(0)

#else
#  define XXH_UNREACHABLE()
#endif

#if XXH_HAS_BUILTIN(__builtin_assume)
#  define XXH_ASSUME(c) __builtin_assume(c)
#else
#  define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
#endif
/*!
 * @internal
 * @def XXH_rotl32(x,r)
 * @brief 32-bit rotate left (and the 64-bit counterpart XXH_rotl64).
 */
#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
                               && XXH_HAS_BUILTIN(__builtin_rotateleft64)
#  define XXH_rotl32 __builtin_rotateleft32
#  define XXH_rotl64 __builtin_rotateleft64
/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
#elif defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif
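
/*
 * Worked example (added note): a left rotate wraps the shifted-out bits
 * around to the low end, e.g. XXH_rotl32(0x80000001U, 1) == 0x00000003U.
 * The fallback expression requires 0 < r < 32; the library only invokes it
 * with such constants.
 */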
/*!
 * @internal
 * @brief A 32-bit byteswap.
 */
#if defined(_MSC_VER)     /* Visual Studio */
#  define XXH_swap32 _byteswap_ulong
#elif XXH_GCC_VERSION >= 403
#  define XXH_swap32 __builtin_bswap32
#else
static xxh_u32 XXH_swap32 (xxh_u32 x)
{
    return  ((x << 24) & 0xff000000 ) |
            ((x <<  8) & 0x00ff0000 ) |
            ((x >>  8) & 0x0000ff00 ) |
            ((x >> 24) & 0x000000ff );
}
#endif
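
/*
 * Worked example (added note): XXH_swap32(0x12345678) == 0x78563412,
 * a full endianness reversal of the four bytes.
 */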
/* ***************************
*  Memory reads
*****************************/

/*!
 * @internal
 * @brief Enum to indicate whether a pointer is aligned.
 */
typedef enum {
    XXH_aligned,  /*!< Aligned */
    XXH_unaligned /*!< Possibly unaligned */
} XXH_alignment;

/*
 * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
 */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))

XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
{
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
    return bytePtr[0]
         | ((xxh_u32)bytePtr[1] << 8)
         | ((xxh_u32)bytePtr[2] << 16)
         | ((xxh_u32)bytePtr[3] << 24);
}

XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
{
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
    return bytePtr[3]
         | ((xxh_u32)bytePtr[2] << 8)
         | ((xxh_u32)bytePtr[1] << 16)
         | ((xxh_u32)bytePtr[0] << 24);
}

#else
XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
}

static xxh_u32 XXH_readBE32(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
}
#endif

XXH_FORCE_INLINE xxh_u32
XXH_readLE32_align(const void* ptr, XXH_alignment align)
{
    if (align==XXH_unaligned) {
        return XXH_readLE32(ptr);
    } else {
        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
    }
}
/* *******************************************************************
*  32-bit hash functions
*********************************************************************/
#define XXH_PRIME32_1  0x9E3779B1U
#define XXH_PRIME32_2  0x85EBCA77U
#define XXH_PRIME32_3  0xC2B2AE3DU
#define XXH_PRIME32_4  0x27D4EB2FU
#define XXH_PRIME32_5  0x165667B1U

#ifdef XXH_OLD_NAMES
#  define PRIME32_1 XXH_PRIME32_1
#  define PRIME32_2 XXH_PRIME32_2
#  define PRIME32_3 XXH_PRIME32_3
#  define PRIME32_4 XXH_PRIME32_4
#  define PRIME32_5 XXH_PRIME32_5
#endif
/*!
 * @internal
 * @brief Mixes one lane of the accumulator with 32 bits of input.
 */
static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
{
    acc += input * XXH_PRIME32_2;
    acc  = XXH_rotl32(acc, 13);
    acc *= XXH_PRIME32_1;
#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
    /* prevent the compiler from auto-vectorizing this loop in a way that
     * pessimizes performance (see upstream commentary) */
    XXH_COMPILER_GUARD(acc);
#endif
    return acc;
}
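
/*
 * Worked illustration (added note): one round folds 4 input bytes into a
 * lane as acc = rotl32(acc + input * PRIME32_2, 13) * PRIME32_1. Four such
 * independent lanes run over the input in the main loop, which keeps the
 * computation friendly to pipelining.
 */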
/*! @brief Mixes all bits to finalize the hash. */
static xxh_u32 XXH32_avalanche(xxh_u32 hash)
{
    hash ^= hash >> 15;
    hash *= XXH_PRIME32_2;
    hash ^= hash >> 13;
    hash *= XXH_PRIME32_3;
    hash ^= hash >> 16;
    return hash;
}

#define XXH_get32bits(p) XXH_readLE32_align(p, align)
/*!
 * @internal
 * @brief Processes the last 0-15 bytes of @p ptr.
 */
static XXH_PUREF xxh_u32
XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
#define XXH_PROCESS1 do {                             \
    hash += (*ptr++) * XXH_PRIME32_5;                 \
    hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1;      \
} while (0)

#define XXH_PROCESS4 do {                             \
    hash += XXH_get32bits(ptr) * XXH_PRIME32_3;       \
    ptr += 4;                                         \
    hash  = XXH_rotl32(hash, 17) * XXH_PRIME32_4;     \
} while (0)

    if (ptr==NULL) XXH_ASSERT(len == 0);

    /* Compact rerolled version; generally faster */
    if (!XXH32_ENDJMP) {
        len &= 15;
        while (len >= 4) {
            XXH_PROCESS4;
            len -= 4;
        }
        while (len > 0) {
            XXH_PROCESS1;
            --len;
        }
        return XXH32_avalanche(hash);
    } else {
         switch(len&15) /* or switch(bEnd - p) */ {
           case 12:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 8:       XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 4:       XXH_PROCESS4;
                         return XXH32_avalanche(hash);

           case 13:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 9:       XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 5:       XXH_PROCESS4;
                         XXH_PROCESS1;
                         return XXH32_avalanche(hash);

           case 14:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 10:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 6:       XXH_PROCESS4;
                         XXH_PROCESS1;
                         XXH_PROCESS1;
                         return XXH32_avalanche(hash);

           case 15:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 11:      XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 7:       XXH_PROCESS4;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 3:       XXH_PROCESS1;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 2:       XXH_PROCESS1;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 1:       XXH_PROCESS1;
                         XXH_FALLTHROUGH;  /* fallthrough */
           case 0:       return XXH32_avalanche(hash);
        }
        XXH_ASSERT(0);
        return hash;   /* reaching this point is deemed impossible */
    }
}

#ifdef XXH_OLD_NAMES
#  define PROCESS1 XXH_PROCESS1
#  define PROCESS4 XXH_PROCESS4
#else
#  undef XXH_PROCESS1
#  undef XXH_PROCESS4
#endif
/*!
 * @internal
 * @brief The implementation for @ref XXH32().
 */
XXH_FORCE_INLINE XXH_PUREF xxh_u32
XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
{
    xxh_u32 h32;

    if (input==NULL) XXH_ASSERT(len == 0);

    if (len>=16) {
        const xxh_u8* const bEnd = input + len;
        const xxh_u8* const limit = bEnd - 15;
        xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
        xxh_u32 v2 = seed + XXH_PRIME32_2;
        xxh_u32 v3 = seed + 0;
        xxh_u32 v4 = seed - XXH_PRIME32_1;

        do {
            v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
            v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
            v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
            v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
        } while (input < limit);

        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
    } else {
        h32  = seed + XXH_PRIME32_5;
    }

    h32 += (xxh_u32)len;

    return XXH32_finalize(h32, input, len&15, align);
}
/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
{
#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
    XXH32_state_t state;
    XXH32_reset(&state, seed);
    XXH32_update(&state, (const xxh_u8*)input, len);
    return XXH32_digest(&state);
#else
    if (XXH_FORCE_ALIGN_CHECK) {
        if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
            return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
    }   }

    return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
#endif
}
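
/*
 * Usage sketch (added note, not part of upstream xxhash.h): one-shot hashing
 * of a memory block; `buf` and `bufSize` are hypothetical caller variables.
 *
 *     XXH32_hash_t h = XXH32(buf, bufSize, 0);   // 0 = seed
 *     // identical input and seed produce the identical h on every
 *     // platform, independent of endianness or alignment.
 */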
/*******   Hash streaming   *******/
#ifndef XXH_NO_STREAM
/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
{
    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
}
/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}

/*! @ingroup XXH32_family */
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
{
    XXH_memcpy(dstState, srcState, sizeof(*dstState));
}

/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
{
    XXH_ASSERT(statePtr != NULL);
    memset(statePtr, 0, sizeof(*statePtr));
    statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
    statePtr->v[1] = seed + XXH_PRIME32_2;
    statePtr->v[2] = seed + 0;
    statePtr->v[3] = seed - XXH_PRIME32_1;
    return XXH_OK;
}

/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH_errorcode
XXH32_update(XXH32_state_t* state, const void* input, size_t len)
{
    if (input==NULL) {
        XXH_ASSERT(len == 0);
        return XXH_OK;
    }

    {   const xxh_u8* p = (const xxh_u8*)input;
        const xxh_u8* const bEnd = p + len;

        state->total_len_32 += (XXH32_hash_t)len;
        state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));

        if (state->memsize + len < 16)  {   /* fill in tmp buffer */
            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
            state->memsize += (XXH32_hash_t)len;
            return XXH_OK;
        }

        if (state->memsize) {   /* some data left from previous update */
            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
            {   const xxh_u32* p32 = state->mem32;
                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
            }
            p += 16-state->memsize;
            state->memsize = 0;
        }

        if (p <= bEnd-16) {
            const xxh_u8* const limit = bEnd - 16;

            do {
                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
            } while (p<=limit);
        }

        if (p < bEnd) {
            XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
            state->memsize = (unsigned)(bEnd-p);
        }
    }

    return XXH_OK;
}

/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
{
    xxh_u32 h32;

    if (state->large_len) {
        h32 = XXH_rotl32(state->v[0], 1)
            + XXH_rotl32(state->v[1], 7)
            + XXH_rotl32(state->v[2], 12)
            + XXH_rotl32(state->v[3], 18);
    } else {
        h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
    }

    h32 += state->total_len_32;

    return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
}
#endif /* !XXH_NO_STREAM */
/*******   Canonical representation   *******/

/*! @ingroup XXH32_family */
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
{
    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
    XXH_memcpy(dst, &hash, sizeof(*dst));
}
/*! @ingroup XXH32_family */
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
{
    return XXH_readBE32(src);
}
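
/*
 * Usage sketch (added note): feeding data in arbitrary chunks through the
 * streaming state implemented above; `f`, `chunk`, `n` and `h` are
 * hypothetical caller variables:
 *
 *     XXH32_state_t* const st = XXH32_createState();
 *     XXH32_reset(st, 0);
 *     while ((n = fread(chunk, 1, sizeof(chunk), f)) > 0)
 *         XXH32_update(st, chunk, n);
 *     h = XXH32_digest(st);   // equals the one-shot XXH32() of the same data
 *     XXH32_freeState(st);
 */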
#ifndef XXH_NO_LONG_LONG

/* *******************************************************************
*  64-bit hash functions
*********************************************************************/

/*******   Memory access   *******/

typedef XXH64_hash_t xxh_u64;

#ifdef XXH_OLD_NAMES
#  define U64 xxh_u64
#endif

#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
/*
 * Manual byteshift. Best for old compilers which don't inline memcpy.
 * We actually directly use XXH_readLE64 and XXH_readBE64.
 */
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware. */
static xxh_u64 XXH_read64(const void* memPtr)
{
    return *(const xxh_u64*) memPtr;
}

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

#ifdef XXH_OLD_NAMES
typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
#endif
static xxh_u64 XXH_read64(const void* ptr)
{
    typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
    return *((const xxh_unalign64*)ptr);
}

#else

/* Portable and safe solution. Generally efficient. */
static xxh_u64 XXH_read64(const void* memPtr)
{
    xxh_u64 val;
    XXH_memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_MEMORY_ACCESS */
#if defined(_MSC_VER)     /* Visual Studio */
#  define XXH_swap64 _byteswap_uint64
#elif XXH_GCC_VERSION >= 403
#  define XXH_swap64 __builtin_bswap64
#else
static xxh_u64 XXH_swap64(xxh_u64 x)
{
    return  ((x << 56) & 0xff00000000000000ULL) |
            ((x << 40) & 0x00ff000000000000ULL) |
            ((x << 24) & 0x0000ff0000000000ULL) |
            ((x << 8)  & 0x000000ff00000000ULL) |
            ((x >> 8)  & 0x00000000ff000000ULL) |
            ((x >> 24) & 0x0000000000ff0000ULL) |
            ((x >> 40) & 0x000000000000ff00ULL) |
            ((x >> 56) & 0x00000000000000ffULL);
}
#endif
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))

XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
{
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
    return bytePtr[0]
         | ((xxh_u64)bytePtr[1] << 8)
         | ((xxh_u64)bytePtr[2] << 16)
         | ((xxh_u64)bytePtr[3] << 24)
         | ((xxh_u64)bytePtr[4] << 32)
         | ((xxh_u64)bytePtr[5] << 40)
         | ((xxh_u64)bytePtr[6] << 48)
         | ((xxh_u64)bytePtr[7] << 56);
}

XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
{
    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
    return bytePtr[7]
         | ((xxh_u64)bytePtr[6] << 8)
         | ((xxh_u64)bytePtr[5] << 16)
         | ((xxh_u64)bytePtr[4] << 24)
         | ((xxh_u64)bytePtr[3] << 32)
         | ((xxh_u64)bytePtr[2] << 40)
         | ((xxh_u64)bytePtr[1] << 48)
         | ((xxh_u64)bytePtr[0] << 56);
}

#else
XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
}

static xxh_u64 XXH_readBE64(const void* ptr)
{
    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
}
#endif

XXH_FORCE_INLINE xxh_u64
XXH_readLE64_align(const void* ptr, XXH_alignment align)
{
    if (align==XXH_unaligned)
        return XXH_readLE64(ptr);
    else
        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
}
/*******   xxh64   *******/
#define XXH_PRIME64_1  0x9E3779B185EBCA87ULL
#define XXH_PRIME64_2  0xC2B2AE3D27D4EB4FULL
#define XXH_PRIME64_3  0x165667B19E3779F9ULL
#define XXH_PRIME64_4  0x85EBCA77C2B2AE63ULL
#define XXH_PRIME64_5  0x27D4EB2F165667C5ULL

#ifdef XXH_OLD_NAMES
#  define PRIME64_1 XXH_PRIME64_1
#  define PRIME64_2 XXH_PRIME64_2
#  define PRIME64_3 XXH_PRIME64_3
#  define PRIME64_4 XXH_PRIME64_4
#  define PRIME64_5 XXH_PRIME64_5
#endif
/*! @copydoc XXH32_round */
static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
{
    acc += input * XXH_PRIME64_2;
    acc  = XXH_rotl64(acc, 31);
    acc *= XXH_PRIME64_1;
    return acc;
}

static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
{
    val  = XXH64_round(0, val);
    acc ^= val;
    acc  = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
    return acc;
}

/*! @copydoc XXH32_avalanche */
static xxh_u64 XXH64_avalanche(xxh_u64 hash)
{
    hash ^= hash >> 33;
    hash *= XXH_PRIME64_2;
    hash ^= hash >> 29;
    hash *= XXH_PRIME64_3;
    hash ^= hash >> 32;
    return hash;
}
#define XXH_get64bits(p) XXH_readLE64_align(p, align)

/*!
 * @internal
 * @brief Processes the last 0-31 bytes of @p ptr.
 */
static XXH_PUREF xxh_u64
XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
    if (ptr==NULL) XXH_ASSERT(len == 0);
    len &= 31;
    while (len >= 8) {
        xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
        ptr += 8;
        hash ^= k1;
        hash  = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
        len -= 8;
    }
    if (len >= 4) {
        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
        ptr += 4;
        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
        len -= 4;
    }
    while (len > 0) {
        hash ^= (*ptr++) * XXH_PRIME64_5;
        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
        --len;
    }
    return  XXH64_avalanche(hash);
}
#ifdef XXH_OLD_NAMES
#  define PROCESS1_64 XXH_PROCESS1_64
#  define PROCESS4_64 XXH_PROCESS4_64
#  define PROCESS8_64 XXH_PROCESS8_64
#else
#  undef XXH_PROCESS1_64
#  undef XXH_PROCESS4_64
#  undef XXH_PROCESS8_64
#endif
/*!
 * @internal
 * @brief The implementation for @ref XXH64().
 */
XXH_FORCE_INLINE XXH_PUREF xxh_u64
XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
{
    xxh_u64 h64;
    if (input==NULL) XXH_ASSERT(len == 0);

    if (len>=32) {
        const xxh_u8* const bEnd = input + len;
        const xxh_u8* const limit = bEnd - 31;
        xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
        xxh_u64 v2 = seed + XXH_PRIME64_2;
        xxh_u64 v3 = seed + 0;
        xxh_u64 v4 = seed - XXH_PRIME64_1;

        do {
            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
        } while (input<limit);

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
        h64 = XXH64_mergeRound(h64, v1);
        h64 = XXH64_mergeRound(h64, v2);
        h64 = XXH64_mergeRound(h64, v3);
        h64 = XXH64_mergeRound(h64, v4);

    } else {
        h64  = seed + XXH_PRIME64_5;
    }

    h64 += (xxh_u64) len;

    return XXH64_finalize(h64, input, len, align);
}
/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
{
#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
    XXH64_state_t state;
    XXH64_reset(&state, seed);
    XXH64_update(&state, (const xxh_u8*)input, len);
    return XXH64_digest(&state);
#else
    if (XXH_FORCE_ALIGN_CHECK) {
        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
    }   }

    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
#endif
}
/*******   Hash Streaming   *******/
#ifndef XXH_NO_STREAM
/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
{
    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
}
/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}

/*! @ingroup XXH64_family */
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
{
    XXH_memcpy(dstState, srcState, sizeof(*dstState));
}

/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
{
    XXH_ASSERT(statePtr != NULL);
    memset(statePtr, 0, sizeof(*statePtr));
    statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
    statePtr->v[1] = seed + XXH_PRIME64_2;
    statePtr->v[2] = seed + 0;
    statePtr->v[3] = seed - XXH_PRIME64_1;
    return XXH_OK;
}

/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH_errorcode
XXH64_update (XXH64_state_t* state, const void* input, size_t len)
{
    if (input==NULL) {
        XXH_ASSERT(len == 0);
        return XXH_OK;
    }

    {   const xxh_u8* p = (const xxh_u8*)input;
        const xxh_u8* const bEnd = p + len;

        state->total_len += len;

        if (state->memsize + len < 32) {  /* fill in tmp buffer */
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
            state->memsize += (xxh_u32)len;
            return XXH_OK;
        }

        if (state->memsize) {   /* tmp buffer is full */
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
            state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
            state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
            state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
            state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
            p += 32 - state->memsize;
            state->memsize = 0;
        }

        if (p+32 <= bEnd) {
            const xxh_u8* const limit = bEnd - 32;

            do {
                state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
                state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
                state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
                state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
            } while (p<=limit);
        }

        if (p < bEnd) {
            XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
            state->memsize = (unsigned)(bEnd-p);
        }
    }

    return XXH_OK;
}

/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
{
    xxh_u64 h64;

    if (state->total_len >= 32) {
        h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
        h64 = XXH64_mergeRound(h64, state->v[0]);
        h64 = XXH64_mergeRound(h64, state->v[1]);
        h64 = XXH64_mergeRound(h64, state->v[2]);
        h64 = XXH64_mergeRound(h64, state->v[3]);
    } else {
        h64  = state->v[2] /*seed*/ + XXH_PRIME64_5;
    }

    h64 += (xxh_u64) state->total_len;

    return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
}
#endif /* !XXH_NO_STREAM */
/******* Canonical representation   *******/

/*! @ingroup XXH64_family */
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
{
    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
    XXH_memcpy(dst, &hash, sizeof(*dst));
}

/*! @ingroup XXH64_family */
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
{
    return XXH_readBE64(src);
}
/* *********************************************************************
*  XXH3
*  New generation hash designed for speed on small keys and vectorization
************************************************************************ */

/* ===   Compiler specifics   === */

#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++ */
#  define XXH_RESTRICT   /* disable */
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
#  define XXH_RESTRICT   restrict
#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
   || (defined (__clang__)) \
   || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
   || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
/*
 * There are a LOT more compilers that recognize __restrict but this
 * covers the major ones.
 */
#  define XXH_RESTRICT   __restrict
#else
#  define XXH_RESTRICT   /* disable */
#endif

#if (defined(__GNUC__) && (__GNUC__ >= 3))  \
  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
  || defined(__clang__)
#    define XXH_likely(x) __builtin_expect(x, 1)
#    define XXH_unlikely(x) __builtin_expect(x, 0)
#else
#    define XXH_likely(x) (x)
#    define XXH_unlikely(x) (x)
#endif
#if defined(__GNUC__) || defined(__clang__)
#  if defined(__ARM_FEATURE_SVE)
#    include <arm_sve.h>
#  endif
#  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
   || (defined(_M_ARM) && _M_ARM >= 7) \
   || defined(_M_ARM64) || defined(_M_ARM64EC)
#    define inline __inline__  /* circumvent a clang bug */
#    include <arm_neon.h>
#    undef inline
#  elif defined(__AVX2__)
#    include <immintrin.h>
#  elif defined(__SSE2__)
#    include <emmintrin.h>
#  endif
#endif

#if defined(_MSC_VER)
#  include <intrin.h>
#endif

#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
#  warning "XXH3 is highly inefficient without ARM or Thumb-2."
#endif
/* ==========================================
 * Vectorization detection
 * ========================================== */

#ifdef XXH_DOXYGEN
/*!
 * @ingroup tuning
 * @brief Overrides the vectorization implementation chosen for XXH3.
 */
#  define XXH_VECTOR XXH_SCALAR
/*!
 * @ingroup tuning
 * @brief Possible values for @ref XXH_VECTOR.
 */
enum XXH_VECTOR_TYPE /* fake enum */ {
    XXH_SCALAR = 0,  /*!< Portable scalar version */
    XXH_SSE2   = 1,  /*!< SSE2 for Pentium 4, Opteron, all x86_64 */
    XXH_AVX2   = 2,  /*!< AVX2 for Haswell and Bulldozer */
    XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
    XXH_NEON   = 4,  /*!< NEON for most ARMv7-A and all AArch64 */
    XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
    XXH_SVE    = 6,  /*!< SVE for some ARMv8-A and ARMv9-A */
};
/*!
 * @ingroup tuning
 * @brief Selects the minimum alignment for XXH3's accumulators.
 */
#  define XXH_ACC_ALIGN 8
#endif

/* Actual definition */
#ifndef XXH_DOXYGEN
#  define XXH_SCALAR 0
#  define XXH_SSE2   1
#  define XXH_AVX2   2
#  define XXH_AVX512 3
#  define XXH_NEON   4
#  define XXH_VSX    5
#  define XXH_SVE    6
#endif

#ifndef XXH_VECTOR            /* can be defined on command line */
#  if defined(__ARM_FEATURE_SVE)
#    define XXH_VECTOR XXH_SVE
#  elif ( \
        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
     || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
   ) && ( \
        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
   )
#    define XXH_VECTOR XXH_NEON
#  elif defined(__AVX512F__)
#    define XXH_VECTOR XXH_AVX512
#  elif defined(__AVX2__)
#    define XXH_VECTOR XXH_AVX2
#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
#    define XXH_VECTOR XXH_SSE2
#  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
     || (defined(__s390x__) && defined(__VEC__)) \
     && defined(__GNUC__)
#    define XXH_VECTOR XXH_VSX
#  else
#    define XXH_VECTOR XXH_SCALAR
#  endif
#endif

/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
#  ifdef _MSC_VER
#    pragma warning(once : 4606)
#  else
#    warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
#  endif
#  undef XXH_VECTOR
#  define XXH_VECTOR XXH_SCALAR
#endif

/*
 * Controls the alignment of the accumulator,
 * for compatibility with aligned vector loads, which are usually faster.
 */
#ifndef XXH_ACC_ALIGN
#  if defined(XXH_X86DISPATCH)
#    define XXH_ACC_ALIGN 64  /* for compatibility with avx512 */
#  elif XXH_VECTOR == XXH_SCALAR  /* scalar */
#    define XXH_ACC_ALIGN 8
#  elif XXH_VECTOR == XXH_SSE2  /* sse2 */
#    define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_AVX2  /* avx2 */
#    define XXH_ACC_ALIGN 32
#  elif XXH_VECTOR == XXH_NEON  /* neon */
#    define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_VSX   /* vsx */
#    define XXH_ACC_ALIGN 16
#  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
#    define XXH_ACC_ALIGN 64
#  elif XXH_VECTOR == XXH_SVE   /* sve */
#    define XXH_ACC_ALIGN 64
#  endif
#endif

#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
#elif XXH_VECTOR == XXH_SVE
#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
#else
#  define XXH_SEC_ALIGN 8
#endif

#if defined(__GNUC__) || defined(__clang__)
#  define XXH_ALIASING __attribute__((may_alias))
#else
#  define XXH_ALIASING /* nothing */
#endif
/*
 * GCC usually generates the best code with -O3 for xxHash, but -O2 wins for
 * the AVX2 inner loop; restrict this optimization override to that case.
 */
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
#  pragma GCC push_options
#  pragma GCC optimize("-O2")
#endif

#if XXH_VECTOR == XXH_NEON

/*
 * 64-bit NEON loads go through an aliasing vector type, to work around
 * miscompilation of the direct-cast load on some GCC versions.
 */
typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
/*!
 * @internal
 * @brief `vld1q_u64` but faster and alignment-safe.
 */
#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
{
    return *(xxh_aliasing_uint64x2_t const *)ptr;
}
#else
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
{
    return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
}
#endif
/*!
 * @internal
 * @brief `vmlal_u32` on the low and high halves of a vector.
 *
 * On AArch64 GCC < 11, inline assembly guarantees umlal/umlal2 are emitted
 * instead of the compiler spilling to the stack.
 */
#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    /* Inline assembly is the only way */
    __asm__("umlal   %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
    return acc;
}
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    /* This intrinsic works as expected */
    return vmlal_high_u32(acc, lhs, rhs);
}
#else
/* Portable intrinsic versions */
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
}
/*! @copydoc XXH_vmlal_low_u32 */
XXH_FORCE_INLINE uint64x2_t
XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
{
    return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
}
#endif
/*!
 * @ingroup tuning
 * @brief Controls the NEON-to-scalar ratio for XXH3. Most AArch64 cores do
 * best with 6 NEON lanes, leaving 2 lanes for the scalar pipelines.
 */
#  ifndef XXH3_NEON_LANES
#    if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
      && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
#      define XXH3_NEON_LANES 6
#    else
#      define XXH3_NEON_LANES XXH_ACC_NB
#    endif
#  endif
#endif  /* XXH_VECTOR == XXH_NEON */
#if XXH_VECTOR == XXH_VSX
/*
 * Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
 * and `pixel`. Save their states and restore them after the include.
 */
#  pragma push_macro("bool")
#  pragma push_macro("vector")
#  pragma push_macro("pixel")
/* silence potential macro redefined warnings */
#  undef bool
#  undef vector
#  undef pixel

#  if defined(__s390x__)
#    include <s390intrin.h>
#  else
#    include <altivec.h>
#  endif

/* Restore the original macro values, if applicable. */
#  pragma pop_macro("pixel")
#  pragma pop_macro("vector")
#  pragma pop_macro("bool")

typedef __vector unsigned long long xxh_u64x2;
typedef __vector unsigned char xxh_u8x16;
typedef __vector unsigned xxh_u32x4;

/*!
 * @internal
 * @brief A type with the same size and alignment as xxh_u64x2 that may alias.
 */
typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;

#  ifndef XXH_VSX_BE
#    if defined(__BIG_ENDIAN__) \
      || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#      define XXH_VSX_BE 1
#    elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
#      warning "-maltivec=be is not recommended. Please use native endianness."
#      define XXH_VSX_BE 1
#    else
#      define XXH_VSX_BE 0
#    endif
#  endif /* !defined(XXH_VSX_BE) */
#  if XXH_VSX_BE
#    if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
#      define XXH_vec_revb vec_revb
#    else
/*!
 * @internal
 * @brief A polyfill for POWER9's vec_revb().
 */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
{
    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
    return vec_perm(val, val, vByteSwap);
}
#    endif
#  endif /* XXH_VSX_BE */

/*!
 * @internal
 * @brief Performs an unaligned vector load and byte swaps it on big endian.
 */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
{
    xxh_u64x2 ret;
    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
#  if XXH_VSX_BE
    ret = XXH_vec_revb(ret);
#  endif
    return ret;
}
/*
 * vec_mulo and vec_mule are problematic intrinsics on PowerPC:
 * GCC and Clang disagree about them, so use builtins or inline asm.
 */
#  if defined(__s390x__)
 /* s390x is always big endian, no issue on this platform */
#    define XXH_vec_mulo vec_mulo
#    define XXH_vec_mule vec_mule
#  elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
#    define XXH_vec_mulo __builtin_altivec_vmulouw
#    define XXH_vec_mule __builtin_altivec_vmuleuw
#  else
/* gcc needs inline assembly */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
{
    xxh_u64x2 result;
    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
    return result;
}
#  endif /* XXH_vec_mulo, XXH_vec_mule */
#endif /* XXH_VECTOR == XXH_VSX */
#if XXH_VECTOR == XXH_SVE
#define ACCRND(acc, offset) \
do { \
    svuint64_t input_vec = svld1_u64(mask, xinput + offset);         \
    svuint64_t secret_vec = svld1_u64(mask, xsecret + offset);       \
    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec);     \
    svuint64_t swapped = svtbl_u64(input_vec, kSwap);                \
    svuint64_t mixed_lo = svextw_u64_x(mask, mixed);                 \
    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32);            \
    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
    acc = svadd_u64_x(mask, acc, mul);                               \
} while (0)
#endif /* XXH_VECTOR == XXH_SVE */
/* prefetch
 * can be disabled by declaring the XXH_NO_PREFETCH build macro */
#if defined(XXH_NO_PREFETCH)
#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
#else
#  if XXH_SIZE_OPT >= 1
#    define XXH_PREFETCH(ptr) (void)(ptr)
#  elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
#    include <mmintrin.h>
#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#  else
#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
#  endif
#endif  /* XXH_NO_PREFETCH */
/* ==========================================
 * XXH3 default settings
 * ========================================== */

#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */

#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
#  error "default keyset is not large enough"
#endif

/*! Pseudorandom secret taken directly from FARSH. */
XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
};

static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL;
static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL;

#ifdef XXH_OLD_NAMES
#  define kSecret XXH3_kSecret
#endif
#if defined(XXH_DOXYGEN)
/*!
 * @brief Calculates a 32-bit to 64-bit long multiply.
 */
XXH_FORCE_INLINE xxh_u64
XXH_mult32to64(xxh_u64 x, xxh_u64 y)
{
    return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
}
#elif defined(_MSC_VER) && defined(_M_IX86)
#  define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
#else
/*
 * Downcast + upcast is usually better than masking on older compilers like
 * VC6: the downcast chops off the high bits, and the compiler knows the
 * result cannot exceed 32x32 bits.
 */
#  define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
#endif
/*!
 * @brief Calculates a 64->128-bit long multiply.
 */
static XXH128_hash_t
XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
{
    /*
     * GCC/Clang __uint128_t method.
     */
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
    && defined(__SIZEOF_INT128__) \
    || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)

    __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
    XXH128_hash_t r128;
    r128.low64  = (xxh_u64)(product);
    r128.high64 = (xxh_u64)(product >> 64);
    return r128;

    /*
     * MSVC for x64's _umul128 method.
     */
#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)

#   pragma intrinsic(_umul128)
    xxh_u64 product_high;
    xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
    XXH128_hash_t r128;
    r128.low64  = product_low;
    r128.high64 = product_high;
    return r128;

    /*
     * MSVC for ARM64's __umulh method.
     */
#elif defined(_M_ARM64) || defined(_M_ARM64EC)

#   pragma intrinsic(__umulh)
    XXH128_hash_t r128;
    r128.low64  = lhs * rhs;
    r128.high64 = __umulh(lhs, rhs);
    return r128;

#else
    /*
     * Portable scalar method, optimized for 32-bit systems: the 64x64->128
     * multiply is decomposed into four 32x32->64 partial products.
     */

    /* First calculate all of the cross products. */
    xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
    xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32,        rhs & 0xFFFFFFFF);
    xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
    xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32,        rhs >> 32);

    /* Now add the products together. These will never overflow. */
    xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
    xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32)        + hi_hi;
    xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);

    XXH128_hash_t r128;
    r128.low64  = lower;
    r128.high64 = upper;
    return r128;
#endif
}
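
/*
 * Worked check (added note) for the portable path: with lhs = rhs = 2^32,
 * only hi_hi is non-zero (1), so upper = 1 and lower = 0, matching the true
 * 128-bit product 2^64. The sums are arranged so that no partial sum ever
 * exceeds 64 bits.
 */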
/*! Calculates a 64-bit to 128-bit multiply, then XOR folds it. */
static xxh_u64
XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
{
    XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
    return product.low64 ^ product.high64;
}

/*! Seems to produce slightly better code on GCC for some reason. */
XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
{
    XXH_ASSERT(0 <= shift && shift < 64);
    return v64 ^ (v64 >> shift);
}

/*
 * This is a fast avalanche stage,
 * suitable when input bits are already partially mixed.
 */
static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
{
    h64 = XXH_xorshift64(h64, 37);
    h64 *= PRIME_MX1;
    h64 = XXH_xorshift64(h64, 32);
    return h64;
}

/*
 * This is a stronger avalanche,
 * preferable when input has not been previously mixed.
 */
static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
{
    /* this mix is inspired by Pelle Evensen's rrmxmx */
    h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
    h64 *= PRIME_MX2;
    h64 ^= (h64 >> 35) + len ;
    h64 *= PRIME_MX2;
    return XXH_xorshift64(h64, 28);
}
/* ==========================================
 * Short keys
 * ========================================== */

XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(1 <= len && len <= 3);
    XXH_ASSERT(secret != NULL);
    /*
     * len = 1: combined = { input[0], 0x01, input[0], input[0] }
     * len = 2: combined = { input[1], 0x02, input[0], input[1] }
     * len = 3: combined = { input[2], 0x03, input[0], input[1] }
     */
    {   xxh_u8  const c1 = input[0];
        xxh_u8  const c2 = input[len >> 1];
        xxh_u8  const c3 = input[len - 1];
        xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2  << 24)
                               | ((xxh_u32)c3 <<  0) | ((xxh_u32)len <<  8);
        xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
        xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
        return XXH64_avalanche(keyed);
    }
}
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(4 <= len && len <= 8);
    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
    {   xxh_u32 const input1 = XXH_readLE32(input);
        xxh_u32 const input2 = XXH_readLE32(input + len - 4);
        xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
        xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
        xxh_u64 const keyed = input64 ^ bitflip;
        return XXH3_rrmxmx(keyed, len);
    }
}
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(9 <= len && len <= 16);
    {   xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
        xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
        xxh_u64 const input_lo = XXH_readLE64(input)           ^ bitflip1;
        xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
        xxh_u64 const acc = len
                          + XXH_swap64(input_lo) + input_hi
                          + XXH3_mul128_fold64(input_lo, input_hi);
        return XXH3_avalanche(acc);
    }
}
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(len <= 16);
    {   if (XXH_likely(len >  8)) return XXH3_len_9to16_64b(input, len, secret, seed);
        if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
        if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
        return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
    }
}
XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
                                     const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
{
#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__i386__) && defined(__SSE2__)  /* x86 + SSE2 */ \
  && !defined(XXH_ENABLE_AUTOVECTORIZE)      /* Define to disable like XXH32 hack */
    /*
     * UGLY HACK: GCC for x86 tends to autovectorize the 128-bit multiply,
     * resulting in more instructions and slower code. Clobbering the seed
     * prevents it.
     */
    XXH_COMPILER_GUARD(seed64);
#endif
    {   xxh_u64 const input_lo = XXH_readLE64(input);
        xxh_u64 const input_hi = XXH_readLE64(input+8);
        return XXH3_mul128_fold64(
            input_lo ^ (XXH_readLE64(secret)   + seed64),
            input_hi ^ (XXH_readLE64(secret+8) - seed64)
        );
    }
}
/* For mid range keys, XXH3 uses a Mum-hash variant. */
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                     const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                     XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(16 < len && len <= 128);

    {   xxh_u64 acc = len * XXH_PRIME64_1, acc_end;
#if XXH_SIZE_OPT >= 1
        /* Smaller and cleaner, but slightly slower. */
        unsigned int i = (unsigned int)(len - 1) / 32;
        do {
            acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
            acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
        } while (i-- != 0);
        acc_end = 0;
#else
        acc += XXH3_mix16B(input+0, secret+0, seed);
        acc_end = XXH3_mix16B(input+len-16, secret+16, seed);
        if (len > 32) {
            acc += XXH3_mix16B(input+16, secret+32, seed);
            acc_end += XXH3_mix16B(input+len-32, secret+48, seed);
            if (len > 64) {
                acc += XXH3_mix16B(input+32, secret+64, seed);
                acc_end += XXH3_mix16B(input+len-48, secret+80, seed);
                if (len > 96) {
                    acc += XXH3_mix16B(input+48, secret+96, seed);
                    acc_end += XXH3_mix16B(input+len-64, secret+112, seed);
                }
            }
        }
#endif
        return XXH3_avalanche(acc + acc_end);
    }
}
#define XXH3_MIDSIZE_MAX 240

XXH_NO_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                      XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);

#define XXH3_MIDSIZE_STARTOFFSET 3
#define XXH3_MIDSIZE_LASTOFFSET  17

    {   xxh_u64 acc = len * XXH_PRIME64_1;
        xxh_u64 acc_end;
        unsigned int const nbRounds = (unsigned int)len / 16;
        unsigned int i;
        XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
        for (i=0; i<8; i++) {
            acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
        }
        /* last bytes */
        acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
        XXH_ASSERT(nbRounds >= 8);
        acc = XXH3_avalanche(acc);
#if defined(__clang__)                                /* Clang */ \
    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
        /*
         * UGLY HACK: Clang for ARMv7-A tries to vectorize this loop and
         * pessimizes it; disable loop vectorization here.
         */
        #pragma clang loop vectorize(disable)
#endif
        for (i=8 ; i < nbRounds; i++) {
            /*
             * Prevents clang from unrolling the acc loop and interleaving
             * it with this one.
             */
            XXH_COMPILER_GUARD(acc);
            acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
        }
        return XXH3_avalanche(acc + acc_end);
    }
}
/* =======     Long Keys     ======= */

#define XXH_STRIPE_LEN 64
#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
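
/*
 * Arithmetic note (added): a stripe is 64 bytes, split across
 * XXH_ACC_NB = 64 / sizeof(xxh_u64) = 8 accumulator lanes of 64 bits each;
 * each stripe also advances the secret by XXH_SECRET_CONSUME_RATE = 8 bytes.
 */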
#ifdef XXH_OLD_NAMES
#  define STRIPE_LEN XXH_STRIPE_LEN
#  define ACC_NB XXH_ACC_NB
#endif

#ifndef XXH_PREFETCH_DIST
#  ifdef __clang__
#    define XXH_PREFETCH_DIST 320
#  else
#    if (XXH_VECTOR == XXH_AVX512)
#      define XXH_PREFETCH_DIST 512
#    else
#      define XXH_PREFETCH_DIST 384
#    endif
#  endif /* __clang__ */
#endif  /* XXH_PREFETCH_DIST */
/*
 * This macro generates an XXH3_accumulate() function.
 * The name of the generated symbol is XXH3_accumulate_<name>().
 */
#define XXH3_ACCUMULATE_TEMPLATE(name)                      \
void                                                        \
XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,           \
                       const xxh_u8* XXH_RESTRICT input,    \
                       const xxh_u8* XXH_RESTRICT secret,   \
                       size_t nbStripes)                    \
{                                                           \
    size_t n;                                               \
    for (n = 0; n < nbStripes; n++ ) {                      \
        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  \
        XXH_PREFETCH(in + XXH_PREFETCH_DIST);               \
        XXH3_accumulate_512_##name(                         \
                 acc,                                       \
                 in,                                        \
                 secret + n*XXH_SECRET_CONSUME_RATE);       \
    }                                                       \
}
XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
{
    if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
    XXH_memcpy(dst, &v64, sizeof(v64));
}

/* Several intrinsics below expect __int64 arguments, but some environments
 * do not define the __int64 type, requiring a workaround.
 */
#if !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
    typedef int64_t xxh_i64;
#else
    /* the following type must have a width of 64-bit */
    typedef long long xxh_i64;
#endif
#if (XXH_VECTOR == XXH_AVX512) \
     || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)

#ifndef XXH_TARGET_AVX512
# define XXH_TARGET_AVX512  /* disable attribute target */
#endif

XXH_FORCE_INLINE XXH_TARGET_AVX512 void
XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT input,
                     const void* XXH_RESTRICT secret)
{
    __m512i* const xacc = (__m512i *) acc;
    XXH_ASSERT((((size_t)acc) & 63) == 0);
    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));

    {
        /* data_vec    = input[0]; */
        __m512i const data_vec    = _mm512_loadu_si512   (input);
        /* key_vec     = secret[0]; */
        __m512i const key_vec     = _mm512_loadu_si512   (secret);
        /* data_key    = data_vec ^ key_vec; */
        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
        /* data_key_lo = data_key >> 32; */
        __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
        /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
        __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
        /* xacc[0] += swap(data_vec); */
        __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
        __m512i const sum       = _mm512_add_epi64(*xacc, data_swap);
        /* xacc[0] += product; */
        *xacc = _mm512_add_epi64(product, sum);
    }
}
XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
XXH_FORCE_INLINE XXH_TARGET_AVX512 void
XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 63) == 0);
    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
    {   __m512i* const xacc = (__m512i*) acc;
        const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);

        /* xacc[0] ^= (xacc[0] >> 47) */
        __m512i const acc_vec     = *xacc;
        __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
        /* xacc[0] ^= secret; */
        __m512i const key_vec     = _mm512_loadu_si512   (secret);
        __m512i const data_key    = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);

        /* xacc[0] *= XXH_PRIME32_1; */
        __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
        __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
        __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
        *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
    }
}
XXH_FORCE_INLINE XXH_TARGET_AVX512 void
XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
    XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
    XXH_ASSERT(((size_t)customSecret & 63) == 0);
    (void)(&XXH_writeLE64);
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
        __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
        __m512i const seed     = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);

        const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
              __m512i* const dest = (      __m512i*) customSecret;
        int i;
        XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
        XXH_ASSERT(((size_t)dest & 63) == 0);
        for (i=0; i < nbRounds; ++i) {
            dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
    }   }
}

#endif /* XXH_VECTOR == XXH_AVX512 */
#if (XXH_VECTOR == XXH_AVX2) \
    || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)

#ifndef XXH_TARGET_AVX2
# define XXH_TARGET_AVX2  /* disable attribute target */
#endif

XXH_FORCE_INLINE XXH_TARGET_AVX2 void
XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 31) == 0);
    {   __m256i* const xacc    =       (__m256i *) acc;
        /* Unaligned. This is mainly for pointer arithmetic, and because
         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
        const         __m256i* const xinput  = (const __m256i *) input;
        /* Unaligned. This is mainly for pointer arithmetic, and because
         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
        const         __m256i* const xsecret = (const __m256i *) secret;

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
            /* data_vec    = xinput[i]; */
            __m256i const data_vec    = _mm256_loadu_si256    (xinput+i);
            /* key_vec     = xsecret[i]; */
            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
            /* data_key    = data_vec ^ key_vec; */
            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
            /* data_key_lo = data_key >> 32; */
            __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
            __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
            /* xacc[i] += swap(data_vec); */
            __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
            __m256i const sum       = _mm256_add_epi64(xacc[i], data_swap);
            /* xacc[i] += product; */
            xacc[i] = _mm256_add_epi64(product, sum);
    }   }
}
XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
XXH_FORCE_INLINE XXH_TARGET_AVX2 void
XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 31) == 0);
    {   __m256i* const xacc = (__m256i*) acc;
        /* Unaligned. This is mainly for pointer arithmetic, and because
         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
        const         __m256i* const xsecret = (const __m256i *) secret;
        const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
            /* xacc[i] ^= (xacc[i] >> 47) */
            __m256i const acc_vec     = xacc[i];
            __m256i const shifted     = _mm256_srli_epi64    (acc_vec, 47);
            __m256i const data_vec    = _mm256_xor_si256     (acc_vec, shifted);
            /* xacc[i] ^= xsecret; */
            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);

            /* xacc[i] *= XXH_PRIME32_1; */
            __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
            __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
            __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
            xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
    }   }
}
XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
    (void)(&XXH_writeLE64);
    XXH_PREFETCH(customSecret);
    {   __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);

        const __m256i* const src  = (const __m256i*) ((const void*) XXH3_kSecret);
              __m256i*       dest = (      __m256i*) customSecret;

#       if defined(__GNUC__) || defined(__clang__)
        /*
         * On GCC & Clang, marking 'dest' as modified will cause the compiler:
         *   - to not extract the secret from sse registers in the internal loop
         *   - to use less common registers, and avoid pushing them onto the stack
         */
        XXH_COMPILER_GUARD(dest);
#       endif
        XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
        XXH_ASSERT(((size_t)dest & 31) == 0);

        /* GCC -O2 needs the loop unrolled manually */
        dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
        dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
        dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
        dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
        dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
        dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
    }
}

#endif /* XXH_VECTOR == XXH_AVX2 */
#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)

#ifndef XXH_TARGET_SSE2
# define XXH_TARGET_SSE2  /* disable attribute target */
#endif

XXH_FORCE_INLINE XXH_TARGET_SSE2 void
XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    /* SSE2 is just a half-scale version of the AVX2 version. */
    XXH_ASSERT((((size_t)acc) & 15) == 0);
    {   __m128i* const xacc    =       (__m128i *) acc;
        /* Unaligned; mainly for pointer arithmetic */
        const         __m128i* const xinput  = (const __m128i *) input;
        /* Unaligned; mainly for pointer arithmetic */
        const         __m128i* const xsecret = (const __m128i *) secret;

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
            /* data_vec    = xinput[i]; */
            __m128i const data_vec    = _mm_loadu_si128   (xinput+i);
            /* key_vec     = xsecret[i]; */
            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
            /* data_key    = data_vec ^ key_vec; */
            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
            /* data_key_lo = data_key >> 32; */
            __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
            __m128i const product     = _mm_mul_epu32     (data_key, data_key_lo);
            /* xacc[i] += swap(data_vec); */
            __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
            __m128i const sum       = _mm_add_epi64(xacc[i], data_swap);
            /* xacc[i] += product; */
            xacc[i] = _mm_add_epi64(product, sum);
    }   }
}
XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
XXH_FORCE_INLINE XXH_TARGET_SSE2 void
XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);
    {   __m128i* const xacc = (__m128i*) acc;
        /* Unaligned; mainly for pointer arithmetic */
        const __m128i* const xsecret = (const __m128i *) secret;
        const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);

        size_t i;
        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
            /* xacc[i] ^= (xacc[i] >> 47) */
            __m128i const acc_vec     = xacc[i];
            __m128i const shifted     = _mm_srli_epi64    (acc_vec, 47);
            __m128i const data_vec    = _mm_xor_si128     (acc_vec, shifted);
            /* xacc[i] ^= xsecret[i]; */
            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);

            /* xacc[i] *= XXH_PRIME32_1; */
            __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
            __m128i const prod_lo     = _mm_mul_epu32     (data_key, prime32);
            __m128i const prod_hi     = _mm_mul_epu32     (data_key_hi, prime32);
            xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
        }
    }
}
XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
    (void)(&XXH_writeLE64);
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);

#       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
        /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
        __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
#       else
        __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
#       endif
        int i;

        const void* const src16 = XXH3_kSecret;
        __m128i* dst16 = (__m128i*) customSecret;
#       if defined(__GNUC__) || defined(__clang__)
        XXH_COMPILER_GUARD(dst16);
#       endif
        XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
        XXH_ASSERT(((size_t)dst16 & 15) == 0);

        for (i=0; i < nbRounds; ++i) {
            dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
    }   }
}

#endif
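/*
 * NEON note: lanes >= XXH3_NEON_LANES are deliberately handled by the scalar
 * round, so that on aarch64 the integer and vector pipelines are both kept
 * busy within one stripe.
 */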
#if (XXH_VECTOR == XXH_NEON)

/* forward declarations for the scalar routines, defined further below */
XXH_FORCE_INLINE void
XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
                 void const* XXH_RESTRICT secret, size_t lane);

XXH_FORCE_INLINE void
XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
                         void const* XXH_RESTRICT secret, size_t lane);

XXH_FORCE_INLINE void
XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);
    XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
    {   /* GCC for darwin arm64 does not like aliasing here */
        xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
        /* 8-bit pointers to avoid bus errors on ARMv7 */
        uint8_t const* xinput  = (const uint8_t *) input;
        uint8_t const* xsecret = (const uint8_t *) secret;

        size_t i;
        /* Scalar lanes use the normal scalarRound routine */
        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
            XXH3_scalarRound(acc, input, secret, i);
        }
        i = 0;
        /* 4 NEON lanes at a time. */
        for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
            /* data_vec = xinput[i]; */
            uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16));
            uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16));
            /* key_vec  = xsecret[i]; */
            uint64x2_t key_vec_1  = XXH_vld1q_u64(xsecret + (i * 16));
            uint64x2_t key_vec_2  = XXH_vld1q_u64(xsecret + ((i+1) * 16));
            /* data_swap = swap(data_vec) */
            uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
            uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
            /* data_key = data_vec ^ key_vec */
            uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
            uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);

            /* unzip {data_key_1, data_key_2} into 32-bit halves */
            uint32x4x2_t unzipped = vuzpq_u32(
                vreinterpretq_u32_u64(data_key_1),
                vreinterpretq_u32_u64(data_key_2)
            );
            /* data_key_lo = data_key & 0xFFFFFFFF */
            uint32x4_t data_key_lo = unzipped.val[0];
            /* data_key_hi = data_key >> 32 */
            uint32x4_t data_key_hi = unzipped.val[1];
            /* sum = data_swap + (u64x2)data_key_lo * (u64x2)data_key_hi */
            uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
            uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
            /* Prevent the compiler from reordering the accumulator loads/stores */
            XXH_COMPILER_GUARD_W(sum_1);
            XXH_COMPILER_GUARD_W(sum_2);
            /* xacc[i] += sum */
            xacc[i]   = vaddq_u64(xacc[i], sum_1);
            xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
        }
        /* Operate on the remaining NEON lanes 2 at a time. */
        for (; i < XXH3_NEON_LANES / 2; i++) {
            uint64x2_t data_vec  = XXH_vld1q_u64(xinput + (i * 16));
            uint64x2_t key_vec   = XXH_vld1q_u64(xsecret + (i * 16));
            uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
            uint64x2_t data_key  = veorq_u64(data_vec, key_vec);
            uint32x2_t data_key_lo = vmovn_u64(data_key);
            uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
            uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
            XXH_COMPILER_GUARD_W(sum);
            xacc[i] = vaddq_u64 (xacc[i], sum);
        }
    }
}
XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
XXH_FORCE_INLINE void
XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);

    {   xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;
        uint8_t const* xsecret = (uint8_t const*) secret;
        uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);

        size_t i;
        /* AArch64 uses both scalar and neon at the same time */
        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
            XXH3_scalarScrambleRound(acc, secret, i);
        }
        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
            /* xacc[i] ^= (xacc[i] >> 47); */
            uint64x2_t acc_vec  = xacc[i];
            uint64x2_t shifted  = vshrq_n_u64(acc_vec, 47);
            uint64x2_t data_vec = veorq_u64(acc_vec, shifted);

            /* xacc[i] ^= xsecret[i]; */
            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
            uint64x2_t data_key = veorq_u64(data_vec, key_vec);

            /* xacc[i] *= XXH_PRIME32_1 */
            uint32x2_t data_key_lo = vmovn_u64(data_key);
            uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
            /* prod_hi = (data_key >> 32) * XXH_PRIME32_1 */
            uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
            /* shift it into the high 32 bits */
            prod_hi = vshlq_n_u64(prod_hi, 32);
            /* xacc[i] = prod_hi + (data_key & 0xFFFFFFFF) * XXH_PRIME32_1 */
            xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
        }
    }
}
#endif /* XXH_VECTOR == XXH_NEON */
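/*
 * VSX / ZVector note: POWER and z/Arch expose even/odd 32x32->64 multiplies
 * (vec_mule / vec_mulo), so the 64-bit multiply by XXH_PRIME32_1 below is
 * assembled from an odd product plus a shifted even product.
 */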
#if (XXH_VECTOR == XXH_VSX)

XXH_FORCE_INLINE void
XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
                    const void* XXH_RESTRICT input,
                    const void* XXH_RESTRICT secret)
{
    /* presumed aligned */
    xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
    xxh_u8 const* const xinput  = (xxh_u8 const*) input;   /* no alignment restriction */
    xxh_u8 const* const xsecret = (xxh_u8 const*) secret;  /* no alignment restriction */
    xxh_u64x2 const v32 = { 32, 32 };
    size_t i;
    for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
        /* data_vec = xinput[i]; */
        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
        /* key_vec = xsecret[i]; */
        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
        xxh_u64x2 const data_key = data_vec ^ key_vec;
        /* shuffled = (data_key << 32) | (data_key >> 32); */
        xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
        /* product = ((u64x2)data_key & 0xFFFFFFFF) * ((u64x2)shuffled & 0xFFFFFFFF); */
        xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
        /* acc_vec = xacc[i]; */
        xxh_u64x2 acc_vec        = xacc[i];
        acc_vec += product;

        /* swap high and low halves */
#ifdef __s390x__
        acc_vec += vec_permi(data_vec, data_vec, 2);
#else
        acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
#endif
        xacc[i] = acc_vec;
    }
}
XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
XXH_FORCE_INLINE void
XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    XXH_ASSERT((((size_t)acc) & 15) == 0);

    {   xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
        const xxh_u8* const xsecret = (const xxh_u8*) secret;
        /* constants */
        xxh_u64x2 const v32 = { 32, 32 };
        xxh_u64x2 const v47 = { 47, 47 };
        xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
        size_t i;
        for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
            /* xacc[i] ^= (xacc[i] >> 47); */
            xxh_u64x2 const acc_vec  = xacc[i];
            xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);

            /* xacc[i] ^= xsecret[i]; */
            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
            xxh_u64x2 const data_key = data_vec ^ key_vec;

            /* xacc[i] *= XXH_PRIME32_1 */
            /* prod_even = (even u32 lanes of data_key) * XXH_PRIME32_1 */
            xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
            /* prod_odd  = (odd u32 lanes of data_key)  * XXH_PRIME32_1 */
            xxh_u64x2 const prod_odd  = XXH_vec_mulo((xxh_u32x4)data_key, prime);
            xacc[i] = prod_odd + (prod_even << v32);
        }
    }
}
#endif /* XXH_VECTOR == XXH_VSX */
#if (XXH_VECTOR == XXH_SVE)

XXH_FORCE_INLINE void
XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
                   const void* XXH_RESTRICT input,
                   const void* XXH_RESTRICT secret)
{
    uint64_t *xacc = (uint64_t *)acc;
    const uint64_t *xinput  = (const uint64_t *)(const void *)input;
    const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
    svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
    uint64_t element_count = svcntd();
    if (element_count >= 8) {
        svbool_t mask = svptrue_pat_b64(SV_VL8);
        svuint64_t vacc = svld1_u64(mask, xacc);
        ACCRND(vacc, 0);   /* ACCRND() is defined earlier in this file */
        svst1_u64(mask, xacc, vacc);
    } else if (element_count == 2) {   /* sve128 */
        svbool_t mask = svptrue_pat_b64(SV_VL2);
        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
        svuint64_t acc1 = svld1_u64(mask, xacc + 2);
        svuint64_t acc2 = svld1_u64(mask, xacc + 4);
        svuint64_t acc3 = svld1_u64(mask, xacc + 6);
        ACCRND(acc0, 0);
        ACCRND(acc1, 2);
        ACCRND(acc2, 4);
        ACCRND(acc3, 6);
        svst1_u64(mask, xacc + 0, acc0);
        svst1_u64(mask, xacc + 2, acc1);
        svst1_u64(mask, xacc + 4, acc2);
        svst1_u64(mask, xacc + 6, acc3);
    } else {
        svbool_t mask = svptrue_pat_b64(SV_VL4);
        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
        svuint64_t acc1 = svld1_u64(mask, xacc + 4);
        ACCRND(acc0, 0);
        ACCRND(acc1, 4);
        svst1_u64(mask, xacc + 0, acc0);
        svst1_u64(mask, xacc + 4, acc1);
    }
}

XXH_FORCE_INLINE void
XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
               const xxh_u8* XXH_RESTRICT input,
               const xxh_u8* XXH_RESTRICT secret,
               size_t nbStripes)
{
    if (nbStripes != 0) {
        uint64_t *xacc = (uint64_t *)acc;
        const uint64_t *xinput  = (const uint64_t *)(const void *)input;
        const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
        svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
        uint64_t element_count = svcntd();
        if (element_count >= 8) {
            svbool_t mask = svptrue_pat_b64(SV_VL8);
            svuint64_t vacc = svld1_u64(mask, xacc + 0);
            do {
                /* prefetch the next stripes */
                svprfd(mask, xinput + 128, SV_PLDL1STRM);
                ACCRND(vacc, 0);
                xinput += 8;
                xsecret += 1;
                nbStripes--;
            } while (nbStripes != 0);

            svst1_u64(mask, xacc + 0, vacc);
        } else if (element_count == 2) { /* sve128 */
            svbool_t mask = svptrue_pat_b64(SV_VL2);
            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
            svuint64_t acc1 = svld1_u64(mask, xacc + 2);
            svuint64_t acc2 = svld1_u64(mask, xacc + 4);
            svuint64_t acc3 = svld1_u64(mask, xacc + 6);
            do {
                svprfd(mask, xinput + 128, SV_PLDL1STRM);
                ACCRND(acc0, 0);
                ACCRND(acc1, 2);
                ACCRND(acc2, 4);
                ACCRND(acc3, 6);
                xinput += 8;
                xsecret += 1;
                nbStripes--;
            } while (nbStripes != 0);

            svst1_u64(mask, xacc + 0, acc0);
            svst1_u64(mask, xacc + 2, acc1);
            svst1_u64(mask, xacc + 4, acc2);
            svst1_u64(mask, xacc + 6, acc3);
        } else {
            svbool_t mask = svptrue_pat_b64(SV_VL4);
            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
            svuint64_t acc1 = svld1_u64(mask, xacc + 4);
            do {
                svprfd(mask, xinput + 128, SV_PLDL1STRM);
                ACCRND(acc0, 0);
                ACCRND(acc1, 4);
                xinput += 8;
                xsecret += 1;
                nbStripes--;
            } while (nbStripes != 0);

            svst1_u64(mask, xacc + 0, acc0);
            svst1_u64(mask, xacc + 4, acc1);
        }
    }
}

#endif
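/*
 * SVE note: svcntd() reports how many 64-bit elements fit in one vector,
 * so the 8-lane accumulator above is processed as one (>= 512-bit vectors),
 * two (256-bit) or four (128-bit) SVE registers depending on the hardware
 * vector length, with the stripe loop folded into each branch.
 */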
#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
/*
 * aarch64 provides UMADDL, a 32x32->64 multiply-accumulate with a 64-bit
 * addend, which is exactly the operation the scalar round needs.
 */
XXH_FORCE_INLINE xxh_u64
XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
{
    xxh_u64 ret;
    /* note : %x = 64-bit register, %w = 32-bit register */
    __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
    return ret;
}
#else
XXH_FORCE_INLINE xxh_u64
XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
{
    return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
}
#endif
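/*
 * Scalar round sketch (illustrative): for each 8-byte lane,
 *     data_key       = input64 ^ secret64;
 *     acc[lane ^ 1] += input64;                              // swapped-lane add
 *     acc[lane]     += (xxh_u32)data_key * (data_key >> 32); // 32x32->64
 * XXH_mult32to64_add64 above maps that last line onto a single UMADDL
 * instruction on aarch64.
 */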
XXH_FORCE_INLINE void
XXH3_scalarRound(void* XXH_RESTRICT acc,
                 void const* XXH_RESTRICT input,
                 void const* XXH_RESTRICT secret,
                 size_t lane)
{
    xxh_u64* xacc = (xxh_u64*) acc;
    xxh_u8 const* xinput  = (xxh_u8 const*) input;
    xxh_u8 const* xsecret = (xxh_u8 const*) secret;
    XXH_ASSERT(lane < XXH_ACC_NB);
    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
    {   xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
        xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
        xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
    }
}
XXH_FORCE_INLINE void
XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT input,
                     const void* XXH_RESTRICT secret)
{
    size_t i;
    /* ARM GCC refuses to unroll this loop, resulting in a sizable slowdown on ARMv6. */
#if defined(__GNUC__) && !defined(__clang__) \
  && (defined(__arm__) || defined(__thumb2__)) \
  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
  && XXH_SIZE_OPT <= 0
#  pragma GCC unroll 8
#endif
    for (i=0; i < XXH_ACC_NB; i++) {
        XXH3_scalarRound(acc, input, secret, i);
    }
}
XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
XXH_FORCE_INLINE void
XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
                         void const* XXH_RESTRICT secret,
                         size_t lane)
{
    xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
    XXH_ASSERT(lane < XXH_ACC_NB);
    {   xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
        xxh_u64 acc64 = xacc[lane];
        acc64 = XXH_xorshift64(acc64, 47);
        acc64 ^= key64;
        acc64 *= XXH_PRIME32_1;
        xacc[lane] = acc64;
    }
}

XXH_FORCE_INLINE void
XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
    size_t i;
    for (i=0; i < XXH_ACC_NB; i++) {
        XXH3_scalarScrambleRound(acc, secret, i);
    }
}
XXH_FORCE_INLINE void
XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    /*
     * A separate pointer is needed for the compiler-guard hack below,
     * which requires a non-const pointer.
     * Any decent compiler will optimize this out otherwise.
     */
    const xxh_u8* kSecretPtr = XXH3_kSecret;
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);

#if defined(__GNUC__) && defined(__aarch64__)
    /*
     * Hide kSecretPtr from the compiler so that it uses cheap pointer
     * arithmetic instead of materializing large constants.
     */
    XXH_COMPILER_GUARD(kSecretPtr);
#endif
    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
        int i;
        for (i=0; i < nbRounds; i++) {
            /* per 16-byte block: low word += seed, high word -= seed */
            xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i)     + seed64;
            xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
            XXH_writeLE64((xxh_u8*)customSecret + 16*i,     lo);
            XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
    }   }
}
typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);


#if (XXH_VECTOR == XXH_AVX512)

#define XXH3_accumulate_512   XXH3_accumulate_512_avx512
#define XXH3_accumulate       XXH3_accumulate_avx512
#define XXH3_scrambleAcc      XXH3_scrambleAcc_avx512
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512

#elif (XXH_VECTOR == XXH_AVX2)

#define XXH3_accumulate_512   XXH3_accumulate_512_avx2
#define XXH3_accumulate       XXH3_accumulate_avx2
#define XXH3_scrambleAcc      XXH3_scrambleAcc_avx2
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2

#elif (XXH_VECTOR == XXH_SSE2)

#define XXH3_accumulate_512   XXH3_accumulate_512_sse2
#define XXH3_accumulate       XXH3_accumulate_sse2
#define XXH3_scrambleAcc      XXH3_scrambleAcc_sse2
#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2

#elif (XXH_VECTOR == XXH_NEON)

#define XXH3_accumulate_512   XXH3_accumulate_512_neon
#define XXH3_accumulate       XXH3_accumulate_neon
#define XXH3_scrambleAcc      XXH3_scrambleAcc_neon
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#elif (XXH_VECTOR == XXH_VSX)

#define XXH3_accumulate_512   XXH3_accumulate_512_vsx
#define XXH3_accumulate       XXH3_accumulate_vsx
#define XXH3_scrambleAcc      XXH3_scrambleAcc_vsx
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#elif (XXH_VECTOR == XXH_SVE)

#define XXH3_accumulate_512   XXH3_accumulate_512_sve
#define XXH3_accumulate       XXH3_accumulate_sve
#define XXH3_scrambleAcc      XXH3_scrambleAcc_scalar
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#else /* scalar */

#define XXH3_accumulate_512   XXH3_accumulate_512_scalar
#define XXH3_accumulate       XXH3_accumulate_scalar
#define XXH3_scrambleAcc      XXH3_scrambleAcc_scalar
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar

#endif

#if XXH_SIZE_OPT >= 1 /* don't do SIMD for secret initialization */
# undef  XXH3_initCustomSecret
# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#endif
XXH_FORCE_INLINE void
XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
                      const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                            XXH3_f_accumulate f_acc,
                            XXH3_f_scrambleAcc f_scramble)
{
    size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
    size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
    size_t const nb_blocks = (len - 1) / block_len;

    size_t n;

    for (n = 0; n < nb_blocks; n++) {
        f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
        f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
    }

    /* last partial block */
    XXH_ASSERT(len > XXH_STRIPE_LEN);
    {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
        XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
        f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);

        /* last stripe */
        {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
#define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
            XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
    }   }
}
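/*
 * Loop structure sketch: input is consumed in "blocks" of nbStripesPerBlock
 * stripes; each stripe advances the secret window by XXH_SECRET_CONSUME_RATE
 * bytes, and the accumulators are scrambled at every block boundary. The
 * final stripe (always present) is processed with a dedicated secret offset
 * so that it never matches the scrambler's secret segment.
 */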
XXH_FORCE_INLINE xxh_u64
XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
{
    return XXH3_mul128_fold64(
               acc[0] ^ XXH_readLE64(secret),
               acc[1] ^ XXH_readLE64(secret+8) );
}

static XXH64_hash_t
XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
{
    xxh_u64 result64 = start;
    size_t i = 0;

    for (i = 0; i < 4; i++) {
        result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
#if defined(__clang__)                                /* Clang */ \
    && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \
    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \
    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
        /* Prevent autovectorization on Clang ARMv7-a, which regresses badly here. */
        XXH_COMPILER_GUARD(result64);
#endif
    }

    return XXH3_avalanche(result64);
}

#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
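/*
 * Merge sketch (illustrative): the 8 accumulators are folded pairwise,
 *     result64 = start;
 *     for (i = 0; i < 4; i++)
 *         result64 += mul128_fold64(acc[2*i]   ^ readLE64(secret + 16*i),
 *                                   acc[2*i+1] ^ readLE64(secret + 16*i + 8));
 *     return XXH3_avalanche(result64);
 */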
XXH_FORCE_INLINE XXH64_hash_t
XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
                           const void* XXH_RESTRICT secret, size_t secretSize,
                           XXH3_f_accumulate f_acc,
                           XXH3_f_scrambleAcc f_scramble)
{
    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;

    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);

    /* converge into final hash */
    XXH_STATIC_ASSERT(sizeof(acc) == 64);
    /* do not align on 8, so that the secret is different from the accumulator */
#define XXH_SECRET_MERGEACCS_START 11
    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
    return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
}
/*
 * It's important for performance that XXH3_hashLong() is not inlined.
 */
XXH_NO_INLINE XXH64_hash_t
XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
    (void)seed64;
    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
}

/*
 * It's important for performance that XXH3_hashLong() is not inlined,
 * so that the compiler can recognize that `secret` is the compile-time
 * constant XXH3_kSecret in this specialization.
 */
XXH_NO_INLINE XXH64_hash_t
XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
                          XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
    (void)seed64; (void)secret; (void)secretLen;
    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
}

/*
 * XXH3_hashLong_64b_withSeed():
 * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
 * and then use this key for long mode hashing.
 * When the seed is zero, the default secret can be used directly.
 */
XXH_FORCE_INLINE XXH64_hash_t
XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
                                    XXH64_hash_t seed,
                                    XXH3_f_accumulate f_acc,
                                    XXH3_f_scrambleAcc f_scramble,
                                    XXH3_f_initCustomSecret f_initSec)
{
#if XXH_SIZE_OPT <= 0
    if (seed == 0)
        return XXH3_hashLong_64b_internal(input, len,
                                          XXH3_kSecret, sizeof(XXH3_kSecret),
                                          f_acc, f_scramble);
#endif
    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
        f_initSec(secret, seed);
        return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
                                          f_acc, f_scramble);
    }
}

/*
 * It's important for performance that XXH3_hashLong is not inlined.
 */
XXH_NO_INLINE XXH64_hash_t
XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
                           XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
{
    (void)secret; (void)secretLen;
    return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
}
typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
                                          XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);

XXH_FORCE_INLINE XXH64_hash_t
XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
                     XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
                     XXH3_hashLong64_f f_hashLong)
{
    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
    /*
     * The short-input paths read the secret directly;
     * the long-input path is delegated to f_hashLong.
     */
    if (len <= 16)
        return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
    if (len <= 128)
        return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
    if (len <= XXH3_MIDSIZE_MAX)
        return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
    return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
}

/* ===   Public entry point   === */

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
{
    return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t
XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
{
    return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t
XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
{
    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
}

XXH_PUBLIC_API XXH64_hash_t
XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
{
    if (length <= XXH3_MIDSIZE_MAX)
        return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
    return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
}
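/*
 * One-shot usage sketch (illustrative):
 *     XXH64_hash_t h1 = XXH3_64bits(buf, bufSize);
 *     XXH64_hash_t h2 = XXH3_64bits_withSeed(buf, bufSize, 1234);
 * A custom secret must provide at least XXH3_SECRET_SIZE_MIN bytes:
 *     XXH64_hash_t h3 = XXH3_64bits_withSecret(buf, bufSize, secret, secretSize);
 */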
/* ===   XXH3 streaming   === */
#ifndef XXH_NO_STREAM
/*
 * Allocates a pointer that is always aligned to `align`,
 * by over-allocating and storing the realignment offset
 * in the byte just before the returned pointer.
 */
static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
{
    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
    XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
    {   /* Overallocate to make room for manual realignment and an offset byte */
        xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
        if (base != NULL) {
            /*
             * Get the offset needed to align this pointer.
             * Even if the returned pointer is already aligned, there will
             * always be at least one byte to store the offset.
             */
            size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
            xxh_u8* ptr = base + offset;

            XXH_ASSERT((size_t)ptr % align == 0);

            /* Store the offset immediately before the returned pointer. */
            ptr[-1] = (xxh_u8)offset;
            return ptr;
        }
        return NULL;
    }
}
/*
 * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
 * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
 */
static void XXH_alignedFree(void* p)
{
    if (p != NULL) {
        xxh_u8* ptr = (xxh_u8*)p;
        /* Get the offset byte we added in XXH_alignedMalloc. */
        xxh_u8 offset = ptr[-1];
        /* Free the original malloc'd pointer */
        xxh_u8* base = ptr - offset;
        XXH_free(base);
    }
}
/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
{
    XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
    if (state==NULL) return NULL;
    XXH3_INITSTATE(state);
    return state;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
{
    XXH_alignedFree(statePtr);
    return XXH_OK;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API void
XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
{
    XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
}

static void
XXH3_reset_internal(XXH3_state_t* statePtr,
                    XXH64_hash_t seed,
                    const void* secret, size_t secretSize)
{
    size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
    size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
    XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
    XXH_ASSERT(statePtr != NULL);
    /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
    memset((char*)statePtr + initStart, 0, initLength);
    statePtr->acc[0] = XXH_PRIME32_3;
    statePtr->acc[1] = XXH_PRIME64_1;
    statePtr->acc[2] = XXH_PRIME64_2;
    statePtr->acc[3] = XXH_PRIME64_3;
    statePtr->acc[4] = XXH_PRIME64_4;
    statePtr->acc[5] = XXH_PRIME32_2;
    statePtr->acc[6] = XXH_PRIME64_5;
    statePtr->acc[7] = XXH_PRIME32_1;
    statePtr->seed = seed;
    statePtr->useSeed = (seed != 0);
    statePtr->extSecret = (const unsigned char*)secret;
    statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
    statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
{
    if (statePtr == NULL) return XXH_ERROR;
    XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
    return XXH_OK;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
{
    if (statePtr == NULL) return XXH_ERROR;
    XXH3_reset_internal(statePtr, 0, secret, secretSize);
    if (secret == NULL) return XXH_ERROR;
    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
    return XXH_OK;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
{
    if (statePtr == NULL) return XXH_ERROR;
    if (seed==0) return XXH3_64bits_reset(statePtr);
    if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
        XXH3_initCustomSecret(statePtr->customSecret, seed);
    XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
    return XXH_OK;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
{
    if (statePtr == NULL) return XXH_ERROR;
    if (secret == NULL) return XXH_ERROR;
    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
    XXH3_reset_internal(statePtr, seed64, secret, secretSize);
    statePtr->useSeed = 1; /* always, even if seed64==0 */
    return XXH_OK;
}
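/*
 * State lifetime note: XXH3_createState() guarantees the 64-byte alignment
 * that the SIMD code paths rely on. A statically or stack-allocated
 * XXH3_state_t must be initialized with XXH3_INITSTATE() before its first
 * reset.
 */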
/* Note : when XXH3_consumeStripes() is invoked,
 * there must be a guarantee that at least one more byte must be consumed from input
 * so that the function can blindly consume all stripes using the "normal" secret segment */
XXH_FORCE_INLINE const xxh_u8 *
XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
                    size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
                    const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
                    const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
                    XXH3_f_accumulate f_acc,
                    XXH3_f_scrambleAcc f_scramble)
{
    const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
    /* Process full blocks */
    if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
        /* Process the initial partial block... */
        size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;

        do {
            /* Accumulate and scramble */
            f_acc(acc, input, initialSecret, nbStripesThisIter);
            f_scramble(acc, secret + secretLimit);
            input += nbStripesThisIter * XXH_STRIPE_LEN;
            nbStripes -= nbStripesThisIter;
            /* Then continue the loop with the full block size */
            nbStripesThisIter = nbStripesPerBlock;
            initialSecret = secret;
        } while (nbStripes >= nbStripesPerBlock);
        *nbStripesSoFarPtr = 0;
    }
    /* Process a partial block */
    if (nbStripes > 0) {
        f_acc(acc, input, initialSecret, nbStripes);
        input += nbStripes * XXH_STRIPE_LEN;
        *nbStripesSoFarPtr += nbStripes;
    }
    /* Return end pointer */
    return input;
}
#ifndef XXH3_STREAM_USE_STACK
# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need the stack copy */
#   define XXH3_STREAM_USE_STACK 1
# endif
#endif
/*
 * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
 */
XXH_FORCE_INLINE XXH_errorcode
XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
            const xxh_u8* XXH_RESTRICT input, size_t len,
            XXH3_f_accumulate f_acc,
            XXH3_f_scrambleAcc f_scramble)
{
    if (input==NULL) {
        XXH_ASSERT(len == 0);
        return XXH_OK;
    }

    XXH_ASSERT(state != NULL);
    {   const xxh_u8* const bEnd = input + len;
        const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
        /* For some reason, gcc and MSVC seem to suffer greatly when operating
         * accumulators directly into state. Operating into stack space seems
         * to enable proper optimization. clang doesn't need this trick. */
        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
        XXH_memcpy(acc, state->acc, sizeof(acc));
#else
        xxh_u64* XXH_RESTRICT const acc = state->acc;
#endif
        state->totalLen += len;
        XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);

        /* small input : just fill in tmp buffer */
        if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
            XXH_memcpy(state->buffer + state->bufferedSize, input, len);
            state->bufferedSize += (XXH32_hash_t)len;
            return XXH_OK;
        }

        /* total input is now > XXH3_INTERNALBUFFER_SIZE */
#define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
        XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */

        /*
         * Internal buffer is partially filled (always, except at beginning).
         * Complete it, then consume it.
         */
        if (state->bufferedSize) {
            size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
            XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
            input += loadSize;
            XXH3_consumeStripes(acc,
                               &state->nbStripesSoFar, state->nbStripesPerBlock,
                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
                                secret, state->secretLimit,
                                f_acc, f_scramble);
            state->bufferedSize = 0;
        }
        XXH_ASSERT(input < bEnd);
        if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
            size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
            input = XXH3_consumeStripes(acc,
                                       &state->nbStripesSoFar, state->nbStripesPerBlock,
                                        input, nbStripes,
                                        secret, state->secretLimit,
                                        f_acc, f_scramble);
            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
        }
        /* Some remaining input (always) : buffer it */
        XXH_ASSERT(input < bEnd);
        XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
        XXH_ASSERT(state->bufferedSize == 0);
        XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
        state->bufferedSize = (XXH32_hash_t)(bEnd-input);
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
        /* save stack accumulators into state */
        XXH_memcpy(state->acc, acc, sizeof(acc));
#endif
    }

    return XXH_OK;
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
    return XXH3_update(state, (const xxh_u8*)input, len,
                       XXH3_accumulate, XXH3_scrambleAcc);
}
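/*
 * Streaming usage sketch (illustrative):
 *     XXH3_state_t* const st = XXH3_createState();
 *     XXH3_64bits_reset(st);
 *     while (more_input) XXH3_64bits_update(st, chunk, chunkSize);
 *     {   XXH64_hash_t const h = XXH3_64bits_digest(st); (void)h; }
 *     XXH3_freeState(st);
 */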
XXH_FORCE_INLINE void
XXH3_digest_long (XXH64_hash_t* acc,
                  const XXH3_state_t* state,
                  const unsigned char* secret)
{
    xxh_u8 lastStripe[XXH_STRIPE_LEN];
    const xxh_u8* lastStripePtr;

    /*
     * Digest on a local copy. This way, the state remains unaltered, and it can
     * continue ingesting more input afterwards.
     */
    XXH_memcpy(acc, state->acc, sizeof(state->acc));
    if (state->bufferedSize >= XXH_STRIPE_LEN) {
        /* Consume remaining stripes then point to remaining data in buffer */
        size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
        size_t nbStripesSoFar = state->nbStripesSoFar;
        XXH3_consumeStripes(acc,
                           &nbStripesSoFar, state->nbStripesPerBlock,
                            state->buffer, nbStripes,
                            secret, state->secretLimit,
                            XXH3_accumulate, XXH3_scrambleAcc);
        lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
    } else {  /* bufferedSize < XXH_STRIPE_LEN */
        /* Copy to temp buffer */
        size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
        XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
        XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
        XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
        lastStripePtr = lastStripe;
    }
    /* Last stripe */
    XXH3_accumulate_512(acc,
                        lastStripePtr,
                        secret + state->secretLimit - XXH_SECRET_LASTACC_START);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
{
    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
    if (state->totalLen > XXH3_MIDSIZE_MAX) {
        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
        XXH3_digest_long(acc, state, secret);
        return XXH3_mergeAccs(acc,
                              secret + XXH_SECRET_MERGEACCS_START,
                              (xxh_u64)state->totalLen * XXH_PRIME64_1);
    }
    /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
    if (state->useSeed)
        return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
    return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                  secret, state->secretLimit + XXH_STRIPE_LEN);
}
#endif /* !XXH_NO_STREAM */


/* ==========================================
 * XXH3 128 bits (a.k.a XXH128)
 * ========================================== */
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    /* A doubled version of 1to3_64b with different constants. */
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(1 <= len && len <= 3);
    XXH_ASSERT(secret != NULL);
    {   xxh_u8  const c1 = input[0];
        xxh_u8  const c2 = input[len >> 1];
        xxh_u8  const c3 = input[len - 1];
        xxh_u32 const combinedl = ((xxh_u32)c1<<16) | ((xxh_u32)c2 << 24)
                                | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
        xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
        xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
        xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
        xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
        xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
        XXH128_hash_t h128;
        h128.low64  = XXH64_avalanche(keyed_lo);
        h128.high64 = XXH64_avalanche(keyed_hi);
        return h128;
    }
}
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(4 <= len && len <= 8);
    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
    {   xxh_u32 const input_lo = XXH_readLE32(input);
        xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
        xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
        xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
        xxh_u64 const keyed = input_64 ^ bitflip;

        /* Shift len to the left to ensure it is even, this avoids even multiplies. */
        XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));

        m128.high64 += (m128.low64 << 1);
        m128.low64  ^= (m128.high64 >> 3);

        m128.low64   = XXH_xorshift64(m128.low64, 35);
        m128.low64  *= PRIME_MX2;
        m128.low64   = XXH_xorshift64(m128.low64, 28);
        m128.high64  = XXH3_avalanche(m128.high64);
        return m128;
    }
}
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(input != NULL);
    XXH_ASSERT(secret != NULL);
    XXH_ASSERT(9 <= len && len <= 16);
    {   xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
        xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
        xxh_u64 const input_lo = XXH_readLE64(input);
        xxh_u64       input_hi = XXH_readLE64(input + len - 8);
        XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
        /* Put len in the middle of m128 to ensure that the length gets mixed. */
        m128.low64 += (xxh_u64)(len - 1) << 54;
        input_hi   ^= bitfliph;
        /*
         * Add the high 32 bits of input_hi to the high 32 bits of m128, then
         * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2
         * to the high 64 bits of m128.
         */
        if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
            /* 32-bit optimized version, which is more readable */
            m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
        } else {
            /* 64-bit optimized (albeit more confusing) version */
            m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
        }
        /* m128 ^= XXH_swap64(m128 >> 64); */
        m128.low64 ^= XXH_swap64(m128.high64);

        {   /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
            XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
            h128.high64 += m128.high64 * XXH_PRIME64_2;

            h128.low64  = XXH3_avalanche(h128.low64);
            h128.high64 = XXH3_avalanche(h128.high64);
            return h128;
    }   }
}
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
    XXH_ASSERT(len <= 16);
    {   if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
        if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
        if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
        {   XXH128_hash_t h128;
            xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
            xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
            h128.low64  = XXH64_avalanche(seed ^ bitflipl);
            h128.high64 = XXH64_avalanche( seed ^ bitfliph);
            return h128;
    }   }
}
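/*
 * The 128-bit short-length routines above mirror the 64-bit ones: the same
 * input gathering, but two independent bitflips from different secret
 * offsets produce the low and high words. Even len==0 yields a non-trivial
 * value, derived from the seed and secret bytes 64..95.
 */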
/*
 * A bit slower than XXH3_mix16B, but handles multiply by zero better.
 */
XXH_FORCE_INLINE XXH128_hash_t
XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
              const xxh_u8* secret, XXH64_hash_t seed)
{
    acc.low64  += XXH3_mix16B (input_1, secret+0, seed);
    acc.low64  ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
    acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
    acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
    return acc;
}
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                      XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(16 < len && len <= 128);

    {   XXH128_hash_t acc;
        acc.low64 = len * XXH_PRIME64_1;
        acc.high64 = 0;

#if XXH_SIZE_OPT >= 1
        {
            /* Smaller, but slightly slower. */
            unsigned int i = (unsigned int)(len - 1) / 32;
            do {
                acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
            } while (i-- != 0);
        }
#else
        if (len > 32) {
            if (len > 64) {
                if (len > 96) {
                    acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
                }
                acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
            }
            acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
        }
        acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
#endif
        {   XXH128_hash_t h128;
            h128.low64  = acc.low64 + acc.high64;
            h128.high64 = (acc.low64    * XXH_PRIME64_1)
                        + (acc.high64   * XXH_PRIME64_4)
                        + ((len - seed) * XXH_PRIME64_2);
            h128.low64  = XXH3_avalanche(h128.low64);
            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
            return h128;
    }   }
}
XXH_FORCE_INLINE XXH128_hash_t
XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                       XXH64_hash_t seed)
{
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);

    {   XXH128_hash_t acc;
        unsigned i;
        acc.low64 = len * XXH_PRIME64_1;
        acc.high64 = 0;
        /*
         * `i` is set to offset + 32, so that unchanged `len` can serve as the
         * upper bound; this gets simple address generation on both x86 and aarch64.
         */
        for (i = 32; i < 160; i += 32) {
            acc = XXH128_mix32B(acc,
                                input  + i - 32,
                                input  + i - 16,
                                secret + i - 32,
                                seed);
        }
        acc.low64 = XXH3_avalanche(acc.low64);
        acc.high64 = XXH3_avalanche(acc.high64);
        /*
         * NB: `i <= len` will duplicate the last 32 bytes if len % 32 is zero.
         * This is an unfortunate necessity to keep the hash result stable.
         */
        for (i=160; i <= len; i += 32) {
            acc = XXH128_mix32B(acc,
                                input + i - 32,
                                input + i - 16,
                                secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
                                seed);
        }
        /* last bytes */
        acc = XXH128_mix32B(acc,
                            input + len - 16,
                            input + len - 32,
                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
                            (XXH64_hash_t)0 - seed);

        {   XXH128_hash_t h128;
            h128.low64  = acc.low64 + acc.high64;
            h128.high64 = (acc.low64    * XXH_PRIME64_1)
                        + (acc.high64   * XXH_PRIME64_4)
                        + ((len - seed) * XXH_PRIME64_2);
            h128.low64  = XXH3_avalanche(h128.low64);
            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
            return h128;
    }   }
}
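/*
 * Mid-size note: the first 160 bytes consume the start of the secret; the
 * remaining 32-byte rounds switch to XXH3_MIDSIZE_STARTOFFSET, and the last
 * 32 bytes are always mixed with a negated seed as a final round.
 */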
XXH_FORCE_INLINE XXH128_hash_t
XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                            XXH3_f_accumulate f_acc,
                            XXH3_f_scrambleAcc f_scramble)
{
    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;

    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);

    /* converge into final hash */
    XXH_STATIC_ASSERT(sizeof(acc) == 64);
    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
    {   XXH128_hash_t h128;
        h128.low64  = XXH3_mergeAccs(acc,
                                     secret + XXH_SECRET_MERGEACCS_START,
                                     (xxh_u64)len * XXH_PRIME64_1);
        h128.high64 = XXH3_mergeAccs(acc,
                                     secret + secretSize
                                            - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
                                     ~((xxh_u64)len * XXH_PRIME64_2));
        return h128;
    }
}
/*
 * It's important for performance that XXH3_hashLong() is not inlined.
 */
XXH_NO_INLINE XXH128_hash_t
XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
                           XXH64_hash_t seed64,
                           const void* XXH_RESTRICT secret, size_t secretLen)
{
    (void)seed64; (void)secret; (void)secretLen;
    return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
                                       XXH3_accumulate, XXH3_scrambleAcc);
}

/*
 * It's important for performance that XXH3_hashLong() is not inlined.
 */
XXH_NO_INLINE XXH128_hash_t
XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
                              XXH64_hash_t seed64,
                              const void* XXH_RESTRICT secret, size_t secretLen)
{
    (void)seed64;
    return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
                                       XXH3_accumulate, XXH3_scrambleAcc);
}

XXH_FORCE_INLINE XXH128_hash_t
XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
                                     XXH64_hash_t seed64,
                                     XXH3_f_accumulate f_acc,
                                     XXH3_f_scrambleAcc f_scramble,
                                     XXH3_f_initCustomSecret f_initSec)
{
    if (seed64 == 0)
        return XXH3_hashLong_128b_internal(input, len,
                                           XXH3_kSecret, sizeof(XXH3_kSecret),
                                           f_acc, f_scramble);
    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
        f_initSec(secret, seed64);
        return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
                                           f_acc, f_scramble);
    }
}

/*
 * It's important for performance that XXH3_hashLong is not inlined.
 */
XXH_NO_INLINE XXH128_hash_t
XXH3_hashLong_128b_withSeed(const void* input, size_t len,
                            XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
{
    (void)secret; (void)secretLen;
    return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
}
typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
                                            XXH64_hash_t, const void* XXH_RESTRICT, size_t);

XXH_FORCE_INLINE XXH128_hash_t
XXH3_128bits_internal(const void* input, size_t len,
                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
                      XXH3_hashLong128_f f_hl128)
{
    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
    /*
     * If an action is to be taken when `secret` conditions are not respected,
     * it should be done here; adding a check and branch would cost performance
     * at every hash.
     */
    if (len <= 16)
        return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
    if (len <= 128)
        return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
    if (len <= XXH3_MIDSIZE_MAX)
        return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
    return f_hl128(input, len, seed64, secret, secretLen);
}
/* ===   Public XXH128 API   === */

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
{
    return XXH3_128bits_internal(input, len, 0,
                                 XXH3_kSecret, sizeof(XXH3_kSecret),
                                 XXH3_hashLong_128b_default);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
{
    return XXH3_128bits_internal(input, len, 0,
                                 (const xxh_u8*)secret, secretSize,
                                 XXH3_hashLong_128b_withSecret);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
    return XXH3_128bits_internal(input, len, seed,
                                 XXH3_kSecret, sizeof(XXH3_kSecret),
                                 XXH3_hashLong_128b_withSeed);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
{
    if (len <= XXH3_MIDSIZE_MAX)
        return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
    return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
{
    return XXH3_128bits_withSeed(input, len, seed);
}
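/*
 * XXH128 usage sketch (illustrative):
 *     XXH128_hash_t const h = XXH3_128bits_withSeed(buf, bufSize, seed);
 *     int const same = XXH128_isEqual(h, expected);
 */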
/* ===   XXH3 128-bit streaming   === */
#ifndef XXH_NO_STREAM
/*
 * All initialization and update functions are shared with the 64-bit variant,
 * since the state layout is identical; only the digest differs.
 */
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
{
    return XXH3_64bits_reset(statePtr);
}

XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
{
    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
}

XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
{
    return XXH3_64bits_reset_withSeed(statePtr, seed);
}

XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
{
    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
}

XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
    return XXH3_64bits_update(state, input, len);
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
{
    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
    if (state->totalLen > XXH3_MIDSIZE_MAX) {
        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
        XXH3_digest_long(acc, state, secret);
        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
        {   XXH128_hash_t h128;
            h128.low64  = XXH3_mergeAccs(acc,
                                         secret + XXH_SECRET_MERGEACCS_START,
                                         (xxh_u64)state->totalLen * XXH_PRIME64_1);
            h128.high64 = XXH3_mergeAccs(acc,
                                         secret + state->secretLimit + XXH_STRIPE_LEN
                                                - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
                                         ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
            return h128;
        }
    }
    /* totalLen <= XXH3_MIDSIZE_MAX : short code path */
    if (state->useSeed)
        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                   secret, state->secretLimit + XXH_STRIPE_LEN);
}
#endif /* !XXH_NO_STREAM */

/* 128-bit utility functions */

#include <string.h>   /* memcmp, memcpy */

/* return : 1 if equal, 0 if different */
/*! @ingroup XXH3_family */
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
{
    /* note : XXH128_hash_t is compact, it has no padding byte */
    return !(memcmp(&h1, &h2, sizeof(h1)));
}

/* This prototype is compatible with stdlib's qsort().
 * return : >0 if *h128_1  > *h128_2
 *          <0 if *h128_1  < *h128_2
 *          =0 if *h128_1 == *h128_2  */
/*! @ingroup XXH3_family */
XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
{
    XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
    XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
    int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
    /* note : bets that, in most cases, hash values are different */
    if (hcmp) return hcmp;
    return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
}

/*======   Canonical representation   ======*/
/*! @ingroup XXH3_family */
XXH_PUBLIC_API void
XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
{
    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) {
        hash.high64 = XXH_swap64(hash.high64);
        hash.low64  = XXH_swap64(hash.low64);
    }
    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
    XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
}

/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
{
    XXH128_hash_t h;
    h.high64 = XXH_readBE64(src);
    h.low64  = XXH_readBE64(src->digest + 8);
    return h;
}
#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))

XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
{
    XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
    XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
}
/*! @ingroup XXH3_family */
XXH_PUBLIC_API XXH_errorcode
XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
{
#if (XXH_DEBUGLEVEL >= 1)
    XXH_ASSERT(secretBuffer != NULL);
    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
#else
    /* production mode, assert() is disabled */
    if (secretBuffer == NULL) return XXH_ERROR;
    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
#endif

    if (customSeedSize == 0) {
        customSeed = XXH3_kSecret;
        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
    }
#if (XXH_DEBUGLEVEL >= 1)
    XXH_ASSERT(customSeed != NULL);
#else
    if (customSeed == NULL) return XXH_ERROR;
#endif

    /* Fill secretBuffer with a copy of customSeed - repeat as needed */
    {   size_t pos = 0;
        while (pos < secretSize) {
            size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
            memcpy((char*)secretBuffer + pos, customSeed, toCopy);
            pos += toCopy;
    }   }

    /* Scramble each 16-byte segment with a hash of the seed */
    {   size_t const nbSeg16 = secretSize / 16;
        size_t n;
        XXH128_canonical_t scrambler;
        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
        for (n=0; n<nbSeg16; n++) {
            XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
            XXH3_combine16((char*)secretBuffer + n*16, h128);
        }
        /* last segment */
        XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
    }
    return XXH_OK;
}
/*! @ingroup XXH3_family */
XXH_PUBLIC_API void
XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
{
    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
    XXH3_initCustomSecret(secret, seed);
    XXH_ASSERT(secretBuffer != NULL);
    memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
}
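/*
 * Secret generation sketch (illustrative):
 *     unsigned char secret[XXH3_SECRET_SIZE_MIN];
 *     if (XXH3_generateSecret(secret, sizeof(secret), seedBuf, seedSize) == XXH_OK) {
 *         XXH64_hash_t const h = XXH3_64bits_withSecret(buf, bufSize, secret, sizeof(secret));
 *         (void)h;
 *     }
 */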
/* Pop our optimization override from above */
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
#  pragma GCC pop_options
#endif

#endif  /* XXH_NO_XXH3 */

/*!
 * @}
 */
#endif  /* XXH_NO_LONG_LONG */

/*!
 * @}
 */
#endif  /* XXH_IMPLEMENTATION */

#if defined (__cplusplus)
} /* extern "C" */
#endif