[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 5 commits: Remove tmp files after toolchain check
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Wed Jan 8 13:24:52 UTC 2025
Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
6c12b6cf by Bryan Richter at 2025-01-07T18:15:02-05:00
Remove tmp files after toolchain check
Fixes #25620
- - - - -
42826a89 by Cheng Shao at 2025-01-07T18:15:39-05:00
xxhash: bump to v0.8.3
- - - - -
185f17e4 by sheaf at 2025-01-07T18:16:15-05:00
Fix typo in GHC.Tc.Solver.Solve.runTcPluginsWanted
- - - - -
23099752 by Luite Stegeman at 2025-01-08T00:33:33+01:00
Add flags for switching off speculative evaluation.
We found that speculative evaluation can increase the amount of
allocations in some circumstances. This patch adds new flags for
selectively disabling speculative evaluation, allowing us to
test the effect of the optimization.
The new flags are:
-fspec-eval
globally enable speculative evaluation
-fspec-eval-dictfun
enable speculative evaluation for dictionary functions (no effect
if speculative evaluation is globally disabled)
The new flags are on by default for all optimisation levels.
See #25284
- - - - -
956f8410 by Ben Gamari at 2025-01-08T08:24:04-05:00
compiler: Fix CPP guards around ghc_unique_counter64
The `ghc_unique_counter64` symbol was introduced in the RTS in the
64-bit unique refactor (!10568) which has been backported to %9.6.7 and
%9.8.4. Update the CPP to reflect this.
Fixes #25576.
- - - - -
16 changed files:
- compiler/GHC/CoreToStg/Prep.hs
- compiler/GHC/Driver/Config/CoreToStg/Prep.hs
- compiler/GHC/Driver/DynFlags.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Tc/Solver/Solve.hs
- compiler/cbits/genSym.c
- docs/users_guide/using-optimisation.rst
- m4/ghc_toolchain.m4
- rts/xxhash.h
- + testsuite/tests/core-to-stg/T25284/A.hs
- + testsuite/tests/core-to-stg/T25284/B.hs
- + testsuite/tests/core-to-stg/T25284/Cls.hs
- + testsuite/tests/core-to-stg/T25284/Main.hs
- + testsuite/tests/core-to-stg/T25284/T25284.stdout
- + testsuite/tests/core-to-stg/T25284/all.T
Changes:
=====================================
compiler/GHC/CoreToStg/Prep.hs
=====================================
@@ -2051,6 +2051,16 @@ conceptually.
See also Note [Floats and FloatDecision] for how we maintain whole groups of
floats and how far they go.
+Note [Controlling Speculative Evaluation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Most of the time, speculative evaluation has a positive effect on performance,
+but we have found a case where speculative evaluation of dictionary functions
+leads to a performance regression #25284.
+
+Therefore we have some flags to control it. See the optimization section in
+the User's Guide for the description of these flags and when to use them.
+
Note [Floats and FloatDecision]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We have a special datatype `Floats` for modelling a telescope of `FloatingBind`
@@ -2275,7 +2285,15 @@ mkNonRecFloat env lev bndr rhs
}
is_hnf = exprIsHNF rhs
- ok_for_spec = exprOkForSpecEval (not . is_rec_call) rhs
+ cfg = cpe_config env
+
+ ok_for_spec = exprOkForSpecEval call_ok_for_spec rhs
+ -- See Note [Controlling Speculative Evaluation]
+ call_ok_for_spec x
+ | is_rec_call x = False
+ | not (cp_specEval cfg) = False
+ | not (cp_specEvalDFun cfg) && isDFunId x = False
+ | otherwise = True
is_rec_call = (`elemUnVarSet` cpe_rec_ids env)
-- See Note [Pin evaluatedness on floats]
@@ -2517,6 +2535,11 @@ data CorePrepConfig = CorePrepConfig
-- ^ Configuration for arity analysis ('exprEtaExpandArity').
-- See Note [Eta expansion of arguments in CorePrep]
-- When 'Nothing' (e.g., -O0, -O1), use the cheaper 'exprArity' instead
+ , cp_specEval :: !Bool
+ -- ^ Whether to perform speculative evaluation
+ -- See Note [Controlling Speculative Evaluation]
+ , cp_specEvalDFun :: !Bool
+ -- ^ Whether to perform speculative evaluation on DFuns
}
data CorePrepEnv
=====================================
compiler/GHC/Driver/Config/CoreToStg/Prep.hs
=====================================
@@ -24,6 +24,8 @@ initCorePrepConfig hsc_env = do
, cp_arityOpts = if gopt Opt_DoCleverArgEtaExpansion dflags
then Just (initArityOpts dflags)
else Nothing
+ , cp_specEval = gopt Opt_SpecEval dflags
+ , cp_specEvalDFun = gopt Opt_SpecEvalDictFun dflags
}
initCorePrepPgmConfig :: DynFlags -> [Var] -> CorePrepPgmConfig
=====================================
compiler/GHC/Driver/DynFlags.hs
=====================================
@@ -1287,6 +1287,8 @@ optLevelFlags -- see Note [Documenting optimisation flags]
-- RegsGraph suffers performance regression. See #7679
-- , ([2], Opt_StaticArgumentTransformation)
-- Static Argument Transformation needs investigation. See #9374
+ , ([0,1,2], Opt_SpecEval)
+ , ([0,1,2], Opt_SpecEvalDictFun)
]
=====================================
compiler/GHC/Driver/Flags.hs
=====================================
@@ -674,6 +674,9 @@ data GeneralFlag
| Opt_NumConstantFolding
| Opt_CoreConstantFolding
| Opt_FastPAPCalls -- #6084
+ | Opt_SpecEval
+ | Opt_SpecEvalDictFun -- See Note [Controlling Speculative Evaluation]
+
-- Inference flags
| Opt_DoTagInferenceChecks
@@ -912,6 +915,8 @@ optimisationFlags = EnumSet.fromList
, Opt_WorkerWrapper
, Opt_WorkerWrapperUnlift
, Opt_SolveConstantDicts
+ , Opt_SpecEval
+ , Opt_SpecEvalDictFun
]
-- | The set of flags which affect code generation and can change a program's
=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -2544,6 +2544,8 @@ fFlagsDeps = [
flagSpec "num-constant-folding" Opt_NumConstantFolding,
flagSpec "core-constant-folding" Opt_CoreConstantFolding,
flagSpec "fast-pap-calls" Opt_FastPAPCalls,
+ flagSpec "spec-eval" Opt_SpecEval,
+ flagSpec "spec-eval-dictfun" Opt_SpecEvalDictFun,
flagSpec "cmm-control-flow" Opt_CmmControlFlow,
flagSpec "show-warning-groups" Opt_ShowWarnGroups,
flagSpec "hide-source-paths" Opt_HideSourcePaths,
=====================================
compiler/GHC/Tc/Solver/Solve.hs
=====================================
@@ -1432,7 +1432,7 @@ runTcPluginsWanted wc@(WC { wc_simple = simples1 })
; wanted <- TcS.zonkSimples simples1 -- Plugin requires zonked inputs
; traceTcS "Running plugins (" (vcat [ text "Given:" <+> ppr given
- , text "Watned:" <+> ppr wanted ])
+ , text "Wanted:" <+> ppr wanted ])
; p <- runTcPluginSolvers solvers (given, bagToList wanted)
; let (_, solved_wanted) = pluginSolvedCts p
(_, unsolved_wanted) = pluginInputCts p
=====================================
compiler/cbits/genSym.c
=====================================
@@ -9,6 +9,18 @@
//
// The CPP is thus about the RTS version GHC is linked against, and not the
// version of the GHC being built.
-#if !MIN_VERSION_GLASGOW_HASKELL(9,9,0,0)
+
+#if MIN_VERSION_GLASGOW_HASKELL(9,9,0,0)
+// Unique64 patch was present in 9.10 and later
+#define HAVE_UNIQUE64 1
+#elif !MIN_VERSION_GLASGOW_HASKELL(9,9,0,0) && MIN_VERSION_GLASGOW_HASKELL(9,8,4,0)
+// Unique64 patch was backported to 9.8.4
+#define HAVE_UNIQUE64 1
+#elif !MIN_VERSION_GLASGOW_HASKELL(9,9,0,0) && MIN_VERSION_GLASGOW_HASKELL(9,6,7,0)
+// Unique64 patch was backported to 9.6.7
+#define HAVE_UNIQUE64 1
+#endif
+
+#if !defined(HAVE_UNIQUE64)
HsWord64 ghc_unique_counter64 = 0;
#endif
=====================================
docs/users_guide/using-optimisation.rst
=====================================
@@ -405,6 +405,55 @@ as such you shouldn't need to set any of them explicitly. A flag
intermediate language, where it is able to common up some subexpressions
that differ in their types, but not their representation.
+.. ghc-flag:: -fspec-eval
+ :shortdesc: Enables speculative evaluation.
+ :type: dynamic
+ :category:
+ :reverse: -fno-spec-eval
+
+ :default: on
+ :since: 9.14.1
+
+ Enables speculative evaluation which usually results in fewer allocations.
+ Enabling speculative evaluation should not cause performance regressions.
+ If you encounter any, please open a ticket.
+
+ Note that disabling this flag will switch off speculative evaluation
+ completely, causing :ghc-flag:`-fspec-eval-dictfun` to have
+ no effect.
+
+.. ghc-flag:: -fspec-eval-dictfun
+ :shortdesc: Enables speculative evaluation of dictionary functions.
+ :type: dynamic
+ :category:
+ :reverse: -fno-spec-eval-dictfun
+
+ :default: on
+ :since: 9.14.1
+
+ Enables speculative (strict) evaluation of dictionary functions.
+
+ This is best explained with an example ::
+
+ instance C a => D a where ...
+
+ g :: D a => a -> Int
+ g x = ...
+
+ f :: C a => a -> Int
+ f x = g x
+
+ Function `f` has to pass a `D a` dictionary to `g`, and uses a dictionary
+ function `C a => D a` to compute it. If speculative evaluation for
+ dictionary functions is enabled, this dictionary is computed
+ strictly.
+
+ Speculative evalation of dictionary functions can lead to slightly better
+ performance, because a thunk is avoided. However, it results in unnecessary
+ computation and allocation if the dictionary goes unused. This causes
+ a significant increase in allocation if the dictionary is large.
+ See (:ghc-ticket:`25284`).
+
.. ghc-flag:: -fdicts-cheap
:shortdesc: Make dictionary-valued expressions seem cheap to the optimiser.
:type: dynamic
=====================================
m4/ghc_toolchain.m4
=====================================
@@ -187,6 +187,7 @@ AC_DEFUN([VALIDATE_GHC_TOOLCHAIN],[
"$GHC_TOOLCHAIN_BIN" format --input="$1" --output="$o1"
"$GHC_TOOLCHAIN_BIN" format --input="$2" --output="$o2"
diff_output=`diff "$o1" "$o2" 2>&1`
+ rm -f "$o1" "$o2"
if test -z "$diff_output"; then
true
else
=====================================
rts/xxhash.h
=====================================
@@ -1,7 +1,7 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Header File
- * Copyright (C) 2012-2021 Yann Collet
+ * Copyright (C) 2012-2023 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
@@ -130,6 +130,7 @@
* }
* @endcode
*
+ *
* @anchor streaming_example
* **Streaming**
*
@@ -165,6 +166,77 @@
* }
* @endcode
*
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ *
+ * @anchor canonical_representation_example
+ * **Canonical Representation**
+ *
+ * The default return values from XXH functions are unsigned 32, 64 and 128 bit
+ * integers.
+ * This the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ *
+ * XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),
+ * XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),
+ * XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),
+ *
+ * @code{.c}
+ * #include <stdio.h>
+ * #include "xxhash.h"
+ *
+ * // Example for a function which prints XXH32_hash_t in human readable format
+ * void printXxh32(XXH32_hash_t hash)
+ * {
+ * XXH32_canonical_t cano;
+ * XXH32_canonicalFromHash(&cano, hash);
+ * size_t i;
+ * for(i = 0; i < sizeof(cano.digest); ++i) {
+ * printf("%02x", cano.digest[i]);
+ * }
+ * printf("\n");
+ * }
+ *
+ * // Example for a function which converts XXH32_canonical_t to XXH32_hash_t
+ * XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)
+ * {
+ * XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);
+ * return hash;
+ * }
+ * @endcode
+ *
+ *
* @file xxhash.h
* xxHash prototypes and implementation
*/
@@ -261,7 +333,7 @@ extern "C" {
/* make all functions private */
# undef XXH_PUBLIC_API
# if defined(__GNUC__)
-# define XXH_PUBLIC_API static __inline __attribute__((unused))
+# define XXH_PUBLIC_API static __inline __attribute__((__unused__))
# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define XXH_PUBLIC_API static inline
# elif defined(_MSC_VER)
@@ -373,7 +445,7 @@ extern "C" {
/*! @brief Marks a global symbol. */
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
-# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+# if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
# ifdef XXH_EXPORT
# define XXH_PUBLIC_API __declspec(dllexport)
# elif XXH_IMPORT
@@ -449,7 +521,7 @@ extern "C" {
/* specific declaration modes for Windows */
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
-# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+# if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
# ifdef XXH_EXPORT
# define XXH_PUBLIC_API __declspec(dllexport)
# elif XXH_IMPORT
@@ -461,9 +533,9 @@ extern "C" {
#endif
#if defined (__GNUC__)
-# define XXH_CONSTF __attribute__((const))
-# define XXH_PUREF __attribute__((pure))
-# define XXH_MALLOCF __attribute__((malloc))
+# define XXH_CONSTF __attribute__((__const__))
+# define XXH_PUREF __attribute__((__pure__))
+# define XXH_MALLOCF __attribute__((__malloc__))
#else
# define XXH_CONSTF /* disable */
# define XXH_PUREF
@@ -475,7 +547,7 @@ extern "C" {
***************************************/
#define XXH_VERSION_MAJOR 0
#define XXH_VERSION_MINOR 8
-#define XXH_VERSION_RELEASE 2
+#define XXH_VERSION_RELEASE 3
/*! @brief Version number, encoded as two digits each */
#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
@@ -517,7 +589,11 @@ typedef uint32_t XXH32_hash_t;
#elif !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# ifdef _AIX
+# include <inttypes.h>
+# else
+# include <stdint.h>
+# endif
typedef uint32_t XXH32_hash_t;
#else
@@ -551,10 +627,6 @@ typedef uint32_t XXH32_hash_t;
/*!
* @brief Calculates the 32-bit hash of @p input using xxHash32.
*
- * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
- *
- * See @ref single_shot_example "Single Shot Example" for an example.
- *
* @param input The block of data to be hashed, at least @p length bytes in size.
* @param length The length of @p input, in bytes.
* @param seed The 32-bit seed to alter the hash's output predictably.
@@ -564,63 +636,44 @@ typedef uint32_t XXH32_hash_t;
* readable, contiguous memory. However, if @p length is `0`, @p input may be
* `NULL`. In C++, this also must be *TriviallyCopyable*.
*
- * @return The calculated 32-bit hash value.
+ * @return The calculated 32-bit xxHash32 value.
*
- * @see
- * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- * Direct equivalents for the other variants of xxHash.
- * @see
- * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
#ifndef XXH_NO_STREAM
-/*!
- * Streaming functions generate the xxHash value from an incremental input.
- * This method is slower than single-call functions, due to state management.
- * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
- *
- * An XXH state must first be allocated using `XXH*_createState()`.
- *
- * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
- *
- * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
- *
- * The function returns an error code, with 0 meaning OK, and any other value
- * meaning there is an error.
- *
- * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
- * This function returns the nn-bits hash as an int or long long.
- *
- * It's still possible to continue inserting input into the hash state after a
- * digest, and generate new hash values later on by invoking `XXH*_digest()`.
- *
- * When done, release the state using `XXH*_freeState()`.
- *
- * @see streaming_example at the top of @ref xxhash.h for an example.
- */
-
/*!
* @typedef struct XXH32_state_s XXH32_state_t
* @brief The opaque state struct for the XXH32 streaming API.
*
* @see XXH32_state_s for details.
+ * @see @ref streaming_example "Streaming Example"
*/
typedef struct XXH32_state_s XXH32_state_t;
/*!
* @brief Allocates an @ref XXH32_state_t.
*
- * Must be freed with XXH32_freeState().
- * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ * @return An allocated pointer of @ref XXH32_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH32_freeState().
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
/*!
* @brief Frees an @ref XXH32_state_t.
*
- * Must be allocated with XXH32_createState().
* @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
- * @return XXH_OK.
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH32_createState().
+ *
+ * @see @ref streaming_example "Streaming Example"
+ *
*/
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
/*!
@@ -636,23 +689,24 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_
/*!
* @brief Resets an @ref XXH32_state_t to begin a new hash.
*
- * This function resets and seeds a state. Call it before @ref XXH32_update().
- *
* @param statePtr The state struct to reset.
* @param seed The 32-bit seed to alter the hash result predictably.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH32_update().
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
/*!
* @brief Consumes a block of @p input to an @ref XXH32_state_t.
*
- * Call this to incrementally consume blocks of data.
- *
* @param statePtr The state struct to update.
* @param input The block of data to be hashed, at least @p length bytes in size.
* @param length The length of @p input, in bytes.
@@ -664,48 +718,36 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t
* readable, contiguous memory. However, if @p length is `0`, @p input may be
* `NULL`. In C++, this also must be *TriviallyCopyable*.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
/*!
* @brief Returns the calculated hash value from an @ref XXH32_state_t.
*
- * @note
- * Calling XXH32_digest() will not affect @p statePtr, so you can update,
- * digest, and update again.
- *
* @param statePtr The state struct to calculate the hash from.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return The calculated xxHash32 value from that state.
+ * @return The calculated 32-bit xxHash32 value from that state.
+ *
+ * @note
+ * Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
#endif /* !XXH_NO_STREAM */
/******* Canonical representation *******/
-/*
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- * This the simplest and fastest format for further post-processing.
- *
- * However, this leaves open the question of what is the order on the byte level,
- * since little and big endian conventions will store the same number differently.
- *
- * The canonical representation settles this issue by mandating big-endian
- * convention, the same convention as human-readable numbers (large digits first).
- *
- * When writing hash values to storage, sending them over a network, or printing
- * them, it's highly recommended to use the canonical representation to ensure
- * portability across a wider range of systems, present and future.
- *
- * The following functions allow transformation of hash values to and from
- * canonical format.
- */
-
/*!
* @brief Canonical (big endian) representation of @ref XXH32_hash_t.
*/
@@ -716,11 +758,13 @@ typedef struct {
/*!
* @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
*
- * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
* @param hash The @ref XXH32_hash_t to be converted.
*
* @pre
* @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
*/
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
@@ -733,6 +777,8 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
* @p src must not be `NULL`.
*
* @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
*/
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
@@ -794,7 +840,7 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canoni
* As of writing this, only supported by clang.
*/
#if XXH_HAS_ATTRIBUTE(noescape)
-# define XXH_NOESCAPE __attribute__((noescape))
+# define XXH_NOESCAPE __attribute__((__noescape__))
#else
# define XXH_NOESCAPE
#endif
@@ -821,7 +867,11 @@ typedef uint64_t XXH64_hash_t;
#elif !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# ifdef _AIX
+# include <inttypes.h>
+# else
+# include <stdint.h>
+# endif
typedef uint64_t XXH64_hash_t;
#else
# include <limits.h>
@@ -851,9 +901,6 @@ typedef uint64_t XXH64_hash_t;
/*!
* @brief Calculates the 64-bit hash of @p input using xxHash64.
*
- * This function usually runs faster on 64-bit systems, but slower on 32-bit
- * systems (see benchmark).
- *
* @param input The block of data to be hashed, at least @p length bytes in size.
* @param length The length of @p input, in bytes.
* @param seed The 64-bit seed to alter the hash's output predictably.
@@ -863,13 +910,9 @@ typedef uint64_t XXH64_hash_t;
* readable, contiguous memory. However, if @p length is `0`, @p input may be
* `NULL`. In C++, this also must be *TriviallyCopyable*.
*
- * @return The calculated 64-bit hash.
+ * @return The calculated 64-bit xxHash64 value.
*
- * @see
- * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- * Direct equivalents for the other variants of xxHash.
- * @see
- * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
@@ -879,23 +922,32 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size
* @brief The opaque state struct for the XXH64 streaming API.
*
* @see XXH64_state_s for details.
+ * @see @ref streaming_example "Streaming Example"
*/
typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
/*!
* @brief Allocates an @ref XXH64_state_t.
*
- * Must be freed with XXH64_freeState().
- * @return An allocated XXH64_state_t on success, `NULL` on failure.
+ * @return An allocated pointer of @ref XXH64_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH64_freeState().
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
/*!
* @brief Frees an @ref XXH64_state_t.
*
- * Must be allocated with XXH64_createState().
* @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().
- * @return XXH_OK.
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH64_createState().
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
@@ -912,23 +964,24 @@ XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const
/*!
* @brief Resets an @ref XXH64_state_t to begin a new hash.
*
- * This function resets and seeds a state. Call it before @ref XXH64_update().
- *
* @param statePtr The state struct to reset.
* @param seed The 64-bit seed to alter the hash result predictably.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH64_update().
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
/*!
* @brief Consumes a block of @p input to an @ref XXH64_state_t.
*
- * Call this to incrementally consume blocks of data.
- *
* @param statePtr The state struct to update.
* @param input The block of data to be hashed, at least @p length bytes in size.
* @param length The length of @p input, in bytes.
@@ -940,23 +993,30 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr,
* readable, contiguous memory. However, if @p length is `0`, @p input may be
* `NULL`. In C++, this also must be *TriviallyCopyable*.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
/*!
* @brief Returns the calculated hash value from an @ref XXH64_state_t.
*
- * @note
- * Calling XXH64_digest() will not affect @p statePtr, so you can update,
- * digest, and update again.
- *
* @param statePtr The state struct to calculate the hash from.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return The calculated xxHash64 value from that state.
+ * @return The calculated 64-bit xxHash64 value from that state.
+ *
+ * @note
+ * Calling XXH64_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
#endif /* !XXH_NO_STREAM */
@@ -975,6 +1035,8 @@ typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t
*
* @pre
* @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
*/
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
@@ -987,6 +1049,8 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst,
* @p src must not be `NULL`.
*
* @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
*/
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
@@ -1046,40 +1110,74 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const
*
* The API supports one-shot hashing, streaming mode, and custom secrets.
*/
+
+/*!
+ * @ingroup tuning
+ * @brief Possible values for @ref XXH_VECTOR.
+ *
+ * Unless set explicitly, determined automatically.
+ */
+# define XXH_SCALAR 0 /*!< Portable scalar version */
+# define XXH_SSE2 1 /*!< SSE2 for Pentium 4, Opteron, all x86_64. */
+# define XXH_AVX2 2 /*!< AVX2 for Haswell and Bulldozer */
+# define XXH_AVX512 3 /*!< AVX512 for Skylake and Icelake */
+# define XXH_NEON 4 /*!< NEON for most ARMv7-A, all AArch64, and WASM SIMD128 */
+# define XXH_VSX 5 /*!< VSX and ZVector for POWER8/z13 (64-bit) */
+# define XXH_SVE 6 /*!< SVE for some ARMv8-A and ARMv9-A */
+# define XXH_LSX 7 /*!< LSX (128-bit SIMD) for LoongArch64 */
+
+
/*-**********************************************************************
* XXH3 64-bit variant
************************************************************************/
/*!
- * @brief 64-bit unseeded variant of XXH3.
+ * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input.
*
- * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however
- * it may have slightly better performance due to constant propagation of the
- * defaults.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
*
- * @see
- * XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms
* @see
* XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
- * @see
- * XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version.
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
/*!
- * @brief 64-bit seeded variant of XXH3
+ * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input.
*
- * This variant generates a custom secret on the fly based on default secret
- * altered using the `seed` value.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
*
- * While this operation is decently fast, note that it's not completely free.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
*
* @note
* seed == 0 produces the same results as @ref XXH3_64bits().
*
- * @param input The data to hash
- * @param length The length
- * @param seed The 64-bit seed to alter the state.
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
@@ -1093,22 +1191,36 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const vo
#define XXH3_SECRET_SIZE_MIN 136
/*!
- * @brief 64-bit variant of XXH3 with a custom "secret".
+ * @brief Calculates 64-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @pre
+ * The memory between @p data and @p data + @p len must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p data may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
*
* It's possible to provide any blob of bytes as a "secret" to generate the hash.
* This makes it more difficult for an external actor to prepare an intentional collision.
- * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
* However, the quality of the secret impacts the dispersion of the hash algorithm.
* Therefore, the secret _must_ look like a bunch of random bytes.
* Avoid "trivial" or structured data such as repeated sequences or a text document.
* Whenever in doubt about the "randomness" of the blob of bytes,
- * consider employing "XXH3_generateSecret()" instead (see below).
+ * consider employing @ref XXH3_generateSecret() instead (see below).
* It will generate a proper high entropy secret derived from the blob of bytes.
* Another advantage of using XXH3_generateSecret() is that
* it guarantees that all bits within the initial blob of bytes
* will impact every bit of the output.
* This is not necessarily the case when using the blob of bytes directly
* because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
@@ -1123,9 +1235,10 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const
*/
/*!
- * @brief The state struct for the XXH3 streaming API.
+ * @brief The opaque state struct for the XXH3 streaming API.
*
* @see XXH3_state_s for details.
+ * @see @ref streaming_example "Streaming Example"
*/
typedef struct XXH3_state_s XXH3_state_t;
XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
@@ -1144,15 +1257,20 @@ XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOE
/*!
* @brief Resets an @ref XXH3_state_t to begin a new hash.
*
- * This function resets `statePtr` and generate a secret with default parameters. Call it before @ref XXH3_64bits_update().
- * Digest will be equivalent to `XXH3_64bits()`.
- *
* @param statePtr The state struct to reset.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generate a secret with default parameters.
+ * - Call this function before @ref XXH3_64bits_update().
+ * - Digest will be equivalent to `XXH3_64bits()`.
+ *
+ * @see @ref streaming_example "Streaming Example"
*
*/
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
@@ -1160,36 +1278,54 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* stateP
/*!
* @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
*
- * This function resets `statePtr` and generate a secret from `seed`. Call it before @ref XXH3_64bits_update().
- * Digest will be equivalent to `XXH3_64bits_withSeed()`.
- *
* @param statePtr The state struct to reset.
- * @param seed The 64-bit seed to alter the state.
+ * @param seed The 64-bit seed to alter the hash result predictably.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generate a secret from `seed`.
+ * - Call this function before @ref XXH3_64bits_update().
+ * - Digest will be equivalent to `XXH3_64bits_withSeed()`.
+ *
+ * @see @ref streaming_example "Streaming Example"
*
*/
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
/*!
- * XXH3_64bits_reset_withSecret():
- * `secret` is referenced, it _must outlive_ the hash streaming session.
- * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * `secret` is referenced, it _must outlive_ the hash streaming session.
+ *
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
* and the quality of produced hash values depends on secret's entropy
* (secret's content should look like a bunch of random bytes).
* When in doubt about the randomness of a candidate `secret`,
* consider employing `XXH3_generateSecret()` instead (see below).
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
/*!
* @brief Consumes a block of @p input to an @ref XXH3_state_t.
*
- * Call this to incrementally consume blocks of data.
- *
* @param statePtr The state struct to update.
* @param input The block of data to be hashed, at least @p length bytes in size.
* @param length The length of @p input, in bytes.
@@ -1201,23 +1337,30 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_stat
* readable, contiguous memory. However, if @p length is `0`, @p input may be
* `NULL`. In C++, this also must be *TriviallyCopyable*.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
/*!
* @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
*
- * @note
- * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
- * digest, and update again.
- *
* @param statePtr The state struct to calculate the hash from.
*
* @pre
* @p statePtr must not be `NULL`.
*
* @return The calculated XXH3 64-bit hash value from that state.
+ *
+ * @note
+ * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
#endif /* !XXH_NO_STREAM */
@@ -1242,26 +1385,71 @@ typedef struct {
} XXH128_hash_t;
/*!
- * @brief Unseeded 128-bit variant of XXH3
+ * @brief Calculates 128-bit unseeded variant of XXH3 of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p length bytes in size.
+ * @param len The length of @p data, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
*
* The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
* for shorter inputs.
*
- * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however
+ * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however
* it may have slightly better performance due to constant propagation of the
* defaults.
*
- * @see
- * XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms
- * @see
- * XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
- * @see
- * XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version.
+ * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
-/*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */
+/*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p length bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * @note
+ * seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
-/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */
+/*!
+ * @brief Calculates 128-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing @ref XXH3_generateSecret() instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
/******* Streaming *******/
@@ -1281,36 +1469,65 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE cons
/*!
* @brief Resets an @ref XXH3_state_t to begin a new hash.
*
- * This function resets `statePtr` and generate a secret with default parameters. Call it before @ref XXH3_128bits_update().
- * Digest will be equivalent to `XXH3_128bits()`.
- *
* @param statePtr The state struct to reset.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generate a secret with default parameters.
+ * - Call it before @ref XXH3_128bits_update().
+ * - Digest will be equivalent to `XXH3_128bits()`.
*
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
/*!
* @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
*
- * This function resets `statePtr` and generate a secret from `seed`. Call it before @ref XXH3_128bits_update().
- * Digest will be equivalent to `XXH3_128bits_withSeed()`.
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generate a secret from `seed`.
+ * - Call it before @ref XXH3_128bits_update().
+ * - Digest will be equivalent to `XXH3_128bits_withSeed()`.
+ *
+ * @see @ref streaming_example "Streaming Example"
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
*
- * @param statePtr The state struct to reset.
- * @param seed The 64-bit seed to alter the state.
+ * @param statePtr The state struct to reset.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
*
* @pre
* @p statePtr must not be `NULL`.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * `secret` is referenced, it _must outlive_ the hash streaming session.
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
+ * and the quality of produced hash values depends on secret's entropy
+ * (secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
*
+ * @see @ref streaming_example "Streaming Example"
*/
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
-/*! @brief Custom secret 128-bit variant of XXH3. @see XXH_64bits_reset_withSecret(). */
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
/*!
@@ -1324,28 +1541,32 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_sta
*
* @pre
* @p statePtr must not be `NULL`.
- * @pre
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
* The memory between @p input and @p input + @p length must be valid,
* readable, contiguous memory. However, if @p length is `0`, @p input may be
* `NULL`. In C++, this also must be *TriviallyCopyable*.
*
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
*/
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
/*!
* @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
*
- * @note
- * Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
- * digest, and update again.
- *
* @param statePtr The state struct to calculate the hash from.
*
* @pre
* @p statePtr must not be `NULL`.
*
* @return The calculated XXH3 128-bit hash value from that state.
+ *
+ * @note
+ * Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
*/
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
#endif /* !XXH_NO_STREAM */
@@ -1355,18 +1576,27 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const X
* Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
/*!
- * XXH128_isEqual():
- * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
+ * @brief Check equality of two XXH128_hash_t values
+ *
+ * @param h1 The 128-bit hash value.
+ * @param h2 Another 128-bit hash value.
+ *
+ * @return `1` if `h1` and `h2` are equal.
+ * @return `0` if they are not.
*/
XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
/*!
* @brief Compares two @ref XXH128_hash_t
+ *
* This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
*
- * @return: >0 if *h128_1 > *h128_2
- * =0 if *h128_1 == *h128_2
- * <0 if *h128_1 < *h128_2
+ * @param h128_1 Left-hand side value
+ * @param h128_2 Right-hand side value
+ *
+ * @return >0 if @p h128_1 > @p h128_2
+ * @return =0 if @p h128_1 == @p h128_2
+ * @return <0 if @p h128_1 < @p h128_2
*/
XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
@@ -1378,11 +1608,12 @@ typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical
/*!
* @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
*
- * @param dst The @ref XXH128_canonical_t pointer to be stored to.
+ * @param dst The @ref XXH128_canonical_t pointer to be stored to.
* @param hash The @ref XXH128_hash_t to be converted.
*
* @pre
* @p dst must not be `NULL`.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
*/
XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
@@ -1395,6 +1626,7 @@ XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* ds
* @p src must not be `NULL`.
*
* @return The converted hash.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
*/
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
@@ -1440,9 +1672,9 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE con
struct XXH32_state_s {
XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
- XXH32_hash_t v[4]; /*!< Accumulator lanes */
- XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
- XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */
+ XXH32_hash_t acc[4]; /*!< Accumulator lanes */
+ unsigned char buffer[16]; /*!< Internal buffer for partial reads. */
+ XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */
XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */
}; /* typedef'd to XXH32_state_t */
@@ -1463,9 +1695,9 @@ struct XXH32_state_s {
*/
struct XXH64_state_s {
XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
- XXH64_hash_t v[4]; /*!< Accumulator lanes */
- XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
- XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */
+ XXH64_hash_t acc[4]; /*!< Accumulator lanes */
+ unsigned char buffer[32]; /*!< Internal buffer for partial reads.. */
+ XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */
XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/
XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */
}; /* typedef'd to XXH64_state_t */
@@ -1473,8 +1705,7 @@ struct XXH64_state_s {
#ifndef XXH_NO_XXH3
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
-# include <stdalign.h>
-# define XXH_ALIGN(n) alignas(n)
+# define XXH_ALIGN(n) _Alignas(n)
#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
/* In C++ alignas() is a keyword */
# define XXH_ALIGN(n) alignas(n)
@@ -1587,7 +1818,20 @@ struct XXH3_state_s {
/*!
- * simple alias to pre-selected XXH3_128bits variant
+ * @brief Calculates the 128-bit hash of @p data using XXH3.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p data and @p data + @p len must be valid,
+ * readable, contiguous memory. However, if @p len is `0`, @p data may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 128-bit XXH3 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
*/
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
@@ -1596,9 +1840,16 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, siz
/* Symbols defined below must be considered tied to a specific library version. */
/*!
- * XXH3_generateSecret():
+ * @brief Derive a high-entropy secret from any user-defined content, named customSeed.
+ *
+ * @param secretBuffer A writable buffer for derived high-entropy secret data.
+ * @param secretSize Size of secretBuffer, in bytes. Must be >= XXH3_SECRET_SIZE_MIN.
+ * @param customSeed A user-defined content.
+ * @param customSeedSize Size of customSeed, in bytes.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
*
- * Derive a high-entropy secret from any user-defined content, named customSeed.
* The generated secret can be used in combination with `*_withSecret()` functions.
* The `_withSecret()` variants are useful to provide a higher level of protection
* than 64-bit seed, as it becomes much more difficult for an external actor to
@@ -1651,6 +1902,9 @@ XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer
/*!
* @brief Generate the same secret as the _withSeed() variants.
*
+ * @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
* The generated secret can be used in combination with
*`*_withSecret()` and `_withSecretandSeed()` variants.
*
@@ -1670,7 +1924,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer
* };
* // Fast, caches the seeded secret for future uses.
* class HashFast {
- * unsigned char secret[XXH3_SECRET_SIZE_MIN];
+ * unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
* public:
* HashFast(XXH64_hash_t s) {
* XXH3_generateSecret_fromSeed(secret, seed);
@@ -1682,15 +1936,26 @@ XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer
* }
* };
* @endcode
- * @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes
- * @param seed The seed to seed the state.
*/
XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
/*!
- * These variants generate hash values using either
- * @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
- * or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ * @brief Maximum size of "short" key in bytes.
+ */
+#define XXH3_MIDSIZE_MAX 240
+
+/*!
+ * @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * These variants generate hash values using either:
+ * - @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)
+ * - @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).
*
* This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
* `_withSeed()` has to generate the secret on the fly for "large" keys.
@@ -1717,22 +1982,71 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
XXH_NOESCAPE const void* secret, size_t secretSize,
XXH64_hash_t seed);
-/*! @copydoc XXH3_64bits_withSecretandSeed() */
+
+/*!
+ * @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The memory segment to be hashed, at least @p len bytes in size.
+ * @param length The length of @p data, in bytes.
+ * @param secret The secret used to alter hash result predictably.
+ * @param secretSize The length of @p secret, in bytes (must be >= XXH3_SECRET_SIZE_MIN)
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed(): contract is the same.
+ */
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
XXH_NOESCAPE const void* secret, size_t secretSize,
XXH64_hash_t seed64);
+
#ifndef XXH_NO_STREAM
-/*! @copydoc XXH3_64bits_withSecretandSeed() */
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed(). Contract is identical.
+ */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
XXH_NOESCAPE const void* secret, size_t secretSize,
XXH64_hash_t seed64);
-/*! @copydoc XXH3_64bits_withSecretandSeed() */
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed(). Contract is identical.
+ *
+ * Note: there was a bug in an earlier version of this function (<= v0.8.2)
+ * that would make it generate an incorrect hash value
+ * when @p seed == 0 and @p length < XXH3_MIDSIZE_MAX
+ * and @p secret is different from XXH3_generateSecret_fromSeed().
+ * As stated in the contract, the correct hash result must be
+ * the same as XXH3_128bits_withSeed() when @p length <= XXH3_MIDSIZE_MAX.
+ * Results generated by this older version are wrong, hence not comparable.
+ */
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
XXH_NOESCAPE const void* secret, size_t secretSize,
XXH64_hash_t seed64);
+
#endif /* !XXH_NO_STREAM */
#endif /* !XXH_NO_XXH3 */
@@ -2100,15 +2414,15 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
#if XXH_NO_INLINE_HINTS /* disable inlining hints */
# if defined(__GNUC__) || defined(__clang__)
-# define XXH_FORCE_INLINE static __attribute__((unused))
+# define XXH_FORCE_INLINE static __attribute__((__unused__))
# else
# define XXH_FORCE_INLINE static
# endif
# define XXH_NO_INLINE static
/* enable inlining hints */
#elif defined(__GNUC__) || defined(__clang__)
-# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
-# define XXH_NO_INLINE static __attribute__((noinline))
+# define XXH_FORCE_INLINE static __inline__ __attribute__((__always_inline__, __unused__))
+# define XXH_NO_INLINE static __attribute__((__noinline__))
#elif defined(_MSC_VER) /* Visual Studio */
# define XXH_FORCE_INLINE static __forceinline
# define XXH_NO_INLINE static __declspec(noinline)
@@ -2121,12 +2435,34 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
# define XXH_NO_INLINE static
#endif
+#if defined(XXH_INLINE_ALL)
+# define XXH_STATIC XXH_FORCE_INLINE
+#else
+# define XXH_STATIC static
+#endif
+
#if XXH3_INLINE_SECRET
# define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
#else
# define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
#endif
+#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
+# define XXH_RESTRICT /* disable */
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
+# define XXH_RESTRICT restrict
+#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
+ || (defined (__clang__)) \
+ || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
+ || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
+/*
+ * There are a LOT more compilers that recognize __restrict but this
+ * covers the major ones.
+ */
+# define XXH_RESTRICT __restrict
+#else
+# define XXH_RESTRICT /* disable */
+#endif
/* *************************************
* Debug
@@ -2206,10 +2542,14 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
#if !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
- typedef uint8_t xxh_u8;
+# ifdef _AIX
+# include <inttypes.h>
+# else
+# include <stdint.h>
+# endif
+ typedef uint8_t xxh_u8;
#else
- typedef unsigned char xxh_u8;
+ typedef unsigned char xxh_u8;
#endif
typedef XXH32_hash_t xxh_u32;
@@ -2295,11 +2635,11 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr;
* https://gcc.godbolt.org/z/xYez1j67Y.
*/
#ifdef XXH_OLD_NAMES
-typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
+typedef union { xxh_u32 u32; } __attribute__((__packed__)) unalign;
#endif
static xxh_u32 XXH_read32(const void* ptr)
{
- typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
+ typedef __attribute__((__aligned__(1))) xxh_u32 xxh_unalign32;
return *((const xxh_unalign32*)ptr);
}
@@ -2445,6 +2785,9 @@ static int XXH_isLittleEndian(void)
&& XXH_HAS_BUILTIN(__builtin_rotateleft64)
# define XXH_rotl32 __builtin_rotateleft32
# define XXH_rotl64 __builtin_rotateleft64
+#elif XXH_HAS_BUILTIN(__builtin_stdc_rotate_left)
+# define XXH_rotl32 __builtin_stdc_rotate_left
+# define XXH_rotl64 __builtin_stdc_rotate_left
/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
#elif defined(_MSC_VER)
# define XXH_rotl32(x,r) _rotl(x,r)
@@ -2590,7 +2933,7 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
/*
* UGLY HACK:
- * A compiler fence is the only thing that prevents GCC and Clang from
+ * A compiler fence is used to prevent GCC and Clang from
* autovectorizing the XXH32 loop (pragmas and attributes don't work for some
* reason) without globally disabling SSE4.1.
*
@@ -2651,6 +2994,61 @@ static xxh_u32 XXH32_avalanche(xxh_u32 hash)
#define XXH_get32bits(p) XXH_readLE32_align(p, align)
+/*!
+ * @internal
+ * @brief Sets up the initial accumulator state for XXH32().
+ */
+XXH_FORCE_INLINE void
+XXH32_initAccs(xxh_u32 *acc, xxh_u32 seed)
+{
+ XXH_ASSERT(acc != NULL);
+ acc[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+ acc[1] = seed + XXH_PRIME32_2;
+ acc[2] = seed + 0;
+ acc[3] = seed - XXH_PRIME32_1;
+}
+
+/*!
+ * @internal
+ * @brief Consumes a block of data for XXH32().
+ *
+ * @return the end input pointer.
+ */
+XXH_FORCE_INLINE const xxh_u8 *
+XXH32_consumeLong(
+ xxh_u32 *XXH_RESTRICT acc,
+ xxh_u8 const *XXH_RESTRICT input,
+ size_t len,
+ XXH_alignment align
+)
+{
+ const xxh_u8* const bEnd = input + len;
+ const xxh_u8* const limit = bEnd - 15;
+ XXH_ASSERT(acc != NULL);
+ XXH_ASSERT(input != NULL);
+ XXH_ASSERT(len >= 16);
+ do {
+ acc[0] = XXH32_round(acc[0], XXH_get32bits(input)); input += 4;
+ acc[1] = XXH32_round(acc[1], XXH_get32bits(input)); input += 4;
+ acc[2] = XXH32_round(acc[2], XXH_get32bits(input)); input += 4;
+ acc[3] = XXH32_round(acc[3], XXH_get32bits(input)); input += 4;
+ } while (input < limit);
+
+ return input;
+}
+
+/*!
+ * @internal
+ * @brief Merges the accumulator lanes together for XXH32()
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u32
+XXH32_mergeAccs(const xxh_u32 *acc)
+{
+ XXH_ASSERT(acc != NULL);
+ return XXH_rotl32(acc[0], 1) + XXH_rotl32(acc[1], 7)
+ + XXH_rotl32(acc[2], 12) + XXH_rotl32(acc[3], 18);
+}
+
/*!
* @internal
* @brief Processes the last 0-15 bytes of @p ptr.
@@ -2763,22 +3161,12 @@ XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment
if (input==NULL) XXH_ASSERT(len == 0);
if (len>=16) {
- const xxh_u8* const bEnd = input + len;
- const xxh_u8* const limit = bEnd - 15;
- xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
- xxh_u32 v2 = seed + XXH_PRIME32_2;
- xxh_u32 v3 = seed + 0;
- xxh_u32 v4 = seed - XXH_PRIME32_1;
+ xxh_u32 acc[4];
+ XXH32_initAccs(acc, seed);
- do {
- v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
- v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
- v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
- v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
- } while (input < limit);
-
- h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
- + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+ input = XXH32_consumeLong(acc, input, len, align);
+
+ h32 = XXH32_mergeAccs(acc);
} else {
h32 = seed + XXH_PRIME32_5;
}
@@ -2834,10 +3222,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t s
{
XXH_ASSERT(statePtr != NULL);
memset(statePtr, 0, sizeof(*statePtr));
- statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
- statePtr->v[1] = seed + XXH_PRIME32_2;
- statePtr->v[2] = seed + 0;
- statePtr->v[3] = seed - XXH_PRIME32_1;
+ XXH32_initAccs(statePtr->acc, seed);
return XXH_OK;
}
@@ -2851,45 +3236,37 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
return XXH_OK;
}
- { const xxh_u8* p = (const xxh_u8*)input;
- const xxh_u8* const bEnd = p + len;
+ state->total_len_32 += (XXH32_hash_t)len;
+ state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
- state->total_len_32 += (XXH32_hash_t)len;
- state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
+ XXH_ASSERT(state->bufferedSize < sizeof(state->buffer));
+ if (len < sizeof(state->buffer) - state->bufferedSize) { /* fill in tmp buffer */
+ XXH_memcpy(state->buffer + state->bufferedSize, input, len);
+ state->bufferedSize += (XXH32_hash_t)len;
+ return XXH_OK;
+ }
- if (state->memsize + len < 16) { /* fill in tmp buffer */
- XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
- state->memsize += (XXH32_hash_t)len;
- return XXH_OK;
- }
+ { const xxh_u8* xinput = (const xxh_u8*)input;
+ const xxh_u8* const bEnd = xinput + len;
- if (state->memsize) { /* some data left from previous update */
- XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
- { const xxh_u32* p32 = state->mem32;
- state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
- state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
- state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
- state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
- }
- p += 16-state->memsize;
- state->memsize = 0;
+ if (state->bufferedSize) { /* non-empty buffer: complete first */
+ XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize);
+ xinput += sizeof(state->buffer) - state->bufferedSize;
+ /* then process one round */
+ (void)XXH32_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned);
+ state->bufferedSize = 0;
}
- if (p <= bEnd-16) {
- const xxh_u8* const limit = bEnd - 16;
-
- do {
- state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
- state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
- state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
- state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
- } while (p<=limit);
-
+ XXH_ASSERT(xinput <= bEnd);
+ if ((size_t)(bEnd - xinput) >= sizeof(state->buffer)) {
+ /* Process the remaining data */
+ xinput = XXH32_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned);
}
- if (p < bEnd) {
- XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
- state->memsize = (unsigned)(bEnd-p);
+ if (xinput < bEnd) {
+ /* Copy the leftover to the tmp buffer */
+ XXH_memcpy(state->buffer, xinput, (size_t)(bEnd-xinput));
+ state->bufferedSize = (unsigned)(bEnd-xinput);
}
}
@@ -2903,36 +3280,20 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
xxh_u32 h32;
if (state->large_len) {
- h32 = XXH_rotl32(state->v[0], 1)
- + XXH_rotl32(state->v[1], 7)
- + XXH_rotl32(state->v[2], 12)
- + XXH_rotl32(state->v[3], 18);
+ h32 = XXH32_mergeAccs(state->acc);
} else {
- h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
+ h32 = state->acc[2] /* == seed */ + XXH_PRIME32_5;
}
h32 += state->total_len_32;
- return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
+ return XXH32_finalize(h32, state->buffer, state->bufferedSize, XXH_aligned);
}
#endif /* !XXH_NO_STREAM */
/******* Canonical representation *******/
-/*!
- * @ingroup XXH32_family
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- *
- * The canonical representation uses big endian convention, the same convention
- * as human-readable numbers (large digits first).
- *
- * This way, hash values can be written into a file or buffer, remaining
- * comparable across different systems.
- *
- * The following functions allow transformation of hash values to and from their
- * canonical format.
- */
+/*! @ingroup XXH32_family */
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
{
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
@@ -2987,11 +3348,11 @@ static xxh_u64 XXH_read64(const void* memPtr)
* https://gcc.godbolt.org/z/xYez1j67Y.
*/
#ifdef XXH_OLD_NAMES
-typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
+typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((__packed__)) unalign64;
#endif
static xxh_u64 XXH_read64(const void* ptr)
{
- typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
+ typedef __attribute__((__aligned__(1))) xxh_u64 xxh_unalign64;
return *((const xxh_unalign64*)ptr);
}
@@ -3110,6 +3471,23 @@ static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
acc += input * XXH_PRIME64_2;
acc = XXH_rotl64(acc, 31);
acc *= XXH_PRIME64_1;
+#if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+ /*
+ * DISABLE AUTOVECTORIZATION:
+ * A compiler fence is used to prevent GCC and Clang from
+ * autovectorizing the XXH64 loop (pragmas and attributes don't work for some
+ * reason) without globally disabling AVX512.
+ *
+ * Autovectorization of XXH64 tends to be detrimental,
+ * though the exact outcome may change depending on exact cpu and compiler version.
+ * For information, it has been reported as detrimental for Skylake-X,
+ * but possibly beneficial for Zen4.
+ *
+ * The default is to disable auto-vectorization,
+ * but you can select to enable it instead using `XXH_ENABLE_AUTOVECTORIZE` build variable.
+ */
+ XXH_COMPILER_GUARD(acc);
+#endif
return acc;
}
@@ -3135,6 +3513,85 @@ static xxh_u64 XXH64_avalanche(xxh_u64 hash)
#define XXH_get64bits(p) XXH_readLE64_align(p, align)
+/*!
+ * @internal
+ * @brief Sets up the initial accumulator state for XXH64().
+ */
+XXH_FORCE_INLINE void
+XXH64_initAccs(xxh_u64 *acc, xxh_u64 seed)
+{
+ XXH_ASSERT(acc != NULL);
+ acc[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+ acc[1] = seed + XXH_PRIME64_2;
+ acc[2] = seed + 0;
+ acc[3] = seed - XXH_PRIME64_1;
+}
+
+/*!
+ * @internal
+ * @brief Consumes a block of data for XXH64().
+ *
+ * @return the end input pointer.
+ */
+XXH_FORCE_INLINE const xxh_u8 *
+XXH64_consumeLong(
+ xxh_u64 *XXH_RESTRICT acc,
+ xxh_u8 const *XXH_RESTRICT input,
+ size_t len,
+ XXH_alignment align
+)
+{
+ const xxh_u8* const bEnd = input + len;
+ const xxh_u8* const limit = bEnd - 31;
+ XXH_ASSERT(acc != NULL);
+ XXH_ASSERT(input != NULL);
+ XXH_ASSERT(len >= 32);
+ do {
+ /* reroll on 32-bit */
+ if (sizeof(void *) < sizeof(xxh_u64)) {
+ size_t i;
+ for (i = 0; i < 4; i++) {
+ acc[i] = XXH64_round(acc[i], XXH_get64bits(input));
+ input += 8;
+ }
+ } else {
+ acc[0] = XXH64_round(acc[0], XXH_get64bits(input)); input += 8;
+ acc[1] = XXH64_round(acc[1], XXH_get64bits(input)); input += 8;
+ acc[2] = XXH64_round(acc[2], XXH_get64bits(input)); input += 8;
+ acc[3] = XXH64_round(acc[3], XXH_get64bits(input)); input += 8;
+ }
+ } while (input < limit);
+
+ return input;
+}
+
+/*!
+ * @internal
+ * @brief Merges the accumulator lanes together for XXH64()
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u64
+XXH64_mergeAccs(const xxh_u64 *acc)
+{
+ XXH_ASSERT(acc != NULL);
+ {
+ xxh_u64 h64 = XXH_rotl64(acc[0], 1) + XXH_rotl64(acc[1], 7)
+ + XXH_rotl64(acc[2], 12) + XXH_rotl64(acc[3], 18);
+ /* reroll on 32-bit */
+ if (sizeof(void *) < sizeof(xxh_u64)) {
+ size_t i;
+ for (i = 0; i < 4; i++) {
+ h64 = XXH64_mergeRound(h64, acc[i]);
+ }
+ } else {
+ h64 = XXH64_mergeRound(h64, acc[0]);
+ h64 = XXH64_mergeRound(h64, acc[1]);
+ h64 = XXH64_mergeRound(h64, acc[2]);
+ h64 = XXH64_mergeRound(h64, acc[3]);
+ }
+ return h64;
+ }
+}
+
/*!
* @internal
* @brief Processes the last 0-31 bytes of @p ptr.
@@ -3150,7 +3607,7 @@ static xxh_u64 XXH64_avalanche(xxh_u64 hash)
* @return The finalized hash
* @see XXH32_finalize().
*/
-static XXH_PUREF xxh_u64
+XXH_STATIC XXH_PUREF xxh_u64
XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
if (ptr==NULL) XXH_ASSERT(len == 0);
@@ -3200,27 +3657,13 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
xxh_u64 h64;
if (input==NULL) XXH_ASSERT(len == 0);
- if (len>=32) {
- const xxh_u8* const bEnd = input + len;
- const xxh_u8* const limit = bEnd - 31;
- xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
- xxh_u64 v2 = seed + XXH_PRIME64_2;
- xxh_u64 v3 = seed + 0;
- xxh_u64 v4 = seed - XXH_PRIME64_1;
+ if (len>=32) { /* Process a large block of data */
+ xxh_u64 acc[4];
+ XXH64_initAccs(acc, seed);
- do {
- v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
- v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
- v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
- v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
- } while (input<limit);
-
- h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
- h64 = XXH64_mergeRound(h64, v1);
- h64 = XXH64_mergeRound(h64, v2);
- h64 = XXH64_mergeRound(h64, v3);
- h64 = XXH64_mergeRound(h64, v4);
+ input = XXH64_consumeLong(acc, input, len, align);
+ h64 = XXH64_mergeAccs(acc);
} else {
h64 = seed + XXH_PRIME64_5;
}
@@ -3276,10 +3719,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, X
{
XXH_ASSERT(statePtr != NULL);
memset(statePtr, 0, sizeof(*statePtr));
- statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
- statePtr->v[1] = seed + XXH_PRIME64_2;
- statePtr->v[2] = seed + 0;
- statePtr->v[3] = seed - XXH_PRIME64_1;
+ XXH64_initAccs(statePtr->acc, seed);
return XXH_OK;
}
@@ -3292,42 +3732,36 @@ XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input,
return XXH_OK;
}
- { const xxh_u8* p = (const xxh_u8*)input;
- const xxh_u8* const bEnd = p + len;
+ state->total_len += len;
- state->total_len += len;
+ XXH_ASSERT(state->bufferedSize <= sizeof(state->buffer));
+ if (len < sizeof(state->buffer) - state->bufferedSize) { /* fill in tmp buffer */
+ XXH_memcpy(state->buffer + state->bufferedSize, input, len);
+ state->bufferedSize += (XXH32_hash_t)len;
+ return XXH_OK;
+ }
- if (state->memsize + len < 32) { /* fill in tmp buffer */
- XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
- state->memsize += (xxh_u32)len;
- return XXH_OK;
- }
+ { const xxh_u8* xinput = (const xxh_u8*)input;
+ const xxh_u8* const bEnd = xinput + len;
- if (state->memsize) { /* tmp buffer is full */
- XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
- state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
- state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
- state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
- state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
- p += 32 - state->memsize;
- state->memsize = 0;
+ if (state->bufferedSize) { /* non-empty buffer => complete first */
+ XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize);
+ xinput += sizeof(state->buffer) - state->bufferedSize;
+ /* and process one round */
+ (void)XXH64_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned);
+ state->bufferedSize = 0;
}
- if (p+32 <= bEnd) {
- const xxh_u8* const limit = bEnd - 32;
-
- do {
- state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
- state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
- state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
- state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
- } while (p<=limit);
-
+ XXH_ASSERT(xinput <= bEnd);
+ if ((size_t)(bEnd - xinput) >= sizeof(state->buffer)) {
+ /* Process the remaining data */
+ xinput = XXH64_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned);
}
- if (p < bEnd) {
- XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
- state->memsize = (unsigned)(bEnd-p);
+ if (xinput < bEnd) {
+ /* Copy the leftover to the tmp buffer */
+ XXH_memcpy(state->buffer, xinput, (size_t)(bEnd-xinput));
+ state->bufferedSize = (unsigned)(bEnd-xinput);
}
}
@@ -3341,18 +3775,14 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state
xxh_u64 h64;
if (state->total_len >= 32) {
- h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
- h64 = XXH64_mergeRound(h64, state->v[0]);
- h64 = XXH64_mergeRound(h64, state->v[1]);
- h64 = XXH64_mergeRound(h64, state->v[2]);
- h64 = XXH64_mergeRound(h64, state->v[3]);
+ h64 = XXH64_mergeAccs(state->acc);
} else {
- h64 = state->v[2] /*seed*/ + XXH_PRIME64_5;
+ h64 = state->acc[2] /*seed*/ + XXH_PRIME64_5;
}
h64 += (xxh_u64) state->total_len;
- return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
+ return XXH64_finalize(h64, state->buffer, (size_t)state->total_len, XXH_aligned);
}
#endif /* !XXH_NO_STREAM */
@@ -3387,22 +3817,6 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can
/* === Compiler specifics === */
-#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
-# define XXH_RESTRICT /* disable */
-#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
-# define XXH_RESTRICT restrict
-#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
- || (defined (__clang__)) \
- || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
- || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
-/*
- * There are a LOT more compilers that recognize __restrict but this
- * covers the major ones.
- */
-# define XXH_RESTRICT __restrict
-#else
-# define XXH_RESTRICT /* disable */
-#endif
#if (defined(__GNUC__) && (__GNUC__ >= 3)) \
|| (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
@@ -3416,7 +3830,11 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can
#ifndef XXH_HAS_INCLUDE
# ifdef __has_include
-# define XXH_HAS_INCLUDE(x) __has_include(x)
+/*
+ * Not defined as XXH_HAS_INCLUDE(x) (function-like) because
+ * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
+ */
+# define XXH_HAS_INCLUDE __has_include
# else
# define XXH_HAS_INCLUDE(x) 0
# endif
@@ -3437,6 +3855,8 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can
# include <immintrin.h>
# elif defined(__SSE2__)
# include <emmintrin.h>
+# elif defined(__loongarch_sx)
+# include <lsxintrin.h>
# endif
#endif
@@ -3533,33 +3953,6 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can
* implementation.
*/
# define XXH_VECTOR XXH_SCALAR
-/*!
- * @ingroup tuning
- * @brief Possible values for @ref XXH_VECTOR.
- *
- * Note that these are actually implemented as macros.
- *
- * If this is not defined, it is detected automatically.
- * internal macro XXH_X86DISPATCH overrides this.
- */
-enum XXH_VECTOR_TYPE /* fake enum */ {
- XXH_SCALAR = 0, /*!< Portable scalar version */
- XXH_SSE2 = 1, /*!<
- * SSE2 for Pentium 4, Opteron, all x86_64.
- *
- * @note SSE2 is also guaranteed on Windows 10, macOS, and
- * Android x86.
- */
- XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */
- XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */
- XXH_NEON = 4, /*!<
- * NEON for most ARMv7-A, all AArch64, and WASM SIMD128
- * via the SIMDeverywhere polyfill provided with the
- * Emscripten SDK.
- */
- XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
- XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */
-};
/*!
* @ingroup tuning
* @brief Selects the minimum alignment for XXH3's accumulators.
@@ -3574,13 +3967,6 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
/* Actual definition */
#ifndef XXH_DOXYGEN
-# define XXH_SCALAR 0
-# define XXH_SSE2 1
-# define XXH_AVX2 2
-# define XXH_AVX512 3
-# define XXH_NEON 4
-# define XXH_VSX 5
-# define XXH_SVE 6
#endif
#ifndef XXH_VECTOR /* can be defined on command line */
@@ -3605,6 +3991,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|| (defined(__s390x__) && defined(__VEC__)) \
&& defined(__GNUC__) /* TODO: IBM XL */
# define XXH_VECTOR XXH_VSX
+# elif defined(__loongarch_sx)
+# define XXH_VECTOR XXH_LSX
# else
# define XXH_VECTOR XXH_SCALAR
# endif
@@ -3642,6 +4030,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
# define XXH_ACC_ALIGN 64
# elif XXH_VECTOR == XXH_SVE /* sve */
# define XXH_ACC_ALIGN 64
+# elif XXH_VECTOR == XXH_LSX /* lsx */
+# define XXH_ACC_ALIGN 64
# endif
#endif
@@ -3655,7 +4045,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
#endif
#if defined(__GNUC__) || defined(__clang__)
-# define XXH_ALIASING __attribute__((may_alias))
+# define XXH_ALIASING __attribute__((__may_alias__))
#else
# define XXH_ALIASING /* nothing */
#endif
@@ -4408,8 +4798,6 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
}
}
-#define XXH3_MIDSIZE_MAX 240
-
XXH_NO_INLINE XXH_PUREF XXH64_hash_t
XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
@@ -5281,6 +5669,71 @@ XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
#endif
+#if (XXH_VECTOR == XXH_LSX)
+#define _LSX_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_lsx( void* XXH_RESTRICT acc,
+ const void* XXH_RESTRICT input,
+ const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+ {
+ __m128i* const xacc = (__m128i *) acc;
+ const __m128i* const xinput = (const __m128i *) input;
+ const __m128i* const xsecret = (const __m128i *) secret;
+
+ for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) {
+ /* data_vec = xinput[i]; */
+ __m128i const data_vec = __lsx_vld(xinput + i, 0);
+ /* key_vec = xsecret[i]; */
+ __m128i const key_vec = __lsx_vld(xsecret + i, 0);
+ /* data_key = data_vec ^ key_vec; */
+ __m128i const data_key = __lsx_vxor_v(data_vec, key_vec);
+ /* data_key_lo = data_key >> 32; */
+ __m128i const data_key_lo = __lsx_vsrli_d(data_key, 32);
+ // __m128i const data_key_lo = __lsx_vsrli_d(data_key, 32);
+ /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+ __m128i const product = __lsx_vmulwev_d_wu(data_key, data_key_lo);
+ /* xacc[i] += swap(data_vec); */
+ __m128i const data_swap = __lsx_vshuf4i_w(data_vec, _LSX_SHUFFLE(1, 0, 3, 2));
+ __m128i const sum = __lsx_vadd_d(xacc[i], data_swap);
+ /* xacc[i] += product; */
+ xacc[i] = __lsx_vadd_d(product, sum);
+ }
+ }
+}
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(lsx)
+
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_lsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+ XXH_ASSERT((((size_t)acc) & 15) == 0);
+ {
+ __m128i* const xacc = (__m128i*) acc;
+ const __m128i* const xsecret = (const __m128i *) secret;
+ const __m128i prime32 = __lsx_vreplgr2vr_w((int)XXH_PRIME32_1);
+
+ for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) {
+ /* xacc[i] ^= (xacc[i] >> 47) */
+ __m128i const acc_vec = xacc[i];
+ __m128i const shifted = __lsx_vsrli_d(acc_vec, 47);
+ __m128i const data_vec = __lsx_vxor_v(acc_vec, shifted);
+ /* xacc[i] ^= xsecret[i]; */
+ __m128i const key_vec = __lsx_vld(xsecret + i, 0);
+ __m128i const data_key = __lsx_vxor_v(data_vec, key_vec);
+
+ /* xacc[i] *= XXH_PRIME32_1; */
+ __m128i const data_key_hi = __lsx_vsrli_d(data_key, 32);
+ __m128i const prod_lo = __lsx_vmulwev_d_wu(data_key, prime32);
+ __m128i const prod_hi = __lsx_vmulwev_d_wu(data_key_hi, prime32);
+ xacc[i] = __lsx_vadd_d(prod_lo, __lsx_vslli_d(prod_hi, 32));
+ }
+ }
+}
+
+#endif
+
/* scalar variants - universal */
#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
@@ -5511,6 +5964,12 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+#elif (XXH_VECTOR == XXH_LSX)
+#define XXH3_accumulate_512 XXH3_accumulate_512_lsx
+#define XXH3_accumulate XXH3_accumulate_lsx
+#define XXH3_scrambleAcc XXH3_scrambleAcc_lsx
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
#else /* scalar */
#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
@@ -5566,7 +6025,7 @@ XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret
acc[1] ^ XXH_readLE64(secret+8) );
}
-static XXH64_hash_t
+static XXH_PUREF XXH64_hash_t
XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
{
xxh_u64 result64 = start;
@@ -5593,6 +6052,15 @@ XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secre
return XXH3_avalanche(result64);
}
+/* do not align on 8, so that the secret is different from the accumulator */
+#define XXH_SECRET_MERGEACCS_START 11
+
+static XXH_PUREF XXH64_hash_t
+XXH3_finalizeLong_64b(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 len)
+{
+ return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, len * XXH_PRIME64_1);
+}
+
#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
@@ -5608,10 +6076,8 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
/* converge into final hash */
XXH_STATIC_ASSERT(sizeof(acc) == 64);
- /* do not align on 8, so that the secret is different from the accumulator */
-#define XXH_SECRET_MERGEACCS_START 11
XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
- return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
+ return XXH3_finalizeLong_64b(acc, (const xxh_u8*)secret, (xxh_u64)len);
}
/*
@@ -5747,7 +6213,7 @@ XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH
/* === XXH3 streaming === */
#ifndef XXH_NO_STREAM
/*
- * Malloc's a pointer that is always aligned to align.
+ * Malloc's a pointer that is always aligned to @align.
*
* This must be freed with `XXH_alignedFree()`.
*
@@ -5815,8 +6281,12 @@ static void XXH_alignedFree(void* p)
/*!
* @brief Allocate an @ref XXH3_state_t.
*
- * Must be freed with XXH3_freeState().
- * @return An allocated XXH3_state_t on success, `NULL` on failure.
+ * @return An allocated pointer of @ref XXH3_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH3_freeState().
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
{
@@ -5830,9 +6300,13 @@ XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
/*!
* @brief Frees an @ref XXH3_state_t.
*
- * Must be allocated with XXH3_createState().
* @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
- * @return XXH_OK.
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note Must be allocated with XXH3_createState().
+ *
+ * @see @ref streaming_example "Streaming Example"
*/
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
{
@@ -6111,9 +6585,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t*
if (state->totalLen > XXH3_MIDSIZE_MAX) {
XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
XXH3_digest_long(acc, state, secret);
- return XXH3_mergeAccs(acc,
- secret + XXH_SECRET_MERGEACCS_START,
- (xxh_u64)state->totalLen * XXH_PRIME64_1);
+ return XXH3_finalizeLong_64b(acc, secret, (xxh_u64)state->totalLen);
}
/* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
if (state->useSeed)
@@ -6405,6 +6877,17 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
}
}
+static XXH_PUREF XXH128_hash_t
+XXH3_finalizeLong_128b(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, size_t secretSize, xxh_u64 len)
+{
+ XXH128_hash_t h128;
+ h128.low64 = XXH3_finalizeLong_64b(acc, secret, len);
+ h128.high64 = XXH3_mergeAccs(acc, secret + secretSize
+ - XXH_STRIPE_LEN - XXH_SECRET_MERGEACCS_START,
+ ~(len * XXH_PRIME64_2));
+ return h128;
+}
+
XXH_FORCE_INLINE XXH128_hash_t
XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
@@ -6418,16 +6901,7 @@ XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
/* converge into final hash */
XXH_STATIC_ASSERT(sizeof(acc) == 64);
XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
- { XXH128_hash_t h128;
- h128.low64 = XXH3_mergeAccs(acc,
- secret + XXH_SECRET_MERGEACCS_START,
- (xxh_u64)len * XXH_PRIME64_1);
- h128.high64 = XXH3_mergeAccs(acc,
- secret + secretSize
- - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
- ~((xxh_u64)len * XXH_PRIME64_2));
- return h128;
- }
+ return XXH3_finalizeLong_128b(acc, secret, secretSize, (xxh_u64)len);
}
/*
@@ -6610,19 +7084,10 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_
XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
XXH3_digest_long(acc, state, secret);
XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
- { XXH128_hash_t h128;
- h128.low64 = XXH3_mergeAccs(acc,
- secret + XXH_SECRET_MERGEACCS_START,
- (xxh_u64)state->totalLen * XXH_PRIME64_1);
- h128.high64 = XXH3_mergeAccs(acc,
- secret + state->secretLimit + XXH_STRIPE_LEN
- - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
- ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
- return h128;
- }
+ return XXH3_finalizeLong_128b(acc, secret, state->secretLimit + XXH_STRIPE_LEN, (xxh_u64)state->totalLen);
}
/* len <= XXH3_MIDSIZE_MAX : short code */
- if (state->seed)
+ if (state->useSeed)
return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
secret, state->secretLimit + XXH_STRIPE_LEN);
=====================================
testsuite/tests/core-to-stg/T25284/A.hs
=====================================
@@ -0,0 +1,8 @@
+{-# OPTIONS_GHC -fspec-eval-dictfun #-}
+module A (testX) where
+
+import qualified Cls
+
+-- this creates the big dictionary strictly because of speculative evaluation
+testX :: (Show a, Cls.HasConst a) => a -> Int -> IO ()
+testX a b = Cls.printConst a b
=====================================
testsuite/tests/core-to-stg/T25284/B.hs
=====================================
@@ -0,0 +1,8 @@
+{-# OPTIONS_GHC -fno-spec-eval-dictfun #-}
+module B (testX) where
+
+import qualified Cls
+
+-- this creates the big dictionary lazily
+testX :: (Show a, Cls.HasConst a) => a -> Int -> IO ()
+testX a b = Cls.printConst a b
=====================================
testsuite/tests/core-to-stg/T25284/Cls.hs
=====================================
@@ -0,0 +1,40 @@
+{-# LANGUAGE UndecidableInstances #-}
+
+module Cls where
+
+class HasConst a where constVal :: a
+
+instance Cls.HasConst Word where constVal = 123
+
+instance Cls.HasConst Int where constVal = 456
+
+-- this class has a big dictionary
+class HasConst10 a where
+ constA :: a
+ constInt1 :: a -> Int
+ constInt1 _ = 1
+ constInt2 :: a -> Int
+ constInt2 _ = 2
+ constInt3 :: a -> Int
+ constInt3 _ = 3
+ constInt4 :: a -> Int
+ constInt4 _ = 4
+ constInt5 :: a -> Int
+ constInt5 _ = 5
+ constInt6 :: a -> Int
+ constInt6 _ = 6
+ constInt7 :: a -> Int
+ constInt7 _ = 7
+ constInt8 :: a -> Int
+ constInt8 _ = 8
+ constInt9 :: a -> Int
+ constInt9 _ = 9
+
+instance HasConst a => HasConst10 a where
+ constA = constVal
+
+-- this doesn't use the big dictionary most of the time
+printConst :: forall a. (Show a, HasConst10 a)
+ => a -> Int -> IO ()
+printConst x 5000 = print @a constA >> print (constInt8 x)
+printConst _ _ = pure ()
=====================================
testsuite/tests/core-to-stg/T25284/Main.hs
=====================================
@@ -0,0 +1,57 @@
+{-
+
+ This tests that speculative evaluation for dictionary functions works as
+ expected, with a large dictionary that goes unused.
+
+ - Module A: dictfun speculative evaluation enabled
+ - Module B: dictfun speculative evaluation disabled
+
+ Speculative evaluation causes the unused large dictionary to be allocated
+ strictly in module A, so we expect more allocations than in module B.
+
+ -}
+module Main where
+
+import qualified A
+import qualified B
+import qualified Cls
+
+import Data.Word
+import System.Mem (performGC)
+import GHC.Stats
+import Control.Monad
+
+{-# NOINLINE getAllocated #-}
+getAllocated :: IO Word64
+getAllocated = do
+ performGC
+ allocated_bytes <$> getRTSStats
+
+main :: IO ()
+main = do
+ -- warm up (just in case)
+ _ <- testMain A.testX
+ _ <- testMain B.testX
+
+ -- for real
+ a_alloc <- testMain A.testX
+ b_alloc <- testMain B.testX
+
+ -- expect B to allocate less than A
+ let alloc_ratio :: Double
+ alloc_ratio = fromIntegral b_alloc / fromIntegral a_alloc
+ putStrLn ("expected alloc: " ++ show (alloc_ratio < 0.7))
+
+iter :: (Int -> IO ()) -> Int -> Int -> IO ()
+iter m !i !j
+ | i < j = m i >> iter m (i+1) j
+ | otherwise = pure ()
+
+{-# NOINLINE testMain #-}
+testMain :: (forall b. (Show b, Cls.HasConst b) => b -> Int -> IO ())
+ -> IO Word64
+testMain f = do
+ alloc0 <- getAllocated
+ iter (\i -> f (0::Int) i >> f (0::Word) i) 1 100000
+ alloc1 <- getAllocated
+ pure (alloc1 - alloc0)
=====================================
testsuite/tests/core-to-stg/T25284/T25284.stdout
=====================================
@@ -0,0 +1,17 @@
+456
+8
+123
+8
+456
+8
+123
+8
+456
+8
+123
+8
+456
+8
+123
+8
+expected alloc: True
=====================================
testsuite/tests/core-to-stg/T25284/all.T
=====================================
@@ -0,0 +1,6 @@
+test('T25284',
+ [js_skip, # allocation counters aren't available on the JS backend
+ extra_files(['Main.hs', 'A.hs', 'B.hs', 'Cls.hs']),
+ extra_run_opts('+RTS -T -RTS')],
+ multimod_compile_and_run,
+ ['Main', ''])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a0d4dd425246cb967a7ce00b45da91995313e3f2...956f841060f900e374f60bb7d78d6b862e4593fd
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/a0d4dd425246cb967a7ce00b45da91995313e3f2...956f841060f900e374f60bb7d78d6b862e4593fd
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250108/9f50b28d/attachment-0001.html>
More information about the ghc-commits
mailing list