From e1d1a897bcc7da194b427aac6cf5093fb4170e24 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Fri, 12 Sep 2025 21:14:18 +0300 Subject: [PATCH] libexpr: Use 16-byte atomic loads/stores MOVAPS/MOVDQA for ValueStorage<8> See the extensive comment in the code for the reasons why we have to do it this way. The motivation for the change is to simplify the eventual implementation of parallel evaluation. Being able to atomically update the whole Value is much easier to reason about. --- src/libexpr/include/nix/expr/value.hh | 120 +++++++++++++++++++++++++- src/libexpr/meson.build | 4 + src/libexpr/package.nix | 4 +- src/libexpr/value.cc | 21 +++++ 4 files changed, 144 insertions(+), 5 deletions(-) diff --git a/src/libexpr/include/nix/expr/value.hh b/src/libexpr/include/nix/expr/value.hh index c74588a31a2..1732250134a 100644 --- a/src/libexpr/include/nix/expr/value.hh +++ b/src/libexpr/include/nix/expr/value.hh @@ -14,6 +14,10 @@ #include +#if defined(__x86_64__) && defined(__SSE2__) +# include +#endif + namespace nix { struct Value; @@ -362,6 +366,11 @@ protected: { return internalType; } + + static bool isAtomic() + { + return false; + } }; namespace detail { @@ -395,7 +404,11 @@ class alignas(16) ValueStorage::type; using Payload = std::array; - Payload payload = {}; +#if defined(__x86_64__) && defined(__SSE2__) + __m128i payloadWords; +#else + Payload payloadWords = {}; +#endif static constexpr int discriminatorBits = 3; static constexpr PackedPointer discriminatorMask = (PackedPointer(1) << discriminatorBits) - 1; @@ -439,6 +452,54 @@ class alignas(16) ValueStorage(payload)); + } + + [[gnu::always_inline]] + Payload loadPayload() const noexcept + { + /* This intrinsic corresponds to MOVDQA. Note that Value (and thus the first member + payloadWords) is 16 bytes aligned. */ + __m128i res = _mm_load_si128(&payloadWords); + return std::bit_cast(res); + } +#else + [[gnu::always_inline]] + void updatePayload(Payload payload) noexcept + { + payloadWords = payload; + } + + [[gnu::always_inline]] + Payload loadPayload() const noexcept + { + return payloadWords; + } +#endif + template requires std::is_pointer_v static T untagPointer(PackedPointer val) noexcept @@ -446,9 +507,9 @@ class alignas(16) ValueStorage(val & ~discriminatorMask); } - PrimaryDiscriminator getPrimaryDiscriminator() const noexcept + PrimaryDiscriminator getPrimaryDiscriminator(PackedPointer firstDWord) const noexcept { - return static_cast(payload[0] & discriminatorMask); + return static_cast(firstDWord & discriminatorMask); } static void assertAligned(PackedPointer val) noexcept @@ -459,25 +520,30 @@ class alignas(16) ValueStorage void setSingleDWordPayload(PackedPointer untaggedVal) noexcept { + Payload payload; /* There's plenty of free upper bits in the first dword, which is used only for the discriminator. */ payload[0] = static_cast(pdSingleDWord) | (static_cast(type) << discriminatorBits); payload[1] = untaggedVal; + updatePayload(payload); } template void setUntaggablePayload(T * firstPtrField, U untaggableField) noexcept { + Payload payload; static_assert(discriminator >= pdListN && discriminator <= pdPath); auto firstFieldPayload = std::bit_cast(firstPtrField); assertAligned(firstFieldPayload); payload[0] = static_cast(discriminator) | firstFieldPayload; payload[1] = std::bit_cast(untaggableField); + updatePayload(payload); } template void setPairOfPointersPayload(T * firstPtrField, U * secondPtrField) noexcept { + Payload payload; static_assert(type >= tListSmall && type <= tLambda); { auto firstFieldPayload = std::bit_cast(firstPtrField); @@ -489,21 +555,58 @@ class alignas(16) ValueStorage requires std::is_pointer_v && std::is_pointer_v void getPairOfPointersPayload(T & firstPtrField, U & secondPtrField) const noexcept { + Payload payload = loadPayload(); firstPtrField = untagPointer(payload[0]); secondPtrField = untagPointer(payload[1]); } +public: + ValueStorage() + { + updatePayload({}); + } + + ValueStorage(const ValueStorage & other) + { + updatePayload(other.loadPayload()); + } + + ValueStorage & operator=(const ValueStorage & other) + { + updatePayload(other.loadPayload()); + return *this; + } + + ValueStorage(ValueStorage && other) noexcept + { + updatePayload(other.loadPayload()); + other.updatePayload({}); // Zero out rhs + } + + ValueStorage & operator=(ValueStorage && other) noexcept + { + updatePayload(other.loadPayload()); + other.updatePayload({}); // Zero out rhs + return *this; + } + + ~ValueStorage() noexcept {} + protected: + static bool isAtomic(); + /** Get internal type currently occupying the storage. */ InternalType getInternalType() const noexcept { - switch (auto pd = getPrimaryDiscriminator()) { + Payload payload = loadPayload(); + switch (auto pd = getPrimaryDiscriminator(payload[0])) { case pdUninitialized: /* Discriminator value of zero is used to distinguish uninitialized values. */ return tUninitialized; @@ -545,6 +648,7 @@ protected: void getStorage(NixInt & integer) const noexcept { + Payload payload = loadPayload(); /* PackedPointerType -> int64_t here is well-formed, since the standard requires this conversion to follow 2's complement rules. This is just a no-op. */ integer = NixInt(payload[1]); @@ -552,6 +656,7 @@ protected: void getStorage(bool & boolean) const noexcept { + Payload payload = loadPayload(); boolean = payload[1]; } @@ -559,38 +664,45 @@ protected: void getStorage(NixFloat & fpoint) const noexcept { + Payload payload = loadPayload(); fpoint = std::bit_cast(payload[1]); } void getStorage(ExternalValueBase *& external) const noexcept { + Payload payload = loadPayload(); external = std::bit_cast(payload[1]); } void getStorage(PrimOp *& primOp) const noexcept { + Payload payload = loadPayload(); primOp = std::bit_cast(payload[1]); } void getStorage(Bindings *& attrs) const noexcept { + Payload payload = loadPayload(); attrs = std::bit_cast(payload[1]); } void getStorage(List & list) const noexcept { + Payload payload = loadPayload(); list.elems = untagPointer(payload[0]); list.size = payload[1]; } void getStorage(StringWithContext & string) const noexcept { + Payload payload = loadPayload(); string.context = untagPointer(payload[0]); string.c_str = std::bit_cast(payload[1]); } void getStorage(Path & path) const noexcept { + Payload payload = loadPayload(); path.accessor = untagPointer(payload[0]); path.path = std::bit_cast(payload[1]); } diff --git a/src/libexpr/meson.build b/src/libexpr/meson.build index 40d3f390b4b..9c841be09e9 100644 --- a/src/libexpr/meson.build +++ b/src/libexpr/meson.build @@ -82,6 +82,10 @@ configdata_priv.set( deps_other += toml11 +cpuid = dependency('libcpuid', 'cpuid', required : false) +configdata_priv.set('HAVE_LIBCPUID', cpuid.found().to_int()) +deps_private += cpuid + config_priv_h = configure_file( configuration : configdata_priv, output : 'expr-config-private.hh', diff --git a/src/libexpr/package.nix b/src/libexpr/package.nix index a67a8cc49ab..aec02619752 100644 --- a/src/libexpr/package.nix +++ b/src/libexpr/package.nix @@ -14,6 +14,7 @@ boehmgc, nlohmann_json, toml11, + libcpuid, # Configuration Options @@ -64,7 +65,8 @@ mkMesonLibrary (finalAttrs: { buildInputs = [ toml11 - ]; + ] + ++ lib.optional stdenv.hostPlatform.isx86_64 libcpuid; propagatedBuildInputs = [ nix-util diff --git a/src/libexpr/value.cc b/src/libexpr/value.cc index 07d036b0dd4..8dbb277750b 100644 --- a/src/libexpr/value.cc +++ b/src/libexpr/value.cc @@ -1,5 +1,11 @@ #include "nix/expr/value.hh" +#include "expr-config-private.hh" + +#if HAVE_LIBCPUID +# include +#endif + namespace nix { Value Value::vEmptyList = []() { @@ -26,4 +32,19 @@ Value Value::vFalse = []() { return res; }(); +template<> +bool ValueStorage<8>::isAtomic() +{ +#if HAVE_LIBCPUID + struct cpu_id_t data; + + if (cpu_identify(NULL, &data) < 0) + return false; + + return data.flags[CPU_FEATURE_AVX]; +#else + return false; // Can't tell +#endif +} + } // namespace nix