|
31 | 31 | #error "Target architecture must support AVX2" |
32 | 32 | #endif |
33 | 33 |
|
| 34 | +#ifndef __BMI2__ |
| 35 | +# error "Target architecture must support BMI2 (for MULX)" |
| 36 | +#endif |
| 37 | + |
34 | 38 | // GCC's overeager SLP vectorizer sometimes pessimizes code. For functions that |
35 | 39 | // are particularly sensitive about this (such as multiplication), the |
36 | 40 | // vectorizer can be turned off with the MONAD_VM_NO_VECTORIZE pragma. |
@@ -731,9 +735,9 @@ namespace monad::vm::runtime |
731 | 735 | inline constexpr std::pair<uint64_t, uint64_t> |
732 | 736 | mulx_constexpr(uint64_t const x, uint64_t const y) noexcept |
733 | 737 | { |
734 | | - auto const prod = static_cast<uint128_t>(x) * y; |
735 | | - auto const hi = static_cast<uint64_t>(prod >> 64); |
736 | | - auto const lo = static_cast<uint64_t>(prod); |
| 738 | + uint128_t const prod = static_cast<uint128_t>(x) * static_cast<uint128_t>(y); |
| 739 | + uint64_t const hi = static_cast<uint64_t>(prod >> uint128_t{64}); |
| 740 | + uint64_t const lo = static_cast<uint64_t>(prod); |
737 | 741 | return {hi, lo}; |
738 | 742 | } |
739 | 743 |
|
@@ -970,10 +974,10 @@ namespace monad::vm::runtime |
970 | 974 | requires(0 < R && 0 < M && 0 < N && R <= M + N) |
971 | 975 | { |
972 | 976 | if consteval { |
973 | | - return truncating_mul_constexpr<R>(x, y); |
| 977 | + return truncating_mul_constexpr<R, M, N>(x, y); |
974 | 978 | } |
975 | 979 | else { |
976 | | - return truncating_mul_runtime<R>(x, y); |
| 980 | + return truncating_mul_runtime<R, M, N>(x, y); |
977 | 981 | } |
978 | 982 | } |
979 | 983 |
|
|
0 commit comments