Skip to content

Commit ef2aaa3

Browse files
m-alvarez authored and andreaslyn committed
Avoid std::tie
1 parent 2b3376b commit ef2aaa3

File tree

1 file changed

+88
-44
lines changed

1 file changed

+88
-44
lines changed

category/vm/runtime/uint256.hpp

Lines changed: 88 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232
#endif
3333

3434
#ifndef __BMI2__
35-
# error "Target architecture must support BMI2 (for MULX)"
35+
#error "Target architecture must support BMI2 (for MULX)"
3636
#endif
3737

3838
// GCC's overeager SLP vectorizer sometimes pessimizes code. For functions that
@@ -732,84 +732,89 @@ namespace monad::vm::runtime
732732
}
733733

734734
[[gnu::always_inline]]
735-
inline constexpr std::pair<uint64_t, uint64_t>
736-
mulx_constexpr(uint64_t const x, uint64_t const y) noexcept
735+
inline constexpr void mulx_constexpr(
736+
uint64_t const x, uint64_t const y, uint64_t &r_hi,
737+
uint64_t &r_lo) noexcept
737738
{
738-
uint128_t const prod = static_cast<uint128_t>(x) * static_cast<uint128_t>(y);
739-
uint64_t const hi = static_cast<uint64_t>(prod >> uint128_t{64});
740-
uint64_t const lo = static_cast<uint64_t>(prod);
741-
return {hi, lo};
739+
uint128_t const prod =
740+
static_cast<uint128_t>(x) * static_cast<uint128_t>(y);
741+
r_hi = static_cast<uint64_t>(prod >> uint128_t{64});
742+
r_lo = static_cast<uint64_t>(prod);
742743
}
743744

744745
[[gnu::always_inline]]
745-
inline std::pair<uint64_t, uint64_t>
746-
mulx_intrinsic(uint64_t const x, uint64_t const y) noexcept
746+
inline void mulx_intrinsic(
747+
uint64_t const x, uint64_t const y, uint64_t &r_hi,
748+
uint64_t &r_lo) noexcept
747749
{
750+
/*
748751
uint64_t hi;
749752
uint64_t lo;
753+
*/
750754
asm("mulx %[x], %[lo], %[hi]"
751-
: [hi] "=r"(hi), [lo] "=r"(lo)
755+
: [hi] "=r"(r_hi), [lo] "=r"(r_lo)
752756
: [x] "r"(x), [y] "d"(y));
753-
return {hi, lo};
754757
}
755758

756759
[[gnu::always_inline]]
757-
inline constexpr std::pair<uint64_t, uint64_t>
758-
mulx(uint64_t const x, uint64_t const y) noexcept
760+
inline constexpr void mulx(
761+
uint64_t const x, uint64_t const y, uint64_t &r_hi,
762+
uint64_t &r_lo) noexcept
759763
{
760764
if consteval {
761-
return mulx_constexpr(x, y);
765+
return mulx_constexpr(x, y, r_hi, r_lo);
762766
}
763767
else {
764-
return mulx_intrinsic(x, y);
768+
return mulx_intrinsic(x, y, r_hi, r_lo);
765769
}
766770
}
767771

768772
template <size_t M>
769773
using words_t = std::array<uint64_t, M>;
770774

771775
[[gnu::always_inline]]
772-
inline std::tuple<uint64_t, uint64_t, uint64_t> adc_3(
773-
std::tuple<uint64_t, uint64_t, uint64_t> const x,
774-
std::tuple<uint64_t, uint64_t> const y) noexcept
776+
inline void adc_3(
777+
uint64_t x_2, uint64_t x_1, uint64_t x_0, uint64_t const y_1,
778+
uint64_t const y_0, uint64_t &r_2, uint64_t &r_1,
779+
uint64_t &r_0) noexcept
775780
{
776-
auto [x_2, x_1, x_0] = x;
777-
auto [y_1, y_0] = y;
778781
asm("addq %[y_0], %[x_0]\n"
779782
"adcq %[y_1], %[x_1]\n"
780783
"adcq $0, %[x_2]"
781784
: [x_0] "+r"(x_0), [x_1] "+r"(x_1), [x_2] "+r"(x_2)
782785
: [y_0] "r"(y_0), [y_1] "r"(y_1)
783786
: "cc");
784-
return {x_2, x_1, x_0};
787+
r_2 = x_2;
788+
r_1 = x_1;
789+
r_0 = x_0;
785790
}
786791

787792
[[gnu::always_inline]]
788-
inline std::pair<uint64_t, uint64_t>
789-
adc_2(std::pair<uint64_t, uint64_t> const x, uint64_t const y_0) noexcept
793+
inline void adc_2(
794+
uint64_t x_1, uint64_t x_0, uint64_t const y_0, uint64_t &r_1,
795+
uint64_t &r_0) noexcept
790796
{
791-
auto [x_1, x_0] = x;
792797
asm("addq %[y_0], %[x_0]\n"
793798
"adcq $0, %[x_1]"
794799
: [x_0] "+r"(x_0), [x_1] "+r"(x_1)
795800
: [y_0] "r"(y_0)
796801
: "cc");
797-
return {x_1, x_0};
802+
r_1 = x_1;
803+
r_0 = x_0;
798804
}
799805

800806
[[gnu::always_inline]]
801-
inline std::pair<uint64_t, uint64_t> adc_2(
802-
std::pair<uint64_t, uint64_t> const x,
803-
std::pair<uint64_t, uint64_t> const y) noexcept
807+
inline void adc_2(
808+
uint64_t x_1, uint64_t x_0, uint64_t const y_1, uint64_t const y_0,
809+
uint64_t &r_1, uint64_t &r_0) noexcept
804810
{
805-
auto [x_1, x_0] = x;
806-
auto [y_1, y_0] = y;
807811
asm("addq %[y_0], %[x_0]\n"
808812
"adcq %[y_1], %[x_1]"
809813
: [x_0] "+r"(x_0), [x_1] "+r"(x_1)
810814
: [y_0] "r"(y_0), [y_1] "r"(y_1)
811815
: "cc");
812-
return {x_1, x_0};
816+
r_1 = x_1;
817+
r_0 = x_0;
813818
}
814819

815820
template <size_t I, size_t R, size_t M>
@@ -820,8 +825,18 @@ namespace monad::vm::runtime
820825
{
821826
if constexpr (I < std::min(R, M)) {
822827
if constexpr (I + 1 < R) {
823-
auto const [hi, lo] = mulx(x[I], y);
824-
std::tie(carry, result[I]) = adc_2({hi, lo}, carry);
828+
uint64_t hi;
829+
uint64_t lo;
830+
mulx(x[I], y, hi, lo);
831+
adc_2(
832+
// Input 1
833+
hi,
834+
lo,
835+
// Input 2
836+
carry,
837+
// Output
838+
carry,
839+
result[I]);
825840
mul_line_recur<I + 1, R, M>(x, y, result, carry);
826841
}
827842
else {
@@ -841,7 +856,7 @@ namespace monad::vm::runtime
841856
words_t<R> &__restrict__ result) noexcept
842857
{
843858
uint64_t carry;
844-
std::tie(carry, result[0]) = mulx(y, x[0]);
859+
mulx(y, x[0], carry, result[0]);
845860

846861
mul_line_recur<1, R, M>(x, y, result, carry);
847862
}
@@ -855,15 +870,35 @@ namespace monad::vm::runtime
855870
if constexpr (J + 1 < M && I + J < R) {
856871
if constexpr (I + J + 2 < R) {
857872
// We need c_lo, c_hi
858-
auto const [hi, lo] = mulx(x[J + 1], y_i);
859-
std::tie(c_hi, c_lo, result[I + J]) =
860-
adc_3({hi, lo, result[I + J]}, {c_hi, c_lo});
873+
uint64_t hi;
874+
uint64_t lo;
875+
mulx(x[J + 1], y_i, hi, lo);
876+
adc_3(
877+
// Input 1
878+
hi,
879+
lo,
880+
result[I + J],
881+
// Input 2
882+
c_hi,
883+
c_lo,
884+
// Result
885+
c_hi,
886+
c_lo,
887+
result[I + J]);
861888
}
862889
else if constexpr (I + J + 1 < R) {
863890
// We only need c_lo
864891
uint64_t const lo = x[J + 1] * y_i;
865-
std::tie(c_lo, result[I + J]) =
866-
adc_2({lo, result[I + J]}, {c_hi, c_lo});
892+
adc_2(
893+
// Input 1
894+
lo,
895+
result[I + J],
896+
// Input 2
897+
c_hi,
898+
c_lo,
899+
// Output
900+
c_lo,
901+
result[I + J]);
867902
}
868903
else {
869904
// We're done, we don't need subsequent results
@@ -873,9 +908,16 @@ namespace monad::vm::runtime
873908
}
874909
else {
875910
if constexpr (I + M < R) {
876-
auto [hi, lo] = adc_2({c_hi, c_lo}, result[I + M - 1]);
877-
result[I + M - 1] = lo;
878-
result[I + M] = hi;
911+
912+
adc_2(
913+
// Input 1
914+
c_hi,
915+
c_lo,
916+
// Input 2
917+
result[I + M - 1],
918+
// Output
919+
result[I + M],
920+
result[I + M - 1]);
879921
}
880922
else if constexpr (I + M < R + 1) {
881923
result[I + M - 1] += c_lo;
@@ -904,7 +946,7 @@ namespace monad::vm::runtime
904946
uint64_t c_lo;
905947

906948
if constexpr (I + 1 < R) {
907-
std::tie(c_hi, c_lo) = mulx(x[0], y_i);
949+
mulx(x[0], y_i, c_hi, c_lo);
908950
}
909951
else {
910952
c_hi = 0;
@@ -947,7 +989,9 @@ namespace monad::vm::runtime
947989
for (size_t j = 0; j < N; j++) {
948990
uint64_t carry = 0;
949991
for (size_t i = 0; i < M && i + j < R; i++) {
950-
auto const [hi, lo] = mulx(x[i], y[j]);
992+
uint64_t hi;
993+
uint64_t lo;
994+
mulx(x[i], y[j], hi, lo);
951995

952996
auto const [s0, c0] = addc(lo, result[i + j], false);
953997
auto const [s1, c1] = addc(s0, carry, false);

0 commit comments

Comments
 (0)