Skip to content

X86 signed mul #192

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified doc/img/i128_graphs/windows/x86_benchmarks.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified doc/img/i128_graphs/windows/x86_relative_performance.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 3 additions & 3 deletions doc/signed_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@
# MSVC 14.3 - x86
data = {
'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
'std::_Signed128': [3187340, 185960, 979025, 1896082, 5566403, 4697289],
'int128_t': [3046252, 189165, 192609, 3569921, 4348306, 4793845],
'boost::mp::int128_t': [4269507, 2488618, 2783600, 4908622, 6835035, 6476032]
'std::_Signed128': [3495288, 199936, 1089785, 2653505, 7267297, 5779771],
'int128_t': [3520950, 212116, 210354, 2595285, 5516460, 5842785],
'boost::mp::int128_t': [7877534, 3477656, 4108539, 7030276, 10229356, 9069360]
}

df = pd.DataFrame(data)
Expand Down
8 changes: 4 additions & 4 deletions include/boost/int128/detail/common_mul.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ BOOST_INT128_FORCE_INLINE constexpr void to_words(const T& x, std::uint32_t (&wo

#endif

words[0] = static_cast<std::uint32_t>(x.low & UINT32_MAX); // LCOV_EXCL_LINE
words[1] = static_cast<std::uint32_t>(x.low >> 32); // LCOV_EXCL_LINE
words[2] = static_cast<std::uint32_t>(x.high & UINT32_MAX); // LCOV_EXCL_LINE
words[3] = static_cast<std::uint32_t>(x.high >> 32); // LCOV_EXCL_LINE
words[0] = static_cast<std::uint32_t>(x.low & UINT32_MAX); // LCOV_EXCL_LINE
words[1] = static_cast<std::uint32_t>(x.low >> 32); // LCOV_EXCL_LINE
words[2] = static_cast<std::uint32_t>(static_cast<std::uint64_t>(x.high) & UINT32_MAX); // LCOV_EXCL_LINE
words[3] = static_cast<std::uint32_t>(static_cast<std::uint64_t>(x.high) >> 32); // LCOV_EXCL_LINE
}


Expand Down
80 changes: 39 additions & 41 deletions include/boost/int128/detail/int128_imp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2210,6 +2210,41 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t library_mul(const int128_t lhs, con
return result;
}

// Multiplies a signed 128-bit value by an unsigned 64-bit value.
//
// The low word of the result is the wrapped 64-bit product lhs.low * rhs.
// The high word is lhs.high * rhs plus the upper 64 bits of the full
// lhs.low * rhs product, which is rebuilt from four 32x32 partial products.
//
// All high-word arithmetic is carried out in std::uint64_t so that it wraps
// modulo 2^64. Doing it in std::int64_t overflows (undefined behaviour, and a
// hard error during constant evaluation) even for representable results,
// e.g. lhs == -1 and rhs == 2^63 would evaluate -1 * INT64_MIN.
BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint64_t rhs) noexcept
{
    const auto low_res{lhs.low * rhs};

    // Split both 64-bit operands into 32-bit halves
    const auto a_lo{lhs.low & UINT32_MAX};
    const auto a_high{lhs.low >> 32U};
    const auto b_lo{rhs & UINT32_MAX};
    const auto b_high{rhs >> 32U};

    // Each partial product fits in 64 bits
    const auto lo_lo{a_lo * b_lo};
    const auto lo_hi{a_lo * b_high};
    const auto hi_lo{a_high * b_lo};
    const auto hi_hi{a_high * b_high};

    // Sum of everything that lands in bits [32, 64); its upper half carries into bit 64
    const auto mid{(lo_lo >> 32U) + (lo_hi & UINT32_MAX) + (hi_lo & UINT32_MAX)};

    // Upper 64 bits of lhs.low * rhs
    const auto carry{hi_hi + (lo_hi >> 32U) + (hi_lo >> 32U) + (mid >> 32U)};

    // Unsigned arithmetic wraps; a single conversion back yields the same bit pattern
    const auto high_res{static_cast<std::int64_t>(static_cast<std::uint64_t>(lhs.high) * rhs + carry)};

    return {high_res, low_res};
}

// Multiplies a signed 128-bit value by an unsigned 32-bit value.
//
// The low word of the result is the wrapped 64-bit product lhs.low * rhs.
// The high word is lhs.high * rhs plus the upper 64 bits of the full
// lhs.low * rhs product.
//
// The carry must be the upper 64 bits of lhs.low * rhs, i.e.
// ((a_hi * rhs) + ((a_lo * rhs) >> 32)) >> 32. Adding a_hi * rhs unshifted
// corrupts the high word (lhs == 2^32, rhs == 1 would produce high == 1).
//
// High-word arithmetic is done in std::uint64_t so that it wraps modulo 2^64
// instead of overflowing a signed type.
BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint32_t rhs) noexcept
{
    const auto low_res{lhs.low * rhs};

    // Split the low word into 32-bit halves; each partial product fits in 64 bits
    const auto a_lo{lhs.low & UINT32_MAX};
    const auto a_hi{lhs.low >> 32U};
    const auto lo_lo{a_lo * rhs};
    const auto hi_lo{a_hi * rhs};

    // Upper 64 bits of lhs.low * rhs. The inner sum cannot overflow:
    // hi_lo <= (2^32 - 1)^2 and (lo_lo >> 32) <= 2^32 - 2.
    const auto carry{(hi_lo + (lo_lo >> 32U)) >> 32U};

    // Unsigned arithmetic wraps; a single conversion back yields the same bit pattern
    const auto high_res{static_cast<std::int64_t>(static_cast<std::uint64_t>(lhs.high) * rhs + carry)};

    return {high_res, low_res};
}

#if defined(_M_AMD64) && !defined(__GNUC__)

BOOST_INT128_FORCE_INLINE int128_t msvc_amd64_mul(const int128_t lhs, const int128_t rhs) noexcept
Expand Down Expand Up @@ -2284,16 +2319,14 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, con
}
else
{
const auto negative {static_cast<bool>((lhs < 0) ^ (rhs < 0))};

std::uint32_t lhs_words[4] {};
std::uint32_t rhs_words[4] {};
to_words(abs(lhs), lhs_words);
to_words(abs(rhs), rhs_words);

const auto result {knuth_multiply<int128_t>(lhs_words, rhs_words)};
// Since in all likelihood this equates to memcpy we don't need to convert to non-negative integers and back
to_words(lhs, lhs_words);
to_words(rhs, rhs_words);

return negative ? -result : result;
return knuth_multiply<int128_t>(lhs_words, rhs_words);
}

#else
Expand All @@ -2303,41 +2336,6 @@ BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, con
#endif
}

// Multiplies a signed 128-bit value by an unsigned 64-bit value.
//
// The low word of the result is the wrapped 64-bit product lhs.low * rhs.
// The high word is lhs.high * rhs plus the upper 64 bits of the full
// lhs.low * rhs product, which is rebuilt from four 32x32 partial products.
//
// All high-word arithmetic is carried out in std::uint64_t so that it wraps
// modulo 2^64. Doing it in std::int64_t overflows (undefined behaviour, and a
// hard error during constant evaluation) even for representable results,
// e.g. lhs == -1 and rhs == 2^63 would evaluate -1 * INT64_MIN.
BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint64_t rhs) noexcept
{
    const auto low_res {lhs.low * rhs};

    // Split both 64-bit operands into 32-bit halves
    const auto a_lo {lhs.low & UINT32_MAX};
    const auto a_high {lhs.low >> 32U};
    const auto b_lo {rhs & UINT32_MAX};
    const auto b_high {rhs >> 32U};

    // Each partial product fits in 64 bits
    const auto lo_lo {a_lo * b_lo};
    const auto lo_hi {a_lo * b_high};
    const auto hi_lo {a_high * b_lo};
    const auto hi_hi {a_high * b_high};

    // Sum of everything that lands in bits [32, 64); its upper half carries into bit 64
    const auto mid {(lo_lo >> 32U) + (lo_hi & UINT32_MAX) + (hi_lo & UINT32_MAX)};

    // Upper 64 bits of lhs.low * rhs
    const auto carry {hi_hi + (lo_hi >> 32U) + (hi_lo >> 32U) + (mid >> 32U)};

    // Unsigned arithmetic wraps; a single conversion back yields the same bit pattern
    const auto high_res {static_cast<std::int64_t>(static_cast<std::uint64_t>(lhs.high) * rhs + carry)};

    return {high_res, low_res};
}

// Multiplies a signed 128-bit value by an unsigned 32-bit value.
//
// The low word of the result is the wrapped 64-bit product lhs.low * rhs.
// The high word is lhs.high * rhs plus the upper 64 bits of the full
// lhs.low * rhs product.
//
// The carry must be the upper 64 bits of lhs.low * rhs, i.e.
// ((a_hi * rhs) + ((a_lo * rhs) >> 32)) >> 32. Adding a_hi * rhs unshifted
// corrupts the high word (lhs == 2^32, rhs == 1 would produce high == 1).
//
// High-word arithmetic is done in std::uint64_t so that it wraps modulo 2^64
// instead of overflowing a signed type.
BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint32_t rhs) noexcept
{
    const auto low_res {lhs.low * rhs};

    // Split the low word into 32-bit halves; each partial product fits in 64 bits
    const auto a_lo {lhs.low & UINT32_MAX};
    const auto a_hi {lhs.low >> 32U};
    const auto lo_lo {a_lo * rhs};
    const auto hi_lo {a_hi * rhs};

    // Upper 64 bits of lhs.low * rhs. The inner sum cannot overflow:
    // hi_lo <= (2^32 - 1)^2 and (lo_lo >> 32) <= 2^32 - 2.
    const auto carry {(hi_lo + (lo_lo >> 32U)) >> 32U};

    // Unsigned arithmetic wraps; a single conversion back yields the same bit pattern
    const auto high_res {static_cast<std::int64_t>(static_cast<std::uint64_t>(lhs.high) * rhs + carry)};

    return {high_res, low_res};
}

} // namespace detail

constexpr int128_t operator*(const int128_t lhs, const int128_t rhs) noexcept
Expand Down