Skip to content

Commit 7db92b7

Browse files
authored
Merge pull request #284 from SChernykh/opt-rcp
Optimized randomx_reciprocal
2 parents 5fc512e + 5c49ab1 commit 7db92b7

8 files changed

+24 -43 lines changed

src/assembly_generator_x86.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -445,7 +445,7 @@ namespace randomx {
445445
}
446446

447447
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
448-
uint64_t divisor = instr.getImm32();
448+
const uint32_t divisor = instr.getImm32();
449449
if (!isZeroOrPowerOf2(divisor)) {
450450
registerUsage[instr.dst] = i;
451451
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;

src/bytecode_machine.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,7 @@ namespace randomx {
243243
}
244244

245245
if (opcode < ceil_IMUL_RCP) {
246-
uint64_t divisor = instr.getImm32();
246+
const uint32_t divisor = instr.getImm32();
247247
if (!isZeroOrPowerOf2(divisor)) {
248248
auto dst = instr.dst % RegistersCount;
249249
ibc.type = InstructionType::IMUL_R;

src/jit_compiler_a64.cpp

+4 -15
Original file line number | Diff line number | Diff line change
@@ -686,7 +686,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
686686

687687
void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
688688
{
689-
const uint64_t divisor = instr.getImm32();
689+
const uint32_t divisor = instr.getImm32();
690690
if (isZeroOrPowerOf2(divisor))
691691
return;
692692

@@ -695,22 +695,11 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
695695
constexpr uint32_t tmp_reg = 20;
696696
const uint32_t dst = IntRegMap[instr.dst];
697697

698-
constexpr uint64_t N = 1ULL << 63;
699-
const uint64_t q = N / divisor;
700-
const uint64_t r = N % divisor;
701-
#ifdef __GNUC__
702-
const uint64_t shift = 64 - __builtin_clzll(divisor);
703-
#else
704-
uint64_t shift = 32;
705-
for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
706-
--shift;
707-
#endif
708-
709698
const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t);
710-
711699
literalPos -= sizeof(uint64_t);
712-
const uint64_t randomx_reciprocal = (q << shift) + ((r << shift) / divisor);
713-
memcpy(code + literalPos, &randomx_reciprocal, sizeof(randomx_reciprocal));
700+
701+
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
702+
memcpy(code + literalPos, &reciprocal, sizeof(reciprocal));
714703

715704
if (literal_id < 12)
716705
{

src/jit_compiler_rv64.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -776,7 +776,7 @@ namespace randomx {
776776
}
777777

778778
static void v1_IMUL_RCP(HANDLER_ARGS) {
779-
uint64_t divisor = isn.getImm32();
779+
const uint32_t divisor = isn.getImm32();
780780
if (!isZeroOrPowerOf2(divisor)) {
781781
state.registerUsage[isn.dst] = i;
782782
if (state.rcpCount < 4) {

src/jit_compiler_x86.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -618,7 +618,7 @@ namespace randomx {
618618
}
619619

620620
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
621-
uint64_t divisor = instr.getImm32();
621+
const uint32_t divisor = instr.getImm32();
622622
if (!isZeroOrPowerOf2(divisor)) {
623623
registerUsage[instr.dst] = i;
624624
emit(MOV_RAX_I);

src/reciprocal.c

+13 -21
Original file line number | Diff line number | Diff line change
@@ -44,36 +44,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4444
ret
4545
4646
*/
47-
uint64_t randomx_reciprocal(uint64_t divisor) {
47+
uint64_t randomx_reciprocal(uint32_t divisor) {
4848

4949
assert(divisor != 0);
5050

5151
const uint64_t p2exp63 = 1ULL << 63;
52+
const uint64_t q = p2exp63 / divisor;
53+
const uint64_t r = p2exp63 % divisor;
54+
55+
#ifdef __GNUC__
56+
const uint32_t shift = 64 - __builtin_clzll(divisor);
57+
#else
58+
uint32_t shift = 32;
59+
for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
60+
--shift;
61+
#endif
5262

53-
uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor;
54-
55-
unsigned bsr = 0; //highest set bit in divisor
56-
57-
for (uint64_t bit = divisor; bit > 0; bit >>= 1)
58-
bsr++;
59-
60-
for (unsigned shift = 0; shift < bsr; shift++) {
61-
if (remainder >= divisor - remainder) {
62-
quotient = quotient * 2 + 1;
63-
remainder = remainder * 2 - divisor;
64-
}
65-
else {
66-
quotient = quotient * 2;
67-
remainder = remainder * 2;
68-
}
69-
}
70-
71-
return quotient;
63+
return (q << shift) + ((r << shift) / divisor);
7264
}
7365

7466
#if !RANDOMX_HAVE_FAST_RECIPROCAL
7567

76-
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
68+
uint64_t randomx_reciprocal_fast(uint32_t divisor) {
7769
return randomx_reciprocal(divisor);
7870
}
7971

src/reciprocal.h

+2 -2
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4040
extern "C" {
4141
#endif
4242

43-
uint64_t randomx_reciprocal(uint64_t);
44-
uint64_t randomx_reciprocal_fast(uint64_t);
43+
uint64_t randomx_reciprocal(uint32_t);
44+
uint64_t randomx_reciprocal_fast(uint32_t);
4545

4646
#if defined(__cplusplus)
4747
}

src/tests/perf-simulation.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -477,7 +477,7 @@ int analyze(randomx::Program& p) {
477477
}
478478

479479
if (opcode < randomx::ceil_IMUL_RCP) {
480-
uint64_t divisor = instr.getImm32();
480+
const uint32_t divisor = instr.getImm32();
481481
if (!randomx::isZeroOrPowerOf2(divisor)) {
482482
instr.dst = instr.dst % randomx::RegistersCount;
483483
instr.opcode |= DST_INT;

0 commit comments

Comments
 (0)