Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/compiler/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ static std::string getX86FeaturesStr() {
"sse", "sse2", "sse3", "ssse3", "sse4.1",
};
static std::vector<std::string> OptionalFeatures = {
"bmi",
"lzcnt",
"popcnt",
"adx", "bmi", "bmi2", "lzcnt", "popcnt",
};

llvm::StringMap<bool> HostFeatures;
Expand Down
170 changes: 145 additions & 25 deletions src/compiler/target/x86/x86lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
using namespace COMPILER;
using namespace llvm;

namespace {

static void assertZeroFlagChainOperand(const MInstruction *Operand) {
const auto *ConstInst = dyn_cast<ConstantInstruction>(Operand);
ZEN_ASSERT(ConstInst &&
Expand All @@ -19,6 +21,7 @@ static void assertZeroFlagChainOperand(const MInstruction *Operand) {
"x86 ADC/SBB lowering requires carry/borrow operand to be constant 0");
}

} // namespace
X86CgLowering::X86CgLowering(CgFunction &MF)
: CgLowering(MF), Subtarget(&MF.getSubtarget<X86Subtarget>()),
TRI(Subtarget->getRegisterInfo()) {
Expand Down Expand Up @@ -55,6 +58,73 @@ X86CgLowering::X86CgLowering(CgFunction &MF)
#endif
}

// Emits a plain 64-bit register-register addition (ADD64rr) and returns the
// register holding the sum. Any carry out of bit 63 is discarded by the
// caller; use emitAdd64WithCarryCounter when the carry must be tracked.
CgRegister X86CgLowering::emitAdd64NoCarry(const TargetRegisterClass *RC,
                                           CgRegister LHSReg,
                                           CgRegister RHSReg) {
  const CgRegister SumReg = fastEmitInst_rr(X86::ADD64rr, RC, LHSReg, RHSReg);
  return SumReg;
}

std::pair<CgRegister, CgRegister>
X86CgLowering::emitAdd64WithCarryCounter(const TargetRegisterClass *RC,
CgRegister SumReg, CgRegister CarryReg,
CgRegister TermReg) {
CgRegister NewSum = fastEmitInst_rr(X86::ADD64rr, RC, SumReg, TermReg);
CgRegister NewCarry = fastEmitInst_ri(X86::ADC64ri32, RC, CarryReg, 0);
return {NewSum, NewCarry};
}

// Emits ADCX: an add that reads/writes only CF, leaving OF untouched, so it
// can run as one of the two independent ADX carry chains.
CgRegister X86CgLowering::emitAdcx64(const TargetRegisterClass *RC,
                                     CgRegister DstReg, CgRegister SrcReg) {
  const CgRegister ResultReg =
      fastEmitInst_rr(X86::ADCX64rr, RC, DstReg, SrcReg);
  return ResultReg;
}

// Emits ADOX: an add that reads/writes only OF, leaving CF untouched — the
// second of the two independent ADX carry chains (see emitAdcx64).
CgRegister X86CgLowering::emitAdox64(const TargetRegisterClass *RC,
                                     CgRegister DstReg, CgRegister SrcReg) {
  const CgRegister ResultReg =
      fastEmitInst_rr(X86::ADOX64rr, RC, DstReg, SrcReg);
  return ResultReg;
}

// Folds any pending carries from both ADX chains into CarryReg: the ADCX
// absorbs CF and the ADOX absorbs OF. Adding ZeroReg leaves the value
// unchanged apart from the carry-in itself.
CgRegister X86CgLowering::collectCarryChains(const TargetRegisterClass *RC,
                                             CgRegister CarryReg,
                                             CgRegister ZeroReg) {
  return emitAdox64(RC, emitAdcx64(RC, CarryReg, ZeroReg), ZeroReg);
}

// Resets both ADX carry chains before starting a new accumulation row.
// On x86, TEST clears CF and OF — exactly the two flags consumed by the
// ADCX and ADOX chains respectively.
void X86CgLowering::clearCarryChains(CgRegister ZeroReg) {
  // TEST reg,reg defines no register; its only effect is on EFLAGS, hence
  // the no-def emission helper.
  fastEmitNoDefInst_rr(X86::TEST64rr, ZeroReg, ZeroReg);
}

// Emits MULX64rr, the flagless 64x64 -> 128-bit multiply whose first factor
// is implicitly read from RDX. SourceReg is copied into RDX only when it is
// not already cached there (tracked through MulxSourceReg), so consecutive
// multiplies by the same factor share one COPY. Returns {low, high}; the
// high register is X86::NoRegister when NeedHigh is false, in which case the
// high half is written into a reusable scratch register (DeadMulxHiReg).
std::pair<CgRegister, CgRegister>
X86CgLowering::emitMulx64(const TargetRegisterClass *RC,
                          CgRegister &MulxSourceReg, CgRegister &DeadMulxHiReg,
                          CgRegister SourceReg, CgRegister OperandReg,
                          bool NeedHigh) {
  // Refresh RDX only when the cached MULX source changes.
  if (MulxSourceReg != SourceReg) {
    SmallVector<CgOperand, 2> CopyToRDXOperands{
        CgOperand::createRegOperand(X86::RDX, true),
        CgOperand::createRegOperand(SourceReg, false),
    };
    MF->createCgInstruction(*CurBB, TII.get(TargetOpcode::COPY),
                            CopyToRDXOperands);
    MulxSourceReg = SourceReg;
  }

  // Lazily create the scratch register that absorbs unneeded high halves; it
  // is created once and reused for every subsequent NeedHigh==false call.
  if (!NeedHigh && DeadMulxHiReg == X86::NoRegister) {
    DeadMulxHiReg = createReg(RC);
  }

  CgRegister LoReg = createReg(RC);
  CgRegister HiReg = NeedHigh ? createReg(RC) : DeadMulxHiReg;
  // MULX defs: high result first, then low, then the explicit source operand.
  // NOTE(review): the final `!NeedHigh` argument appears to be the is-dead
  // flag on the high def — confirm against CgOperand::createRegOperand.
  SmallVector<CgOperand, 3> MulxOperands{
      CgOperand::createRegOperand(HiReg, true, false, false, !NeedHigh),
      CgOperand::createRegOperand(LoReg, true),
      CgOperand::createRegOperand(OperandReg, false),
  };
  MF->createCgInstruction(*CurBB, TII.get(X86::MULX64rr), MulxOperands);
  return {LoReg, NeedHigh ? HiReg : X86::NoRegister};
}

// ==================== Unary Expressions ====================

CgRegister X86CgLowering::lowerNotExpr(MVT VT, CgRegister Operand) {
Expand Down Expand Up @@ -1083,6 +1153,16 @@ X86CgLowering::lowerEvmUmul128HiExpr(const EvmUmul128HiInstruction &Inst) {

CgRegister
X86CgLowering::lowerEvmU256MulExpr(const EvmU256MulInstruction &Inst) {
// This path only exists in the x86 EVM JIT lowering pipeline. Non-JIT EVM
// execution does not reach this codegen path.
if (Subtarget->hasBMI2() && Subtarget->hasADX()) {
return lowerEvmU256MulExprAdx(Inst);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feature is only enabled when running under the EVM JIT.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I'll add a comment for this. Besides that, do we need any additional constraints?

}
return lowerEvmU256MulExprLegacy(Inst);
}

CgRegister
X86CgLowering::lowerEvmU256MulExprLegacy(const EvmU256MulInstruction &Inst) {
static constexpr size_t NumLimbs = 4;
const TargetRegisterClass *RC = &X86::GR64RegClass;
CgRegister ZeroReg = X86MaterializeInt(0, MVT::i64);
Expand Down Expand Up @@ -1129,26 +1209,15 @@ X86CgLowering::lowerEvmU256MulExpr(const EvmU256MulInstruction &Inst) {
return {LoReg, HiReg};
};

auto addNoCarry = [&](CgRegister LHSReg, CgRegister RHSReg) {
return fastEmitInst_rr(X86::ADD64rr, RC, LHSReg, RHSReg);
};

auto addWithCarryCounter = [&](CgRegister SumReg, CgRegister CarryReg,
CgRegister TermReg) {
CgRegister NewSum = fastEmitInst_rr(X86::ADD64rr, RC, SumReg, TermReg);
CgRegister NewCarry = fastEmitInst_rr(X86::ADC64rr, RC, CarryReg, ZeroReg);
return std::pair<CgRegister, CgRegister>(NewSum, NewCarry);
};

auto [R0, H00] = emitMul64(A[0], B[0], true);
auto [L01, H01] = emitMul64(A[0], B[1], true);
auto [L10, H10] = emitMul64(A[1], B[0], true);

CgRegister R1 = H00;
CgRegister C1 = ZeroReg;
{
auto [S1, C1a] = addWithCarryCounter(R1, C1, L01);
auto [S2, C1b] = addWithCarryCounter(S1, C1a, L10);
auto [S1, C1a] = emitAdd64WithCarryCounter(RC, R1, C1, L01);
auto [S2, C1b] = emitAdd64WithCarryCounter(RC, S1, C1a, L10);
R1 = S2;
C1 = C1b;
}
Expand All @@ -1160,11 +1229,11 @@ X86CgLowering::lowerEvmU256MulExpr(const EvmU256MulInstruction &Inst) {
CgRegister R2 = H01;
CgRegister C2 = ZeroReg;
{
auto [S1, C2a] = addWithCarryCounter(R2, C2, H10);
auto [S2, C2b] = addWithCarryCounter(S1, C2a, L02);
auto [S3, C2c] = addWithCarryCounter(S2, C2b, L11);
auto [S4, C2d] = addWithCarryCounter(S3, C2c, L20);
auto [S5, C2e] = addWithCarryCounter(S4, C2d, C1);
auto [S1, C2a] = emitAdd64WithCarryCounter(RC, R2, C2, H10);
auto [S2, C2b] = emitAdd64WithCarryCounter(RC, S1, C2a, L02);
auto [S3, C2c] = emitAdd64WithCarryCounter(RC, S2, C2b, L11);
auto [S4, C2d] = emitAdd64WithCarryCounter(RC, S3, C2c, L20);
auto [S5, C2e] = emitAdd64WithCarryCounter(RC, S4, C2d, C1);
R2 = S5;
C2 = C2e;
}
Expand All @@ -1179,18 +1248,69 @@ X86CgLowering::lowerEvmU256MulExpr(const EvmU256MulInstruction &Inst) {
(void)Unused30;

CgRegister R3 = H02;
R3 = addNoCarry(R3, H11);
R3 = addNoCarry(R3, H20);
R3 = addNoCarry(R3, L03);
R3 = addNoCarry(R3, L12);
R3 = addNoCarry(R3, L21);
R3 = addNoCarry(R3, L30);
R3 = addNoCarry(R3, C2);
R3 = emitAdd64NoCarry(RC, R3, H11);
R3 = emitAdd64NoCarry(RC, R3, H20);
R3 = emitAdd64NoCarry(RC, R3, L03);
R3 = emitAdd64NoCarry(RC, R3, L12);
R3 = emitAdd64NoCarry(RC, R3, L21);
R3 = emitAdd64NoCarry(RC, R3, L30);
R3 = emitAdd64NoCarry(RC, R3, C2);

U256MulResultRegs[&Inst] = {R1, R2, R3};
return R0;
}

// Lowers a truncated 256x256 -> 256-bit schoolbook multiply using BMI2/ADX:
// MULX produces flag-free partial products while ADCX and ADOX accumulate
// two independent carry chains (CF and OF), avoiding the explicit carry
// counters of the legacy lowering. Only called when the subtarget has both
// BMI2 and ADX (see lowerEvmU256MulExpr).
CgRegister
X86CgLowering::lowerEvmU256MulExprAdx(const EvmU256MulInstruction &Inst) {
  static constexpr size_t NumLimbs = 4;
  const TargetRegisterClass *RC = &X86::GR64RegClass;
  CgRegister ZeroReg = X86MaterializeInt(0, MVT::i64);

  // Operands 0..3 are the A limbs, 4..7 the B limbs; limb 0 is the least
  // significant (A[0]*B[0] seeds result limb 0 below).
  std::array<CgRegister, NumLimbs> A = {};
  std::array<CgRegister, NumLimbs> B = {};
  for (size_t I = 0; I < NumLimbs; ++I) {
    A[I] = lowerExpr(*Inst.getOperand(I));
    B[I] = lowerExpr(*Inst.getOperand(NumLimbs + I));
  }

  // MulxSourceReg caches which value currently sits in RDX; DeadMulxHiReg
  // absorbs unneeded high halves. Both are managed lazily by emitMulx64.
  CgRegister MulxSourceReg = X86::NoRegister;
  CgRegister DeadMulxHiReg = X86::NoRegister;
  // Row 0, column 0: A[0]*B[0] seeds the accumulator limbs 0 and 1.
  auto [R0, H00] =
      emitMulx64(RC, MulxSourceReg, DeadMulxHiReg, A[0], B[0], true);
  std::array<CgRegister, NumLimbs> Acc{R0, H00, ZeroReg, ZeroReg};

  // The final CF/OF left after a row only carry into limb 4, which is outside
  // the truncated 256-bit product, so the next row can start with both chains
  // cleared.
  clearCarryChains(ZeroReg);
  // Rest of row 0: each low half joins the CF (ADCX) chain at its own column;
  // each high half joins the OF (ADOX) chain one column up. The topmost
  // column's high half would land in limb 4, so it is not requested.
  for (size_t J = 1; J < NumLimbs; ++J) {
    bool NeedHigh = (J + 1) < NumLimbs;
    auto [LoReg, HiReg] =
        emitMulx64(RC, MulxSourceReg, DeadMulxHiReg, A[0], B[J], NeedHigh);
    Acc[J] = emitAdcx64(RC, Acc[J], LoReg);
    if (NeedHigh) {
      Acc[J + 1] = emitAdox64(RC, Acc[J + 1], HiReg);
    }
  }

  // Rows 1..3: row I contributes A[I]*B[J] to column I+J; partial products
  // whose column would be >= NumLimbs are truncated away entirely (the inner
  // loop bound NumLimbs - I skips them).
  for (size_t I = 1; I < NumLimbs; ++I) {
    clearCarryChains(ZeroReg);
    for (size_t J = 0; J < NumLimbs - I; ++J) {
      size_t Column = I + J;
      bool NeedHigh = (Column + 1) < NumLimbs;
      auto [LoReg, HiReg] =
          emitMulx64(RC, MulxSourceReg, DeadMulxHiReg, A[I], B[J], NeedHigh);
      Acc[Column] = emitAdcx64(RC, Acc[Column], LoReg);
      if (NeedHigh) {
        Acc[Column + 1] = emitAdox64(RC, Acc[Column + 1], HiReg);
      }
    }
  }

  // Limb 0 is this expression's direct result; limbs 1..3 are stashed for
  // the companion EvmU256MulResult lookups keyed on this instruction.
  U256MulResultRegs[&Inst] = {Acc[1], Acc[2], Acc[3]};
  return Acc[0];
}

CgRegister X86CgLowering::lowerEvmU256MulResultExpr(
const EvmU256MulResultInstruction &Inst) {
const MInstruction *MulInst = Inst.getMulInst();
Expand Down
19 changes: 19 additions & 0 deletions src/compiler/target/x86/x86lowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,25 @@ class X86CgLowering : public CgLowering<X86CgLowering> {
static unsigned X86ChooseCmpImmediateOpcode(MVT VT, int64_t Val);
static unsigned X86ChooseCmpImmediateOpcode(MVT VT, const APInt &Value);
static unsigned X86ChooseCmpOpcode(MVT VT);
CgRegister emitAdd64NoCarry(const TargetRegisterClass *RC, CgRegister LHSReg,
CgRegister RHSReg);
std::pair<CgRegister, CgRegister>
emitAdd64WithCarryCounter(const TargetRegisterClass *RC, CgRegister SumReg,
CgRegister CarryReg, CgRegister TermReg);
CgRegister emitAdcx64(const TargetRegisterClass *RC, CgRegister DstReg,
CgRegister SrcReg);
CgRegister emitAdox64(const TargetRegisterClass *RC, CgRegister DstReg,
CgRegister SrcReg);
CgRegister collectCarryChains(const TargetRegisterClass *RC,
CgRegister CarryReg, CgRegister ZeroReg);
void clearCarryChains(CgRegister ZeroReg);
std::pair<CgRegister, CgRegister>
emitMulx64(const TargetRegisterClass *RC, CgRegister &MulxSourceReg,
CgRegister &DeadMulxHiReg, CgRegister SourceReg,
CgRegister OperandReg, bool NeedHigh);

CgRegister lowerEvmU256MulExprLegacy(const EvmU256MulInstruction &Inst);
CgRegister lowerEvmU256MulExprAdx(const EvmU256MulInstruction &Inst);

void lowerFastCompareExpr(const MInstruction *LHS, const MInstruction *RHS,
MVT VT);
Expand Down
Loading