-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[feature][riscv] handle target address calculation in llvm-objdump disassembly for riscv #109914
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
a46dd92
30dd584
fce36f1
4c8d769
81e6acd
c31baef
de457f9
d19be54
ecfcf56
33e100e
3fc4e31
e425a7c
571f056
c36f94e
c461809
96dd3c3
846d055
b781312
146043d
e8ea3bb
1a23c2c
707a1ed
e94080f
4eb81b8
4169bd4
e52cbb9
fe84244
f2b402b
5f801eb
7d66e20
b42cdbb
9476135
e3e96c5
e2888d7
d1be8f7
9699b57
e86e92e
a284a64
8696193
77e8c52
090c062
0366e87
a01fa24
afd4861
3e57664
47d964e
52ebb65
1737696
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,7 +29,9 @@ | |
#include "llvm/MC/MCSubtargetInfo.h" | ||
#include "llvm/MC/TargetRegistry.h" | ||
#include "llvm/Support/ErrorHandling.h" | ||
#include "llvm/Support/MathExtras.h" | ||
#include <bitset> | ||
#include <cstdint> | ||
|
||
#define GET_INSTRINFO_MC_DESC | ||
#define ENABLE_INSTR_PREDICATE_VERIFIER | ||
|
@@ -129,6 +131,7 @@ namespace { | |
class RISCVMCInstrAnalysis : public MCInstrAnalysis { | ||
int64_t GPRState[31] = {}; | ||
std::bitset<31> GPRValidMask; | ||
unsigned int ArchRegWidth; | ||
|
||
static bool isGPR(MCRegister Reg) { | ||
return Reg >= RISCV::X0 && Reg <= RISCV::X31; | ||
|
@@ -165,8 +168,8 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis { | |
} | ||
|
||
public: | ||
explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info) | ||
: MCInstrAnalysis(Info) {} | ||
explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info, unsigned int ArchRegWidth) | ||
: MCInstrAnalysis(Info), ArchRegWidth(ArchRegWidth) {} | ||
|
||
void resetState() override { GPRValidMask.reset(); } | ||
|
||
|
@@ -182,6 +185,17 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis { | |
} | ||
|
||
switch (Inst.getOpcode()) { | ||
case RISCV::C_LUI: | ||
case RISCV::LUI: { | ||
setGPRState(Inst.getOperand(0).getReg(), | ||
SignExtend64<32>(Inst.getOperand(1).getImm() << 12)); | ||
break; | ||
} | ||
case RISCV::AUIPC: { | ||
setGPRState(Inst.getOperand(0).getReg(), | ||
Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12)); | ||
break; | ||
} | ||
default: { | ||
// Clear the state of all defined registers for instructions that we don't | ||
// explicitly support. | ||
|
@@ -193,10 +207,6 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis { | |
} | ||
break; | ||
} | ||
case RISCV::AUIPC: | ||
setGPRState(Inst.getOperand(0).getReg(), | ||
Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12)); | ||
break; | ||
} | ||
} | ||
|
||
|
@@ -234,6 +244,83 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis { | |
return false; | ||
} | ||
|
||
bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size, | ||
uint64_t &Target) const override { | ||
switch(Inst.getOpcode()) { | ||
default: | ||
return false; | ||
case RISCV::C_ADDI: | ||
case RISCV::ADDI: { | ||
MCRegister Reg = Inst.getOperand(1).getReg(); | ||
auto TargetRegState = getGPRState(Reg); | ||
if (TargetRegState && Reg != RISCV::X0) { | ||
Target = *TargetRegState + Inst.getOperand(2).getImm(); | ||
Target &= maskTrailingOnes<uint64_t>(ArchRegWidth); | ||
return true; | ||
} | ||
break; | ||
} | ||
case RISCV::C_ADDIW: | ||
case RISCV::ADDIW: { | ||
MCRegister Reg = Inst.getOperand(1).getReg(); | ||
auto TargetRegState = getGPRState(Reg); | ||
if (TargetRegState && Reg != RISCV::X0) { | ||
Target = *TargetRegState + Inst.getOperand(2).getImm(); | ||
Target = SignExtend64<32>(Target); | ||
return true; | ||
} | ||
break; | ||
} | ||
case RISCV::LB: | ||
case RISCV::LH: | ||
case RISCV::LD: | ||
case RISCV::LW: | ||
case RISCV::LBU: | ||
case RISCV::LHU: | ||
case RISCV::LWU: | ||
case RISCV::SB: | ||
case RISCV::SH: | ||
case RISCV::SW: | ||
case RISCV::SD: | ||
case RISCV::FLH: | ||
case RISCV::FLW: | ||
case RISCV::FLD: | ||
case RISCV::FSH: | ||
case RISCV::FSW: | ||
case RISCV::FSD: | ||
case RISCV::C_LD: | ||
case RISCV::C_SD: | ||
case RISCV::C_FLD: | ||
case RISCV::C_FSD: | ||
case RISCV::C_SW: | ||
case RISCV::C_LW: | ||
case RISCV::C_FSW: | ||
case RISCV::C_FLW: | ||
case RISCV::C_LBU: | ||
case RISCV::C_LH: | ||
case RISCV::C_LHU: | ||
case RISCV::C_SB: | ||
case RISCV::C_SH: | ||
case RISCV::C_LWSP: | ||
case RISCV::C_SWSP: | ||
case RISCV::C_LDSP: | ||
case RISCV::C_SDSP: | ||
case RISCV::C_FLWSP: | ||
case RISCV::C_FSWSP: | ||
case RISCV::C_FLDSP: | ||
case RISCV::C_FSDSP: { | ||
MCRegister Reg = Inst.getOperand(1).getReg(); | ||
auto TargetRegState = getGPRState(Reg); | ||
if (TargetRegState && Reg != RISCV::X0) { | ||
Target = *TargetRegState + Inst.getOperand(2).getImm(); | ||
return true; | ||
} | ||
break; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
bool isTerminator(const MCInst &Inst) const override { | ||
if (MCInstrAnalysis::isTerminator(Inst)) | ||
return true; | ||
|
@@ -327,8 +414,12 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis { | |
|
||
} // end anonymous namespace | ||
|
||
static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) { | ||
return new RISCVMCInstrAnalysis(Info); | ||
static MCInstrAnalysis *createRISCV32InstrAnalysis(const MCInstrInfo *Info) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we really not thread in the subtarget info from somewhere (either for the constructor or for the specific method calls) rather than do this? Every other class in this file that gets instantiated doesn't have a triple-specific factory method. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the best way to do this is to make the subtarget info a member of the parent class |
||
return new RISCVMCInstrAnalysis(Info, 32); | ||
} | ||
|
||
static MCInstrAnalysis *createRISCV64InstrAnalysis(const MCInstrInfo *Info) { | ||
return new RISCVMCInstrAnalysis(Info, 64); | ||
} | ||
|
||
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() { | ||
|
@@ -344,12 +435,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() { | |
TargetRegistry::RegisterELFStreamer(*T, createRISCVELFStreamer); | ||
TargetRegistry::RegisterObjectTargetStreamer( | ||
*T, createRISCVObjectTargetStreamer); | ||
TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis); | ||
|
||
// Register the asm target streamer. | ||
TargetRegistry::RegisterAsmTargetStreamer(*T, createRISCVAsmTargetStreamer); | ||
// Register the null target streamer. | ||
TargetRegistry::RegisterNullTargetStreamer(*T, | ||
createRISCVNullTargetStreamer); | ||
} | ||
TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV32Target(), | ||
createRISCV32InstrAnalysis); | ||
TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV64Target(), | ||
createRISCV64InstrAnalysis); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
if not "RISCV" in config.root.targets: | ||
config.unsupported = True |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# RUN: llvm-objdump -d %p/Inputs/riscv-ar-coverage | FileCheck %s | ||
|
||
# CHECK: 0000000000001000 <_start>: | ||
# CHECK-NEXT: 1000: 00001517 auipc a0, 0x1 | ||
# CHECK-NEXT: 1004: 00450513 addi a0, a0, 0x4 <target> | ||
# CHECK-NEXT: 1008: 00001517 auipc a0, 0x1 | ||
# CHECK-NEXT: 100c: 1571 addi a0, a0, -0x4 <target> | ||
# CHECK-NEXT: 100e: 6509 lui a0, 0x2 | ||
# CHECK-NEXT: 1010: 0045059b addiw a1, a0, 0x4 <target> | ||
# CHECK-NEXT: 1014: 6509 lui a0, 0x2 | ||
# CHECK-NEXT: 1016: 2511 addiw a0, a0, 0x4 <target> | ||
# CHECK-NEXT: 1018: 00102537 lui a0, 0x102 | ||
# CHECK-NEXT: 101c: c50c sw a1, 0x8(a0) <far_target> | ||
# CHECK-NEXT: 101e: 00102537 lui a0, 0x102 | ||
# CHECK-NEXT: 1022: 4508 lw a0, 0x8(a0) <far_target> | ||
# CHECK-NEXT: 1024: 6509 lui a0, 0x2 | ||
# CHECK-NEXT: 1026: 6585 lui a1, 0x1 | ||
# CHECK-NEXT: 1028: 0306 slli t1, t1, 0x1 | ||
# CHECK-NEXT: 102a: 0511 addi a0, a0, 0x4 <target> | ||
# CHECK-NEXT: 102c: 0505 addi a0, a0, 0x1 | ||
# CHECK-NEXT: 102e: 00200037 lui zero, 0x200 | ||
# CHECK-NEXT: 1032: 00a02423 sw a0, 0x8(zero) | ||
# CHECK-NEXT: 1036: 00101097 auipc ra, 0x101 | ||
# CHECK-NEXT: 103a: fd6080e7 jalr -0x2a(ra) <func> | ||
# CHECK-NEXT: 103e: 00102437 lui s0, 0x102 | ||
# CHECK-NEXT: 1042: 8800 sb s0, 0x0(s0) <target+0xffffc> | ||
# CHECK-NEXT: 1044: 00102137 lui sp, 0x102 | ||
# CHECK-NEXT: 1048: 4522 lw a0, 0x8(sp) <far_target> | ||
|
||
.global _start | ||
.text | ||
|
||
# The core of the feature being added was address resolution for instruction | ||
# sequences where a register is populated by immediate values via two | ||
# separate instructions. First by an instruction that provides the upper bits | ||
# (auipc, lui ...) followed by another instruction for the lower bits (addi, | ||
# jalr, ld ...). | ||
|
||
|
||
_start: | ||
# Test block 1-3 each focus on a certain starting instruction in a sequences, | ||
# the ones that provide the upper bits. The other sequence is another | ||
# instruction the provides the lower bits. The second instruction is | ||
# arbitrarily chosen to increase code coverage | ||
|
||
# test block #1 | ||
lla a0, target # addi | ||
auipc a0, 0x1 | ||
c.addi a0, -0x4 # c.addi | ||
|
||
# test block #2 | ||
c.lui a0, 0x2 | ||
addiw a1, a0, 0x4 # addiw | ||
c.lui a0, 0x2 | ||
c.addiw a0, 0x4 # c.addiw | ||
|
||
# test block #3 | ||
lui a0, 0x102 | ||
sw a1, 0x8(a0) # sw | ||
lui a0, 0x102 | ||
c.lw a0, 0x8(a0) # lw | ||
|
||
# Test block 4 tests instruction interleaving, essentially the code's | ||
# ability to keep track of a valid sequence even if multiple other unrelated | ||
# instructions separate the two | ||
|
||
# test #4 | ||
lui a0, 0x2 | ||
lui a1, 0x1 # unrelated instruction | ||
slli t1, t1, 0x1 # unrelated instruction | ||
addi a0, a0, 0x4 | ||
addi a0, a0, 0x1 | ||
|
||
# Test 5 ensures that an instruction writing into the zero register does | ||
# not trigger resolution because that register's value cannot change and | ||
# the sequence is equivalent to never running the first instruction | ||
|
||
# test #5 | ||
lui x0, 0x200 | ||
sw a0, 0x8(x0) | ||
|
||
# Test 6 ensures that the newly added functionality is compatible with | ||
# code that already worked for branch instructions | ||
|
||
# test #6 | ||
call func | ||
|
||
# test #7 zcb extension | ||
lui x8, 0x102 | ||
# the immediate value for Zcb extension is heavily bounded, so we will relax | ||
# the requirement of hitting one of the labels and focus on correctness of the | ||
# resolution. This can be verified by looking at the source: The upper bits of | ||
# lui make the far jump related to .skip 0x100000 and then 8 more bytes must be | ||
# traversed before we hit far_target--.skip 0x4 and .word 1 in target. Adding 8 | ||
# to address resolved for the instruction below yields exactly the desired label. | ||
c.sb x8, 0(x8) | ||
|
||
# test #8 stack based load/stores | ||
lui sp, 0x102 | ||
c.lwsp a0, 0x8(sp) | ||
|
||
# these are the labels that the instructions above are expecteed to resolve to | ||
.section .data | ||
.skip 0x4 | ||
target: | ||
.word 1 | ||
.skip 0x100000 | ||
far_target: | ||
.word 2 | ||
func: | ||
ret |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# RUN: llvm-objdump -d %p/Inputs/riscv-ar | FileCheck %s | ||
|
||
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: ld a0, {{-?0x[0-9a-fA-F]+}}(a0) <ldata+0xfa4> | ||
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata> | ||
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata> | ||
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: lw a0, {{-?0x[0-9a-fA-F]+}}(a0) <gdata> | ||
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <ldata> | ||
# CHECK: auipc ra, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: jalr {{-?0x[0-9a-fA-F]+}}(ra) <func> | ||
# CHECK: auipc t1, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: jr {{-?0x[0-9a-fA-F]+}}(t1) <func> | ||
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x12242678> | ||
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x1438ad> | ||
# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}} | ||
# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <_start+0xfefff> | ||
|
||
.global _start | ||
.text | ||
_start: | ||
la a0, gdata | ||
lla a0, gdata | ||
lla a0, gdata | ||
lw a0, gdata | ||
lla a0, ldata | ||
|
||
call func | ||
tail func | ||
|
||
li a0, 0x12345678 | ||
li a0, 0x1234567890abcdef | ||
li a0, 0x10000 | ||
li a0, 0xfffff | ||
|
||
.skip 0x100000 | ||
func: | ||
ret | ||
|
||
ldata: | ||
.int 0 | ||
|
||
.data | ||
gdata: | ||
.int 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing compressed loads/stores from Zcb extension. C_LBU, C_LH, C_LHU, C_SB, C_SH.
Missing C_LWSP, C_SWSP, C_LDSP, and C_SDSP from C extension. These implicitly use SP as the base register.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We're missing the Zilsd instructions from RISCVInstrInfoZilsd.td and RISCVInstrInfoZclsd.td. We can add them as a follow up, but mentioning so they don't get forgotten