Skip to content

[feature][riscv] handle target address calculation in llvm-objdump disassembly for riscv #109914

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 48 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
a46dd92
added evaluateInstruction method. needs tests
arjunUpatel Sep 25, 2024
30dd584
dealing with git
arjunUpatel Sep 25, 2024
fce36f1
sign extend relevant immediates
arjunUpatel Oct 14, 2024
4c8d769
fix indentation
arjunUpatel Oct 15, 2024
81e6acd
fix ADDI + sign extention bugs
arjunUpatel Oct 15, 2024
c31baef
prevent symbol reoslution in empty sections
arjunUpatel Oct 15, 2024
de457f9
added support for compressed instructions
arjunUpatel Oct 20, 2024
d19be54
Merge branch 'llvm:main' into main
arjunUpatel Oct 20, 2024
ecfcf56
call evaluateInstruction only when target=RISCV
arjunUpatel Oct 20, 2024
33e100e
fix bug cause test failures with build
arjunUpatel Oct 21, 2024
3fc4e31
Instr eval based on reg width and attempt to pass tests
arjunUpatel Nov 19, 2024
e425a7c
address comments, merge instruction evaluation and pass register widt…
arjunUpatel Nov 21, 2024
571f056
remove debugging code
arjunUpatel Nov 21, 2024
c36f94e
silly me forgot to save changes
arjunUpatel Nov 21, 2024
c461809
run clang format
arjunUpatel Nov 21, 2024
96dd3c3
fix code suggestion
arjunUpatel Dec 23, 2024
846d055
remove absolute first
arjunUpatel Dec 23, 2024
b781312
objdump prioritize actual symbols over dummy symbols during resolution
arjunUpatel Jan 8, 2025
146043d
remove unist.h from includes
arjunUpatel Jan 8, 2025
e8ea3bb
Revert "objdump prioritize actual symbols over dummy symbols during r…
arjunUpatel Jan 21, 2025
1a23c2c
modify test to effectively test new functionailty
arjunUpatel Jan 23, 2025
707a1ed
Update .gitignore
arjunUpatel Jan 26, 2025
e94080f
Update tests to match new functionality
arjunUpatel Jan 26, 2025
4eb81b8
Add tests of scenarios provided in issue description (see issue relat…
arjunUpatel May 19, 2025
4169bd4
Update tests to accurately match new symbol resolution search pattern
arjunUpatel May 19, 2025
e52cbb9
Added tests to increase code coverage of new functionality
arjunUpatel May 19, 2025
fe84244
Help llvm-lit find new tests
arjunUpatel May 19, 2025
f2b402b
FIx zero register bug. Previously address resolution would be trigger…
arjunUpatel May 19, 2025
5f801eb
Remove ignore of local folder as per comments
arjunUpatel May 19, 2025
7d66e20
Remove extraneous header as per comments
arjunUpatel May 19, 2025
b42cdbb
Use unsigned instead of signed int for values that are always positiv…
arjunUpatel May 19, 2025
9476135
Add support for Zcb extensions + corresponding tests
arjunUpatel May 20, 2025
e3e96c5
Added support for stack pointer based load and stores
arjunUpatel May 21, 2025
e2888d7
Merge branch 'main' into main
arjunUpatel May 21, 2025
d1be8f7
Use unsigned int for ArchRegWidth
arjunUpatel May 21, 2025
9699b57
Update tests to match new functionality
arjunUpatel May 21, 2025
e86e92e
One more try at passing tests
arjunUpatel May 22, 2025
a284a64
Non-exact offset match for failing test
arjunUpatel May 23, 2025
8696193
Improve documentation for evaluateInstruction
arjunUpatel Jun 5, 2025
77e8c52
Merge branch 'main' into main
arjunUpatel Jun 5, 2025
090c062
Fix typo in llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s do…
arjunUpatel Jun 9, 2025
0366e87
Avoid else case as per comments
arjunUpatel Jun 9, 2025
a01fa24
Likely did a bad merge in the past. Updating to reflect correct code …
arjunUpatel Jun 9, 2025
afd4861
Merge branch 'main' into main
arjunUpatel Jun 9, 2025
3e57664
Merge branch 'main' of github.com:arjunUpatel/llvm-project
arjunUpatel Jun 9, 2025
47d964e
Reduce map lookup of section symbols
arjunUpatel Jun 10, 2025
52ebb65
Remove changes affecting non-RISCV targets
arjunUpatel Jun 10, 2025
1737696
Merge branch 'main' of github.com:arjunUpatel/llvm-project
arjunUpatel Jun 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/MC/MCInstrAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,11 @@ class LLVM_ABI MCInstrAnalysis {
evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const;

/// Given an instruction that accesses a memory address, try to compute
/// the target address. Return true on success, and the address in \p Target.
virtual bool evaluateInstruction(const MCInst &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target) const;

/// Given an instruction tries to get the address of a memory operand. Returns
/// the address on success.
virtual std::optional<uint64_t>
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/MC/MCInstrAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
return false;
}

bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst, uint64_t Addr,
uint64_t Size,
uint64_t &Target) const {
return false;
}

std::optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
uint64_t Size) const {
Expand Down
113 changes: 103 additions & 10 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <bitset>
#include <cstdint>

#define GET_INSTRINFO_MC_DESC
#define ENABLE_INSTR_PREDICATE_VERIFIER
Expand Down Expand Up @@ -129,6 +131,7 @@ namespace {
class RISCVMCInstrAnalysis : public MCInstrAnalysis {
int64_t GPRState[31] = {};
std::bitset<31> GPRValidMask;
unsigned int ArchRegWidth;

static bool isGPR(MCRegister Reg) {
return Reg >= RISCV::X0 && Reg <= RISCV::X31;
Expand Down Expand Up @@ -165,8 +168,8 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}

public:
explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info)
: MCInstrAnalysis(Info) {}
explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info, unsigned int ArchRegWidth)
: MCInstrAnalysis(Info), ArchRegWidth(ArchRegWidth) {}

void resetState() override { GPRValidMask.reset(); }

Expand All @@ -182,6 +185,17 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}

switch (Inst.getOpcode()) {
case RISCV::C_LUI:
case RISCV::LUI: {
setGPRState(Inst.getOperand(0).getReg(),
SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
break;
}
case RISCV::AUIPC: {
setGPRState(Inst.getOperand(0).getReg(),
Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
break;
}
default: {
// Clear the state of all defined registers for instructions that we don't
// explicitly support.
Expand All @@ -193,10 +207,6 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
break;
}
case RISCV::AUIPC:
setGPRState(Inst.getOperand(0).getReg(),
Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
break;
}
}

Expand Down Expand Up @@ -234,6 +244,83 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
return false;
}

bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override {
switch(Inst.getOpcode()) {
default:
return false;
case RISCV::C_ADDI:
case RISCV::ADDI: {
MCRegister Reg = Inst.getOperand(1).getReg();
auto TargetRegState = getGPRState(Reg);
if (TargetRegState && Reg != RISCV::X0) {
Target = *TargetRegState + Inst.getOperand(2).getImm();
Target &= maskTrailingOnes<uint64_t>(ArchRegWidth);
return true;
}
break;
}
case RISCV::C_ADDIW:
case RISCV::ADDIW: {
MCRegister Reg = Inst.getOperand(1).getReg();
auto TargetRegState = getGPRState(Reg);
if (TargetRegState && Reg != RISCV::X0) {
Target = *TargetRegState + Inst.getOperand(2).getImm();
Target = SignExtend64<32>(Target);
return true;
}
break;
}
case RISCV::LB:
case RISCV::LH:
case RISCV::LD:
case RISCV::LW:
case RISCV::LBU:
case RISCV::LHU:
case RISCV::LWU:
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
case RISCV::SD:
case RISCV::FLH:
case RISCV::FLW:
case RISCV::FLD:
case RISCV::FSH:
case RISCV::FSW:
case RISCV::FSD:
case RISCV::C_LD:
case RISCV::C_SD:
case RISCV::C_FLD:
case RISCV::C_FSD:
case RISCV::C_SW:
case RISCV::C_LW:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing compressed loads/stores from Zcb extension. C_LBU, C_LH, C_LHU, C_SB, C_SH.

Missing C_LWSP, C_SWSP, C_LDSP, and C_SDSP from C extension. These implicitly use SP as the base register.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're missing the Zilsd instructions from RISCVInstrInfoZilsd.td and RISCVInstrInfoZclsd.td. We can add them as a follow up, but mentioning so they don't get forgotten

case RISCV::C_FSW:
case RISCV::C_FLW:
case RISCV::C_LBU:
case RISCV::C_LH:
case RISCV::C_LHU:
case RISCV::C_SB:
case RISCV::C_SH:
case RISCV::C_LWSP:
case RISCV::C_SWSP:
case RISCV::C_LDSP:
case RISCV::C_SDSP:
case RISCV::C_FLWSP:
case RISCV::C_FSWSP:
case RISCV::C_FLDSP:
case RISCV::C_FSDSP: {
MCRegister Reg = Inst.getOperand(1).getReg();
auto TargetRegState = getGPRState(Reg);
if (TargetRegState && Reg != RISCV::X0) {
Target = *TargetRegState + Inst.getOperand(2).getImm();
return true;
}
break;
}
}
return false;
}

bool isTerminator(const MCInst &Inst) const override {
if (MCInstrAnalysis::isTerminator(Inst))
return true;
Expand Down Expand Up @@ -327,8 +414,12 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {

} // end anonymous namespace

static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) {
return new RISCVMCInstrAnalysis(Info);
static MCInstrAnalysis *createRISCV32InstrAnalysis(const MCInstrInfo *Info) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we really not thread in the subtarget info from somewhere (either for the constructor or for the specific method calls) rather than do this? Every other class in this file that gets instantiated doesn't have a triple-specific factory method.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the best way to do this is to make the subtarget info a member of the parent class MCInstrAnalysis. The subtarget info is also used by other member functions, notably findPltEntries. All the uses of subtarget info seem to be static across all the member functions that need this information, that is, it does not change in subsequent calls to these functions in a given invocation. As such, it makes sense to tie it to the parent class so it is set once and referenced when needed versus continuously being passed as an argument even if it never changes. For the sake of keeping this PR relevant perhaps we can pass the subtarget info as an argument to evaluateInstruction for now so that it matches the implementation of findPltEntries and how the subtarget info is threaded in can be updated in a follow up PR?

return new RISCVMCInstrAnalysis(Info, 32);
}

static MCInstrAnalysis *createRISCV64InstrAnalysis(const MCInstrInfo *Info) {
return new RISCVMCInstrAnalysis(Info, 64);
}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
Expand All @@ -344,12 +435,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
TargetRegistry::RegisterELFStreamer(*T, createRISCVELFStreamer);
TargetRegistry::RegisterObjectTargetStreamer(
*T, createRISCVObjectTargetStreamer);
TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis);

// Register the asm target streamer.
TargetRegistry::RegisterAsmTargetStreamer(*T, createRISCVAsmTargetStreamer);
// Register the null target streamer.
TargetRegistry::RegisterNullTargetStreamer(*T,
createRISCVNullTargetStreamer);
}
TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV32Target(),
createRISCV32InstrAnalysis);
TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV64Target(),
createRISCV64InstrAnalysis);
}
Binary file not shown.
Binary file not shown.
2 changes: 2 additions & 0 deletions llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
if not "RISCV" in config.root.targets:
config.unsupported = True
111 changes: 111 additions & 0 deletions llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# RUN: llvm-objdump -d %p/Inputs/riscv-ar-coverage | FileCheck %s

# CHECK: 0000000000001000 <_start>:
# CHECK-NEXT: 1000: 00001517 auipc a0, 0x1
# CHECK-NEXT: 1004: 00450513 addi a0, a0, 0x4 <target>
# CHECK-NEXT: 1008: 00001517 auipc a0, 0x1
# CHECK-NEXT: 100c: 1571 addi a0, a0, -0x4 <target>
# CHECK-NEXT: 100e: 6509 lui a0, 0x2
# CHECK-NEXT: 1010: 0045059b addiw a1, a0, 0x4 <target>
# CHECK-NEXT: 1014: 6509 lui a0, 0x2
# CHECK-NEXT: 1016: 2511 addiw a0, a0, 0x4 <target>
# CHECK-NEXT: 1018: 00102537 lui a0, 0x102
# CHECK-NEXT: 101c: c50c sw a1, 0x8(a0) <far_target>
# CHECK-NEXT: 101e: 00102537 lui a0, 0x102
# CHECK-NEXT: 1022: 4508 lw a0, 0x8(a0) <far_target>
# CHECK-NEXT: 1024: 6509 lui a0, 0x2
# CHECK-NEXT: 1026: 6585 lui a1, 0x1
# CHECK-NEXT: 1028: 0306 slli t1, t1, 0x1
# CHECK-NEXT: 102a: 0511 addi a0, a0, 0x4 <target>
# CHECK-NEXT: 102c: 0505 addi a0, a0, 0x1
# CHECK-NEXT: 102e: 00200037 lui zero, 0x200
# CHECK-NEXT: 1032: 00a02423 sw a0, 0x8(zero)
# CHECK-NEXT: 1036: 00101097 auipc ra, 0x101
# CHECK-NEXT: 103a: fd6080e7 jalr -0x2a(ra) <func>
# CHECK-NEXT: 103e: 00102437 lui s0, 0x102
# CHECK-NEXT: 1042: 8800 sb s0, 0x0(s0) <target+0xffffc>
# CHECK-NEXT: 1044: 00102137 lui sp, 0x102
# CHECK-NEXT: 1048: 4522 lw a0, 0x8(sp) <far_target>

.global _start
.text

# The core of the feature being added was address resolution for instruction
# sequences where a register is populated by immediate values via two
# separate instructions. First by an instruction that provides the upper bits
# (auipc, lui ...) followed by another instruction for the lower bits (addi,
# jalr, ld ...).


_start:
# Test block 1-3 each focus on a certain starting instruction in a sequences,
# the ones that provide the upper bits. The other sequence is another
# instruction the provides the lower bits. The second instruction is
# arbitrarily chosen to increase code coverage

# test block #1
lla a0, target # addi
auipc a0, 0x1
c.addi a0, -0x4 # c.addi

# test block #2
c.lui a0, 0x2
addiw a1, a0, 0x4 # addiw
c.lui a0, 0x2
c.addiw a0, 0x4 # c.addiw

# test block #3
lui a0, 0x102
sw a1, 0x8(a0) # sw
lui a0, 0x102
c.lw a0, 0x8(a0) # lw

# Test block 4 tests instruction interleaving, essentially the code's
# ability to keep track of a valid sequence even if multiple other unrelated
# instructions separate the two

# test #4
lui a0, 0x2
lui a1, 0x1 # unrelated instruction
slli t1, t1, 0x1 # unrelated instruction
addi a0, a0, 0x4
addi a0, a0, 0x1

# Test 5 ensures that an instruction writing into the zero register does
# not trigger resolution because that register's value cannot change and
# the sequence is equivalent to never running the first instruction

# test #5
lui x0, 0x200
sw a0, 0x8(x0)

# Test 6 ensures that the newly added functionality is compatible with
# code that already worked for branch instructions

# test #6
call func

# test #7 zcb extension
lui x8, 0x102
# the immediate value for Zcb extension is heavily bounded, so we will relax
# the requirement of hitting one of the labels and focus on correctness of the
# resolution. This can be verified by looking at the source: The upper bits of
# lui make the far jump related to .skip 0x100000 and then 8 more bytes must be
# traversed before we hit far_target--.skip 0x4 and .word 1 in target. Adding 8
# to address resolved for the instruction below yields exactly the desired label.
c.sb x8, 0(x8)

# test #8 stack based load/stores
lui sp, 0x102
c.lwsp a0, 0x8(sp)

# these are the labels that the instructions above are expecteed to resolve to
.section .data
.skip 0x4
target:
.word 1
.skip 0x100000
far_target:
.word 2
func:
ret
57 changes: 57 additions & 0 deletions llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# RUN: llvm-objdump -d %p/Inputs/riscv-ar | FileCheck %s

# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: ld a0, {{-?0x[0-9a-fA-F]+}}(a0) <ldata+0xfa4>
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata>
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata>
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: lw a0, {{-?0x[0-9a-fA-F]+}}(a0) <gdata>
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <ldata>
# CHECK: auipc ra, {{-?0x[0-9a-fA-F]+}}
# CHECK: jalr {{-?0x[0-9a-fA-F]+}}(ra) <func>
# CHECK: auipc t1, {{-?0x[0-9a-fA-F]+}}
# CHECK: jr {{-?0x[0-9a-fA-F]+}}(t1) <func>
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x12242678>
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x1438ad>
# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <_start+0xfefff>

.global _start
.text
_start:
la a0, gdata
lla a0, gdata
lla a0, gdata
lw a0, gdata
lla a0, ldata

call func
tail func

li a0, 0x12345678
li a0, 0x1234567890abcdef
li a0, 0x10000
li a0, 0xfffff

.skip 0x100000
func:
ret

ldata:
.int 0

.data
gdata:
.int 0
Loading