Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions compiler/hash-vm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ hash-reporting = { workspace = true }
hash-source = { workspace = true }
hash-utils = { workspace = true }
hash-abi = { workspace = true }
hash-storage = { workspace = true }
41 changes: 24 additions & 17 deletions compiler/hash-vm/src/builder/func.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use hash_abi::FnAbiId;
use hash_utils::index_vec::IndexVec;

use crate::bytecode::{Instruction, op::LabelOffset};
use crate::bytecode::{Instruction, op::LabelOffset, pretty::FunctionBody};

// Import FunctionBuilder if it's defined in another module
#[derive(Debug)]
Expand Down Expand Up @@ -33,31 +33,38 @@ pub struct FunctionBuilder {
/// \ Instruction 5
/// ...
pub labels: IndexVec<LabelOffset, LabelOffset>,

/// The current label counter, this is used to generate new labels.
label_counter: LabelOffset,
}

impl FunctionBuilder {
/// Create a new [FunctionBuilder] with the given ABI.
pub fn new(abi: FnAbiId) -> Self {
Self {
abi,
body: IndexVec::new(),
labels: IndexVec::new(),
label_counter: LabelOffset::new(0),
}
}

/// Generate a new label within the function.
pub fn new_label(&mut self) -> LabelOffset {
let label = self.label_counter;
self.label_counter = LabelOffset::new(label.get() + 1);
label
Self { abi, body: IndexVec::new(), labels: IndexVec::new() }
}

/// Add an instruction to the function body.
///
/// The instruction is pushed onto the end of `body`. No entry is added to
/// `labels`, so the instruction belongs to whatever block was most recently
/// started (see `append_block`).
pub fn emit(&mut self, instruction: Instruction) {
self.body.push(instruction);
}

/// Append multiple instructions to the function body.
///
/// The instructions are added to the end of `body` in the order given.
/// Unlike `append_block`, this does not record a new label.
pub fn append(&mut self, instructions: Vec<Instruction>) {
self.body.extend(instructions);
}

/// Start a new labelled block and fill it with `instructions`.
///
/// The label that is recorded is the offset of the first instruction of the
/// block, i.e. the length of the body before anything is appended.
pub fn append_block(&mut self, instructions: Vec<Instruction>) {
    // The block begins wherever the body currently ends, so the label has to
    // be derived before the body grows.
    self.labels.push(LabelOffset::new(self.body.len()));
    self.body.extend(instructions);
}
}

// Expose the builder's labels and instructions through `FunctionBody` so that
// a function under construction can be handed to the bytecode pretty printer.
impl FunctionBody for FunctionBuilder {
// Returns the builder's `labels` field unchanged.
fn labels(&self) -> &IndexVec<LabelOffset, LabelOffset> {
&self.labels
}

// Returns the builder's `body` (the emitted instructions) unchanged.
fn instructions(&self) -> &IndexVec<LabelOffset, Instruction> {
&self.body
}
}
94 changes: 94 additions & 0 deletions compiler/hash-vm/src/bytecode/instruction.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::fmt;

use super::{op::Operand, register::Register};

/// The VM instruction set.
Expand Down Expand Up @@ -417,3 +419,95 @@ pub enum Instruction {
l2: Register,
},
}

impl fmt::Display for Instruction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Instruction::Pop8 { l1 } => write!(f, "pop8 {}", l1),
Instruction::Pop16 { l1 } => write!(f, "pop16 {}", l1),
Instruction::Pop32 { l1 } => write!(f, "pop32 {}", l1),
Instruction::Pop64 { l1 } => write!(f, "pop64 {}", l1),
Instruction::Push8 { l1 } => write!(f, "push8 {}", l1),
Instruction::Push16 { l1 } => write!(f, "push16 {}", l1),
Instruction::Push32 { l1 } => write!(f, "push32 {}", l1),
Instruction::Push64 { l1 } => write!(f, "push64 {}", l1),
Instruction::Add8 { l1, l2 } => write!(f, "add8 {}, {}", l1, l2),
Instruction::Add16 { l1, l2 } => write!(f, "add16 {}, {}", l1, l2),
Instruction::Add32 { l1, l2 } => write!(f, "add32 {}, {}", l1, l2),
Instruction::Add64 { l1, l2 } => write!(f, "add64 {}, {}", l1, l2),
Instruction::Sub8 { l1, l2 } => write!(f, "sub8 {}, {}", l1, l2),
Instruction::Sub16 { l1, l2 } => write!(f, "sub16 {}, {}", l1, l2),
Instruction::Sub32 { l1, l2 } => write!(f, "sub32 {}, {}", l1, l2),
Instruction::Sub64 { l1, l2 } => write!(f, "sub64 {}, {}", l1, l2),
Instruction::Div8 { l1, l2 } => write!(f, "div8 {}, {}", l1, l2),
Instruction::Div16 { l1, l2 } => write!(f, "div16 {}, {}", l1, l2),
Instruction::Div32 { l1, l2 } => write!(f, "div32 {}, {}", l1, l2),
Instruction::Div64 { l1, l2 } => write!(f, "div64 {}, {}", l1, l2),
Instruction::Mul8 { l1, l2 } => write!(f, "mul8 {}, {}", l1, l2),
Instruction::Mul16 { l1, l2 } => write!(f, "mul16 {}, {}", l1, l2),
Instruction::Mul32 { l1, l2 } => write!(f, "mul32 {}, {}", l1, l2),
Instruction::Mul64 { l1, l2 } => write!(f, "mul64 {}, {}", l1, l2),
Instruction::Mod8 { l1, l2 } => write!(f, "mod8 {}, {}", l1, l2),
Instruction::Mod16 { l1, l2 } => write!(f, "mod16 {}, {}", l1, l2),
Instruction::Mod32 { l1, l2 } => write!(f, "mod32 {}, {}", l1, l2),
Instruction::Mod64 { l1, l2 } => write!(f, "mod64 {}, {}", l1, l2),
Instruction::IDiv8 { l1, l2 } => write!(f, "idiv8 {}, {}", l1, l2),
Instruction::IDiv16 { l1, l2 } => write!(f, "idiv16 {}, {}", l1, l2),
Instruction::IDiv32 { l1, l2 } => write!(f, "idiv32 {}, {}", l1, l2),
Instruction::IDiv64 { l1, l2 } => write!(f, "idiv64 {}, {}", l1, l2),
Instruction::IMul8 { l1, l2 } => write!(f, "imul8 {}, {}", l1, l2),
Instruction::IMul16 { l1, l2 } => write!(f, "imul16 {}, {}", l1, l2),
Instruction::IMul32 { l1, l2 } => write!(f, "imul32 {}, {}", l1, l2),
Instruction::IMul64 { l1, l2 } => write!(f, "imul64 {}, {}", l1, l2),
Instruction::AddF32 { l1, l2 } => write!(f, "addf32 {}, {}", l1, l2),
Instruction::AddF64 { l1, l2 } => write!(f, "addf64 {}, {}", l1, l2),
Instruction::SubF32 { l1, l2 } => write!(f, "subf32 {}, {}", l1, l2),
Instruction::SubF64 { l1, l2 } => write!(f, "subf64 {}, {}", l1, l2),
Instruction::DivF32 { l1, l2 } => write!(f, "divf32 {}, {}", l1, l2),
Instruction::DivF64 { l1, l2 } => write!(f, "divf64 {}, {}", l1, l2),
Instruction::MulF32 { l1, l2 } => write!(f, "mulf32 {}, {}", l1, l2),
Instruction::MulF64 { l1, l2 } => write!(f, "mulf64 {}, {}", l1, l2),
Instruction::ModF32 { l1, l2 } => write!(f, "modf32 {}, {}", l1, l2),
Instruction::ModF64 { l1, l2 } => write!(f, "modf64 {}, {}", l1, l2),
Instruction::Xor8 { l1, l2 } => write!(f, "xor8 {}, {}", l1, l2),
Instruction::Xor16 { l1, l2 } => write!(f, "xor16 {}, {}", l1, l2),
Instruction::Xor32 { l1, l2 } => write!(f, "xor32 {}, {}", l1, l2),
Instruction::Xor64 { l1, l2 } => write!(f, "xor64 {}, {}", l1, l2),
Instruction::Or8 { l1, l2 } => write!(f, "or8 {}, {}", l1, l2),
Instruction::Or16 { l1, l2 } => write!(f, "or16 {}, {}", l1, l2),
Instruction::Or32 { l1, l2 } => write!(f, "or32 {}, {}", l1, l2),
Instruction::Or64 { l1, l2 } => write!(f, "or64 {}, {}", l1, l2),
Instruction::And8 { l1, l2 } => write!(f, "and8 {}, {}", l1, l2),
Instruction::And16 { l1, l2 } => write!(f, "and16 {}, {}", l1, l2),
Instruction::And32 { l1, l2 } => write!(f, "and32 {}, {}", l1, l2),
Instruction::And64 { l1, l2 } => write!(f, "and64 {}, {}", l1, l2),
Instruction::Not8 { l1 } => write!(f, "not8 {}", l1),
Instruction::Not16 { l1 } => write!(f, "not16 {}", l1),
Instruction::Not32 { l1 } => write!(f, "not32 {}", l1),
Instruction::Not64 { l1 } => write!(f, "not64 {}", l1),
Instruction::PowF32 { l1, l2 } => write!(f, "powf32 {}, {}", l1, l2),
Instruction::PowF64 { l1, l2 } => write!(f, "powf64 {}, {}", l1, l2),
Instruction::Shl8 { l1, l2 } => write!(f, "shl8 {}, {}", l1, l2),
Instruction::Shl16 { l1, l2 } => write!(f, "shl16 {}, {}", l1, l2),
Instruction::Shl32 { l1, l2 } => write!(f, "shl32 {}, {}", l1, l2),
Instruction::Shl64 { l1, l2 } => write!(f, "shl64 {}, {}", l1, l2),
Instruction::Shr8 { l1, l2 } => write!(f, "shr8 {}, {}", l1, l2),
Instruction::Shr16 { l1, l2 } => write!(f, "shr16 {}, {}", l1, l2),
Instruction::Shr32 { l1, l2 } => write!(f, "shr32 {}, {}", l1, l2),
Instruction::Shr64 { l1, l2 } => write!(f, "shr64 {}, {}", l1, l2),
Instruction::Write8 { reg, value } => write!(f, "write8 {}, {}", reg, value),
Instruction::Write16 { reg, value } => write!(f, "write16 {}, {}", reg, value),
Instruction::Write32 { reg, value } => write!(f, "write32 {}, {}", reg, value),
Instruction::Write64 { reg, value } => write!(f, "write64 {}, {}", reg, value),
Instruction::Call { func } => write!(f, "call {}", func),
Instruction::Mov { src, dest } => write!(f, "mov {}, {}", src, dest),
Instruction::Syscall { id } => write!(f, "syscall {}", id),
Instruction::Return => write!(f, "return"),
Instruction::Jmp { location } => write!(f, "jmp {}", location),
Instruction::JmpPos { l1, location } => write!(f, "jp {}, {}", l1, location),
Instruction::JmpNeg { l1, location } => write!(f, "jn {}, {}", l1, location),
Instruction::JmpZero { l1, location } => write!(f, "jz {}, {}", l1, location),
Instruction::Cmp { l1, l2 } => write!(f, "cmp {}, {}", l1, l2),
}
}
}
1 change: 1 addition & 0 deletions compiler/hash-vm/src/bytecode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//! logic.
pub mod instruction;
pub mod op;
pub mod pretty;
pub mod register;

pub use instruction::*;
Expand Down
18 changes: 18 additions & 0 deletions compiler/hash-vm/src/bytecode/op.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::fmt;

use hash_utils::index_vec::Idx;

use super::register::{Register, RegisterSet};
Expand Down Expand Up @@ -52,6 +54,12 @@ impl Idx for LabelOffset {
}
}

impl fmt::Display for LabelOffset {
    /// Format the label as its raw numeric offset.
    ///
    /// The call is forwarded to the `Display` implementation of the value
    /// returned by `get()` instead of going through `write!(f, "{}", ..)`.
    /// `write!` builds a fresh set of format arguments and therefore drops
    /// any width, fill or alignment the caller asked for (for example
    /// `format!("{:>4}", label)`); forwarding keeps those flags working while
    /// producing the same text for a plain `{}`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.get(), f)
    }
}

/// A type that can either be a [Register] or a [LabelOffset].
///
/// This is used in instructions that can take either a register or a label
Expand Down Expand Up @@ -100,3 +108,13 @@ impl Operand {
}
}
}

impl fmt::Display for Operand {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Operand::Register(reg) => write!(f, "{}", reg),
Operand::Label(label) => write!(f, "${}", label),
Operand::Immediate(value) => write!(f, "#{}", value),
}
}
}
154 changes: 154 additions & 0 deletions compiler/hash-vm/src/bytecode/pretty.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
//! This module contains the logic and implementation in order to
//! write bytecode as a human-readable string format. The specific
//! implementation is reliant on passing a "function" body that is to
//! be pretty printed.

use std::fmt;

use hash_utils::index_vec::IndexVec;

use crate::bytecode::{Instruction, LabelOffset};

/// A read-only view of a function's bytecode that can be pretty printed.
///
/// Implementors expose a list of labels and a list of instructions; the
/// printer in this module uses the label values as offsets into the
/// instruction list to decide where each labelled block begins.
pub trait FunctionBody {
/// Get the labels within the function body.
///
/// Each entry is used by the printer as the offset of the first instruction
/// of a block; the position of the entry in the list is the number shown
/// after `$` in the printed label.
fn labels(&self) -> &IndexVec<LabelOffset, LabelOffset>;

/// Get the instructions within the function body.
fn instructions(&self) -> &IndexVec<LabelOffset, Instruction>;
}

/// Borrows a [FunctionBody] and renders it as human-readable text through
/// its [fmt::Display] implementation.
pub struct BytecodePrettyPrinter<'a, F: FunctionBody> {
// The function body being printed; only borrowed, never modified.
body: &'a F,
}

impl<'a, F: FunctionBody> BytecodePrettyPrinter<'a, F> {
/// Create a new [BytecodePrettyPrinter] for the given function body.
///
/// The printer only stores the reference; formatting happens when the
/// returned value is displayed (e.g. via `format!("{}", printer)`).
pub fn new(body: &'a F) -> Self {
Self { body }
}
}

impl<F: FunctionBody> fmt::Display for BytecodePrettyPrinter<'_, F> {
    /// Print every instruction of the function body, preceded by a `$N:`
    /// header wherever label `N` begins.
    ///
    /// Each instruction line starts with the instruction's real offset in
    /// the body. Instructions that come before the first label, or bodies
    /// that have no labels at all, are still printed (just without a
    /// header), so nothing that was emitted is silently dropped. Labels whose
    /// offset lies at or beyond the end of the body are printed as empty
    /// blocks after the last instruction.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Write a label header, right-aligned to match the instruction
        /// address column.
        fn write_label(f: &mut fmt::Formatter<'_>, id: usize) -> fmt::Result {
            writeln!(f, "{:>4}:", format!("${}", id))
        }

        // @Todo: print some kind of header information for the function, or maybe
        // support a way to add some kind of annotations to the specific
        // instructions.

        // Pair every label with its position in the label list (which is the
        // number printed after `$`) and walk them in step with the
        // instructions.
        let mut labels =
            self.body.labels().iter().map(|label| label.get()).enumerate().peekable();

        for (offset, instruction) in self.body.instructions().iter().enumerate() {
            // Emit the header of every label that starts at this instruction.
            // `<=` (rather than `==`) guarantees progress even if the label
            // list is not sorted, instead of panicking on a reversed range.
            while let Some((id, _)) = labels.next_if(|&(_, start)| start <= offset) {
                write_label(f, id)?;
            }

            Self::write_instruction(f, offset, instruction)?;
        }

        // Any label left over points at or past the end of the body: show it
        // as an empty block so that it is still visible in the output.
        for (id, _) in labels {
            write_label(f, id)?;
        }

        Ok(())
    }
}

impl<F: FunctionBody> BytecodePrettyPrinter<'_, F> {
    /// Write one instruction line: a zero-padded four digit `counter`, a
    /// colon, the mnemonic right-aligned in a seven character column and,
    /// when present, the operands.
    fn write_instruction(
        f: &mut fmt::Formatter<'_>,
        counter: usize,
        instruction: &Instruction,
    ) -> fmt::Result {
        // The rendered instruction is "<mnemonic>" or "<mnemonic> <operands>";
        // the first space, if any, is the boundary between the two columns.
        let rendered = instruction.to_string();

        match rendered.split_once(' ') {
            Some((name, operands)) => writeln!(f, "{:04}: {:>7} {}", counter, name, operands),
            // No operands (e.g., "return"): the whole text is the mnemonic.
            None => writeln!(f, "{:04}: {:>7}", counter, rendered),
        }
    }
}

#[cfg(test)]
mod tests {
use hash_utils::index_vec::index_vec;

use super::*;
use crate::bytecode::{Instruction, op::Operand, register::Register};

// Minimal `FunctionBody` implementation that simply stores the labels and
// instructions handed to it, so the printer can be exercised in isolation.
struct TestFunctionBody {
labels: IndexVec<LabelOffset, LabelOffset>,
instructions: IndexVec<LabelOffset, Instruction>,
}

impl FunctionBody for TestFunctionBody {
fn labels(&self) -> &IndexVec<LabelOffset, LabelOffset> {
&self.labels
}

fn instructions(&self) -> &IndexVec<LabelOffset, Instruction> {
&self.instructions
}
}

// Smoke test: prints a small three-block function and checks that the labels
// and mnemonics show up in the output.
#[test]
fn test_bytecode_pretty_printer_formatting() {
// Create a test function body with multiple labels and instructions
let test_body = TestFunctionBody {
// Labels at positions 0, 4, 6
labels: index_vec![LabelOffset::new(0), LabelOffset::new(4), LabelOffset::new(6),],
// Eight instructions, so the three labels above cover the instruction
// ranges 0..4, 4..6 and 6..8.
instructions: index_vec![
Instruction::Write32 { reg: Register::new(0), value: 10 },
Instruction::Write32 { reg: Register::new(1), value: 20 },
Instruction::Add32 { l1: Register::new(0), l2: Register::new(1) },
Instruction::JmpPos {
l1: Register::new(0),
location: Operand::Label(LabelOffset::new(2))
},
Instruction::Write32 { reg: Register::new(0), value: 0 },
Instruction::Return,
Instruction::Write32 { reg: Register::new(0), value: 1 },
Instruction::Return,
],
};

let output = format!("{}", BytecodePrettyPrinter::new(&test_body));

// Verify that a header was printed for each of the three labels
assert!(output.contains("$0:"), "Output should contain label $0");
assert!(output.contains("$1:"), "Output should contain label $1");
assert!(output.contains("$2:"), "Output should contain label $2");

// Verify that the first address and each mnemonic appear in the output.
// NOTE: these are substring checks only; they do not pin the exact column
// alignment or the position of each line.
assert!(output.contains("0000:"), "Output should start with instruction 0000");
assert!(output.contains("write32"), "Output should contain write32 instruction");
assert!(output.contains("add32"), "Output should contain add32 instruction");
assert!(output.contains("jp"), "Output should contain jp (jump positive) instruction");
assert!(output.contains("return"), "Output should contain return instruction");

// Print for visual inspection during test runs with --nocapture
println!("Bytecode Pretty Printer Output:\n{}", output);
}
}
Loading