diff --git a/.github/workflows/evm.yml b/.github/workflows/evm.yml new file mode 100644 index 0000000..9c654f9 --- /dev/null +++ b/.github/workflows/evm.yml @@ -0,0 +1,42 @@ +name: evm + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + + - name: Cache cargo registry and git + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-registry- + + - name: Cache target directory + uses: actions/cache@v4 + with: + path: target + key: ${{ runner.os }}-cargo-target-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-target- + + - name: Build workspace + run: cargo build --workspace --verbose diff --git a/Cargo.lock b/Cargo.lock index 1047ff9..4bc9594 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1380,6 +1380,7 @@ dependencies = [ name = "evm" version = "0.1.0" dependencies = [ + "alloy", "evm_core", "primitives", ] diff --git a/README.md b/README.md index f534cdf..72e568e 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,25 @@ -evm/README.md +# eth_vm — Concise reference -# eth_vm — Technical Reference +A minimal Rust EVM prototype for experimentation. This file is intentionally short and points to the most relevant locations. -This repository implements a small, Rust-based Ethereum Virtual Machine (EVM) prototype. It provides: +Purpose -- a minimal `Evm` execution context, -- an `Opcode` enumeration (canonical bytes -> names), -- a 256-entry jump table which dispatches bytes to function handlers, -- a set of operation implementations in `operations/ariths.rs`, -- primitive runtime structures (memory, stack, storage, transaction and block env). 
+- Small EVM core for learning and extension. +- Not production-ready: no gas metering, limited validation, and some panics (`unwrap()`). -This README is a developer-focused reference describing the code layout, the real "goto" dispatch mapping (jump-table), how the opcode enumeration is organized, where the handlers live, and practical notes for extending the implementation. For behavioral details (stack effects, gas, edge cases) use the canonical documentation at https://www.evm.codes/. +Project layout (essential) -Repository layout (relevant files and modules) +- `crates/evm_core` — VM core: `Opcode` enum, jump table, handlers. +- `crates/primitives` — runtime primitives: `Memory`, `Stack`, `EvmStorage`, `Transaction`, `BlockEnv`. +- `bins/evm` — example runner that creates an `Evm` instance. -- `crates/evm_core` — core EVM logic, jump table, opcodes, operation implementations. -- `crates/primitives` — runtime building blocks: `Memory`, `Stack`, `EvmStorage`, `Transaction`, `BlockEnv`. -- `bins/evm` — minimal runner. +Dispatch (runtime) -Key files (open these while reading this doc) +- `Evm::step()` reads a byte from `memory[pc]`, converts it with `Opcode::from_u8`, looks up the handler in the 256-entry table from `build_jump_table()`, and calls the handler (`fn(&mut Evm)`). `step()` does not increment `pc` itself (its `self.pc += 1` is commented out), so each handler must advance `evm.pc`; handlers that perform jumps set `evm.pc` directly. 
-- `crates/evm_core/src/opcodes.rs` — opcode enum and conversion from bytes +Short opcode snippet (Rust) -```crates/evm_core/src/opcodes.rs#L1-220 +```crates/evm_core/src/opcodes.rs#L1-20 pub enum Opcode { STOP = 0x00, ADD = 0x01, @@ -36,564 +33,22 @@ pub enum Opcode { MULMOD = 0x09, EXP = 0x0A, SIGNEXTEND = 0x0B, - - LT = 0x10, - GT = 0x11, - SLT = 0x12, - SGT = 0x13, - EQ = 0x14, - ISZERO = 0x15, - AND = 0x16, - OR = 0x17, - XOR = 0x18, - NOT = 0x19, - BYTE = 0x1A, - SHL = 0x1B, - SHR = 0x1C, - SAR = 0x1D, - - KECCAK256 = 0x20, - - ADDRESS = 0x30, - BALANCE = 0x31, - ORIGIN = 0x32, - CALLER = 0x33, - CALLVALUE = 0x34, - CALLDATALOAD = 0x35, - CALLDATASIZE = 0x36, - CALLDATACOPY = 0x37, - CODESIZE = 0x38, - CODECOPY = 0x39, - GASPRICE = 0x3A, - EXTCODESIZE = 0x3B, - EXTCODECOPY = 0x3C, - RETURNDATASIZE = 0x3D, - RETURNDATACOPY = 0x3E, - EXTCODEHASH = 0x3F, - - BLOCKHASH = 0x40, - COINBASE = 0x41, - TIMESTAMP = 0x42, - NUMBER = 0x43, - DIFFICULTY = 0x44, - GASLIMIT = 0x45, - CHAINID = 0x46, - SELFBALANCE = 0x47, - BASEFEE = 0x48, - BLOBHASH = 0x49, - BLOBBASEFEE = 0x4A, - - POP = 0x50, - MLOAD = 0x51, - MSTORE = 0x52, - MSTORE8 = 0x53, - SLOAD = 0x54, - SSTORE = 0x55, - JUMP = 0x56, - JUMPI = 0x57, - PC = 0x58, - MSIZE = 0x59, - GAS = 0x5A, - JUMPDEST = 0x5B, - TLOAD = 0x5C, - TSTORE = 0x5D, - MCOPY = 0x5E, - - PUSH0 = 0x5F, - PUSH1 = 0x60, - PUSH2 = 0x61, - PUSH3 = 0x62, - PUSH4 = 0x63, - PUSH5 = 0x64, - PUSH6 = 0x65, - PUSH7 = 0x66, - PUSH8 = 0x67, - PUSH9 = 0x68, - PUSH10 = 0x69, - PUSH11 = 0x6A, - PUSH12 = 0x6B, - PUSH13 = 0x6C, - PUSH14 = 0x6D, - PUSH15 = 0x6E, - PUSH16 = 0x6F, - PUSH17 = 0x70, - PUSH18 = 0x71, - PUSH19 = 0x72, - PUSH20 = 0x73, - PUSH21 = 0x74, - PUSH22 = 0x75, - PUSH23 = 0x76, - PUSH24 = 0x77, - PUSH25 = 0x78, - PUSH26 = 0x79, - PUSH27 = 0x7A, - PUSH28 = 0x7B, - PUSH29 = 0x7C, - PUSH30 = 0x7D, - PUSH31 = 0x7E, - PUSH32 = 0x7F, - - DUP1 = 0x80, - DUP2 = 0x81, - DUP3 = 0x82, - DUP4 = 0x83, - DUP5 = 0x84, - DUP6 = 0x85, - DUP7 = 0x86, - DUP8 = 
0x87, - DUP9 = 0x88, - DUP10 = 0x89, - DUP11 = 0x8A, - DUP12 = 0x8B, - DUP13 = 0x8C, - DUP14 = 0x8D, - DUP15 = 0x8E, - DUP16 = 0x8F, - - SWAP1 = 0x90, - SWAP2 = 0x91, - SWAP3 = 0x92, - SWAP4 = 0x93, - SWAP5 = 0x94, - SWAP6 = 0x95, - SWAP7 = 0x96, - SWAP8 = 0x97, - SWAP9 = 0x98, - SWAP10 = 0x99, - SWAP11 = 0x9A, - SWAP12 = 0x9B, - SWAP13 = 0x9C, - SWAP14 = 0x9D, - SWAP15 = 0x9E, - SWAP16 = 0x9F, - - LOG0 = 0xA0, - LOG1 = 0xA1, - LOG2 = 0xA2, - LOG3 = 0xA3, - LOG4 = 0xA4, - - DATALOAD = 0xD0, - DATALOADN = 0xD1, - DATASIZE = 0xD2, - DATACOPY = 0xD3, - - RJUMP = 0xE0, - RJUMPI = 0xE1, - RJUMPV = 0xE2, - CALLF = 0xE3, - RETF = 0xE4, - JUMPF = 0xE5, - DUPN = 0xE6, - SWAPN = 0xE7, - EXCHANGE = 0xE8, - - EOFCREATE = 0xEC, - RETURNCONTRACT = 0xEE, - - CREATE = 0xF0, - CALL = 0xF1, - CALLCODE = 0xF2, - RETURN = 0xF3, - DELEGATECALL = 0xF4, - CREATE2 = 0xF5, - - RETURNDATALOAD = 0xF7, - EXTCALL = 0xF8, - EXTDELEGATECALL = 0xF9, - STATICCALL = 0xFA, - EXTSTATICCALL = 0xFB, - - REVERT = 0xFD, - INVALID = 0xFE, - SELFDESTRUCT = 0xFF, -} -``` - -- `crates/evm_core/src/jump_tables.rs` — builds the dispatch table (the actual "goto" mapping). 
- -```crates/evm_core/src/jump_tables.rs#L1-80 -use crate::{Evm, opcodes::Opcode, operations::ariths::*}; - -pub type OpcodeFn = fn(&mut Evm); - -pub fn noop(_evm: &mut Evm) {} - -pub fn build_jump_table() -> [OpcodeFn; 256] { - let mut jump_table: [fn(&mut Evm); 256] = [noop as OpcodeFn; 256]; - jump_table[Opcode::STOP as usize] = stop; - jump_table[Opcode::ADD as usize] = add; - jump_table[Opcode::SUB as usize] = sub; - jump_table[Opcode::MUL as usize] = mul; - jump_table[Opcode::DIV as usize] = div; - jump_table[Opcode::SDIV as usize] = sdiv; - jump_table[Opcode::SMOD as usize] = smod; - jump_table[Opcode::MOD as usize] = modulo; - // jump_table[Opcode::MLOAD as usize] = m_load; - // jump_table[Opcode::CHAINID as usize] = chain_id; - // jump_table[Opcode::COINBASE as usize] = coin_base; - - - jump_table -} -``` - -- `crates/evm_core/src/operations/ariths.rs` — implementations for several arithmetic and environment-facing opcodes. - -```crates/evm_core/src/operations/ariths.rs#L1-220 -use alloy::primitives::{Address, I256, U64, U256}; - -use crate::{Evm, ProgramExitStatus}; - -// ref == https://www.evm.codes/ -pub fn stop(evm: &mut Evm) { - evm.status = ProgramExitStatus::Success; -} - -pub fn add(evm: &mut Evm) { - let a = evm.stack.pop().unwrap(); - let b = evm.stack.pop().unwrap(); - evm.stack.push(a + b).unwrap(); -} - -pub fn sub(evm: &mut Evm) { - let a = evm.stack.pop().unwrap(); - let b = evm.stack.pop().unwrap(); - evm.stack.push(a - b).unwrap(); -} - -pub fn mul(evm: &mut Evm) { - let a = evm.stack.pop().unwrap(); - let b = evm.stack.pop().unwrap(); - evm.stack.push(a * b).unwrap(); -} - -pub fn div(evm: &mut Evm) { - let a = evm.stack.pop().unwrap(); - let b = evm.stack.pop().unwrap(); - if b == U256::ZERO { - evm.stack.push(U256::ZERO).unwrap(); - } else { - evm.stack.push(a / b).unwrap(); - } -} - -pub fn sdiv(evm: &mut Evm) { - let a = evm.stack.pop().unwrap(); - let b = evm.stack.pop().unwrap(); - - let a_int = I256::from_limbs(*a.as_limbs()); - 
let b_int = I256::from_limbs(*b.as_limbs()); - - if b_int == I256::ZERO { - evm.stack.push(U256::ZERO).unwrap(); - } else { - let result = a_int / b_int; - let result_unsigned = U256::from_limbs(*result.as_limbs()); - evm.stack.push(result_unsigned).unwrap(); - } -} -... -pub fn push_0(evm: &mut Evm) { - evm.stack.push(U256::ZERO).unwrap(); -} -``` - -- `crates/evm_core/src/lib.rs` — `Evm` structure and fetch-decode-dispatch loop. - -```crates/evm_core/src/lib.rs#L1-220 -pub mod jump_tables; -pub mod opcodes; -pub mod operations { - pub mod ariths; -} - -use alloy::primitives::Address; -use primitives::{ - evm_types::{BlockEnv, EvmStorage, Transaction}, - memory::Memory, - stack::Stack, -}; - -use crate::{jump_tables::build_jump_table, opcodes::Opcode}; - -#[derive(Debug, Clone, Default, PartialEq)] -pub enum ProgramExitStatus { - Success, - Failure, - #[default] - Default, -} - -#[derive(Debug, Clone, Default)] -pub struct Evm { - pub block_env: BlockEnv, - pub tx: Transaction, - pub memory: Memory, - pub stack: Stack, - pub storage: EvmStorage, - pub pc: usize, - pub status: ProgramExitStatus, -} - -impl Evm { - pub fn new( - block_env: BlockEnv, - tx: Transaction, - memory: Memory, - stack: Stack, - storage: EvmStorage, - ) -> Self { - Evm { - block_env, - tx, - memory, - stack, - storage, - pc: 0, - status: ProgramExitStatus::default(), - } - } - - pub fn execute(&mut self) { - if self.tx.to == Address::ZERO && !self.stack.data.is_empty() { - for (i, value) in self.tx.data.iter().enumerate() { - // Process each value in the stack - println!("Value at index {}: {}", i, value); - self.memory.store_byte(i, *value); - } - } else if self.tx.to != Address::ZERO { - let touched_contract: Address = self.tx.to; - for (i, v) in self - .storage - .data - .get(&touched_contract) - .unwrap() - .code - .iter() - .enumerate() - { - self.memory.store_byte(i, *v); - } - } else { - } - } - - pub fn step(&mut self) { - let raw_instruction = self.memory.load_byte(self.pc); - let 
instruction: Opcode = Opcode::from_u8(raw_instruction).unwrap(); - - let jump_tables: [fn(&mut Evm); 256] = build_jump_table(); - jump_tables[instruction as usize](self); - self.pc += 1; - } - - pub fn run(&mut self) { - while self.status == ProgramExitStatus::default() { - self.step(); - } - } + // ... remainder in file } ``` -Dispatch / "goto" behavior (the actual runtime mapping) - -- Dispatch kernel is `Evm::step`: - - read instruction byte at `memory[pc]` (`Memory::load_byte`), - - convert to `Opcode` via `Opcode::from_u8`, - - build the jump table (`build_jump_table()`), - - call the function pointer in `jump_table[instruction as usize]`, - - increment `pc`. - -- The jump-table is a 256-element array of `fn(&mut Evm)` that defaults to `noop`. - - Currently the jump-table explicitly assigns handlers for the following opcodes: - - `0x00` (STOP) -> `operations::ariths::stop` - - `0x01` (ADD) -> `operations::ariths::add` - - `0x03` (SUB) -> `operations::ariths::sub` - - `0x02` (MUL) -> `operations::ariths::mul` - - `0x04` (DIV) -> `operations::ariths::div` - - `0x05` (SDIV) -> `operations::ariths::sdiv` - - `0x07` (SMOD) -> `operations::ariths::smod` - - `0x06` (MOD) -> `operations::ariths::modulo` - - All other opcodes fall back to `noop` until added. - -Canonical opcode reference - -- This project defines the opcode set in `crates/evm_core/src/opcodes.rs`. Use https://www.evm.codes/ as the authoritative specification for: - - opcode semantics, - - stack arguments and results, - - gas costs, - - valid/invalid behavior (e.g., DIV by zero semantics, SSTORE changes, etc). - -How to extend / add a new opcode (summary) - -1. Add the opcode constant to `opcodes.rs` (if not present already). - - The enum maps byte value to variant; `Opcode::from_u8` must map the byte as well (add a match arm). -2. Implement the handler function in `crates/evm_core/src/operations/*`. - - Signature: `pub fn handler(evm: &mut Evm) { ... }`. 
- - Use primitives (`evm.stack`, `evm.memory`, `evm.storage`, `evm.block_env`, `evm.tx`) for operations. -3. Wire the handler in `crates/evm_core/src/jump_tables.rs` inside `build_jump_table()`: - - `jump_table[Opcode::MYOP as usize] = my_handler;` -4. Add tests that exercise correct stack effects, memory & storage mutations, and any edge-case behaviors. -5. Consider gas accounting and invalid opcode behavior — this implementation currently does not track gas. - -Important implementation details & current limitations (things to know) - -- `jump_tables::build_jump_table()` creates a new jump table on every `step()` call. For performance you may want to make the jump table a static once-initialized array. Right now the mapping is rebuilt each instruction. -- Many functions use `.unwrap()` on `stack.pop()` and other lookups. This will panic when the stack is empty or maps are missing. Production implementations should return proper trap/error codes and handle stack underflow gracefully. -- `Memory` is a simple `Vec` and uses direct slices. `store_word`, `load_word`, and `store_byte` assume the vector has sufficient length; currently there is no automatic growth logic. You must call `memory.data.resize(...)` or implement memory grow behavior to avoid panics. - - See `crates/primitives/src/memory.rs` for current methods: +How to add an opcode (3 steps) -```crates/primitives/src/memory.rs#L1-160 -//! +1. Add/confirm enum entry and `from_u8` mapping in `crates/evm_core/src/opcodes.rs`. +2. Implement handler `fn(&mut Evm)` in `crates/evm_core/src/operations/`. +3. 
Register it in `crates/evm_core/src/jump_tables.rs`: -use alloy::primitives::U256; - -#[derive(Default, Debug, Clone)] -pub struct Memory { - // Fields - pub data: Vec, -} - -impl Memory { - pub fn new() -> Self { - Memory { data: Vec::new() } - } - - pub fn new_with_data(data: Vec) -> Self { - Memory { data } - } - - pub fn store_word(&mut self, offset: usize, word: U256) { - let word_to_bytes: [u8; 32] = word.to_be_bytes::<32>(); - self.data[offset..offset + 32].copy_from_slice(&word_to_bytes); - } - - pub fn load_word(&self, offset: usize) -> U256 { - let bytes = &self.data[offset..offset + 32]; - - U256::from_be_slice(bytes.try_into().unwrap()) - } - - pub fn store_byte(&mut self, offset: usize, byte: u8) { - self.data[offset] = byte; - } - - pub fn load_byte(&self, offset: usize) -> u8 { - self.data[offset] - } - - pub fn copy(&mut self, offset: usize, dest: usize, length: usize) -> u8 { - let data = &self.data[offset..offset + length]; - 0 - } -} +```rust +jump_table[Opcode::MYOP as usize] = myop_handler; ``` -- `Stack` enforces a maximum depth (1024). Pushing beyond that returns `EvmErrors::StackTooDeep`. See `crates/primitives/src/stack.rs`. - -```crates/primitives/src/stack.rs#L1-120 -use crate::errors::EvmErrors; -use alloy::primitives::U256; - -#[derive(Debug, Clone, Default)] -pub struct Stack { - pub data: Vec, -} - -impl Stack { - /// Push a value onto the stack. - /// Returns `Err(EvmErrors::StackTooDeep)` if the stack would exceed 1024 items. - pub fn push(&mut self, value: U256) -> Result<(), EvmErrors> { - if self.data.len() >= 1024 { - return Err(EvmErrors::StackTooDeep); - } - self.data.push(value); - Ok(()) - } - - /// Pop a value from the stack. Returns `None` if the stack is empty. - pub fn pop(&mut self) -> Option { - self.data.pop() - } - - /// Return current stack size. This is useful for testing and diagnostics. - pub fn len(&self) -> usize { - self.data.len() - } - - /// Return whether the stack is empty. 
- pub fn is_empty(&self) -> bool { - self.data.is_empty() - } -} -``` - -Notes about specific opcodes currently implemented in handlers - -- Arithmetic & logical: `ADD`, `SUB`, `MUL`, `DIV`, `SDIV`, `MOD`, `SMOD`, `ADDMOD`, `MULMOD`, `EXP`, `SIGNEXTEND`, `LT`, `GT`, `SLT`, `SGT`, `EQ`, `ISZERO`, `AND`, `XOR`, `NOT`, `BYTE`, `SHL`, `SHR`, `SAR` — many implementations exist in `ariths.rs`. But not all are wired into the jump table yet. -- Environment & blockchain: `ADDRESS`, `BALANCE`, `ORIGIN`, `CALLER`, `CALLVALUE`, `CALLDATALOAD`, `GASPRICE`, `BLOCKHASH`, `COINBASE`, `TIMESTAMP`, `NUMBER`, `GASLIMIT`, `CHAINID` — implementations present for reading environment fields but beware: some functions assume values exist (e.g., `storage.data.get(&address).unwrap()`). -- Stack / Memory / Storage ops: `POP`, `MLOAD`, `MSTORE`, `MSTORE8`, `SLOAD`, `SSTORE` — implementations present but not all wired into jump table. -- Control flow: `JUMP`, `JUMPI`, `JUMPDEST` — present as simple implementations (e.g., `jump` sets `evm.pc = target`), but there is no full validity check (JUMPDEST must be validated in canonical EVM; this implementation currently does not validate whether target is a `JUMPDEST`). -- Complex/extended opcodes (`CALL`, `CREATE`, `STATICCALL`, etc.) are enumerated but not implemented. - -Practical "goto" mapping cheat-sheet - -- Where is the runtime goto performed? `Evm::step`: - 1. `let raw_instruction = self.memory.load_byte(self.pc);` - 2. `let instruction: Opcode = Opcode::from_u8(raw_instruction).unwrap();` - 3. `let jump_tables: [fn(&mut Evm); 256] = build_jump_table();` - 4. `jump_tables[instruction as usize](self);` - 5. `self.pc += 1;` -- Where to add a new mapping (example `MYOP`): - - Add enum value to `opcodes.rs` and arm to `from_u8`. - - Implement `pub fn myop(evm: &mut Evm) { ... }` in `operations/*`. - - Add `jump_table[Opcode::MYOP as usize] = myop;` in `build_jump_table()`. 
- -Security / correctness considerations - -- Replace `unwrap()` usage with error handling to avoid panics in malformed inputs (stack underflow, missing storage entries, memory OOB). -- Add memory growth semantics: EVM memory grows in 32-byte words and gas must be accounted for. -- Implement full JUMPDEST validation to prevent jumping to arbitrary bytes. -- Implement gas metering per opcode to allow early halting (out-of-gas) and resource accounting. - -Useful references - -- Official opcode semantics and descriptions: https://www.evm.codes/ — use it as the primary reference for how each opcode should behave including gas, pop/push counts, and specific rules. -- Current project entrypoint (for experimentation): `bins/evm/src/main.rs`. - -```bins/evm/src/main.rs#L1-80 -use evm_core::Evm; -use primitives::{evm_types::{BlockEnv, EvmStorage, Transaction}, memory::Memory, stack::Stack}; - -fn main() { - - let block_env = BlockEnv::default(); - let memory = Memory::default(); - let stack = Stack::default(); - let storage = EvmStorage::default(); - - let tx: Transaction = Transaction::default(); - - - let mut evm = Evm::new(block_env, tx, memory, stack, storage); - // println!("Hello, world!"); -} -``` - -Recommended next steps to get a robust reference implementation - -1. Implement memory growth and bounds checking in `Memory`. -2. Replace `unwrap()` with safe error returns and introduce a `Result`-based opcode return convention (or an error status on `Evm`). -3. Implement gas metering and per-opcode cost model. -4. Build a static jump-table (static once-initialized) to avoid reallocating each step. -5. Add tests that compare behavior against canonical outcomes (e.g., against test vectors or a reference client). -6. Wire the remaining opcodes to their handlers and implement missing handlers (especially `PUSHn`, `DUP`, `SWAP`, `LOG`, `CALL*`, `CREATE*`) and implement `CALLDATA` / `RETURNDATA` semantics. 
- -If you want, I can: +Notes -- generate a mapping CSV of opcode byte -> name -> current handler location (based on code), -- add a static jump table implementation, -- or create a template to implement `PUSH1`/`PUSH32` and wiring into the table with proper memory growth and stack checks. +- For exact semantics and gas rules consult https://www.evm.codes/. +- To run locally: from repo root use `cargo build --workspace` -Tell me which one you'd like next (mapping CSV / static jump table implementation / add PUSH handlers / add gas accounting), and I will implement it and update the code. diff --git a/bins/evm/Cargo.toml b/bins/evm/Cargo.toml index b1905ad..bebcd62 100644 --- a/bins/evm/Cargo.toml +++ b/bins/evm/Cargo.toml @@ -5,4 +5,5 @@ edition = "2024" [dependencies] evm_core = {workspace = true} -primitives = {workspace = true} \ No newline at end of file +primitives = {workspace = true} +alloy = {workspace = true} \ No newline at end of file diff --git a/bins/evm/src/main.rs b/bins/evm/src/main.rs index a7c0187..12b71f0 100644 --- a/bins/evm/src/main.rs +++ b/bins/evm/src/main.rs @@ -1,16 +1,94 @@ +use std::vec; + +use alloy::primitives::{Address, U256}; use evm_core::Evm; -use primitives::{evm_types::{BlockEnv, EvmStorage, Transaction}, memory::Memory, stack::Stack}; +use primitives::{ + evm_types::{BlockEnv, EvmStorage, Transaction}, + memory::Memory, + stack::Stack, +}; fn main() { - + // for contract deployment + // + // This example constructs a tiny EVM bytecode sequence (raw bytes) in `call_data` + // that demonstrates: + // 1) pushing two small immediates (6 and 7) onto the stack using PUSH1 (0x60), + // 2) adding them to produce 13 (0x0d), + // 3) storing the 32-byte word containing that value at memory offset 0 using MSTORE (0x52), + // 4) loading the stored word back with MLOAD (0x51), + // 5) and halting with STOP (0x00). 
+ // + // Byte-level breakdown (human-readable): + // 0x60 0x06 -> PUSH1 0x06 ; push the constant 6 onto the stack + // 0x60 0x07 -> PUSH1 0x07 ; push the constant 7 onto the stack + // 0x01 -> ADD ; pop 7 and 6, push (6 + 7) = 13 + // 0x60 0x00 -> PUSH1 0x00 ; push memory offset 0 onto the stack (offset where we'll store) + // 0x52 -> MSTORE ; store 32-byte word (value) at memory[offset] + // // MSTORE consumes (offset, value) from the stack (see NOTE below) + // 0x60 0x00 -> PUSH1 0x00 ; push memory offset 0 to read back the stored value + // 0x51 -> MLOAD ; load 32-byte word from memory[offset] and push it on the stack + // 0x00 -> STOP ; halt execution + // + // NOTE about stack order and MSTORE: + // EVM op semantics require careful ordering so MSTORE sees the expected items on the stack. + // - After ADD the top of stack is the numeric result (13). + // - We then PUSH1 0x00 (the offset). At that point the stack (top -> bottom) is: [offset=0, value=13]. + // - MSTORE will pop offset and value and write value at memory[offset]. + // Different implementations or interpretations may describe pop order differently; this code + // arranges the pushes so the MSTORE call receives the correct pair for this VM's handlers. + // + // Raw bytes (literal sequence used as the transaction data / init code): + // [0x60,0x06, 0x60,0x07, 0x01, 0x60,0x00, 0x52, 0x60,0x00, 0x51, 0x00] + // + // We put these bytes into `call_data` and also into `tx.data` so the `Evm` instance has the code + // available in the transaction payload. If you want to treat this as deployed contract code, you + // would instead write it to a storage account's `code` and set `tx.to` accordingly. 
+ let call_data: Vec = vec![ + 0x60, 0x06, // PUSH1 0x06 -> push 6 + 0x60, 0x07, // PUSH1 0x07 -> push 7 + 0x01, // ADD -> pop 7,6 push 13 + 0x60, 0x00, // PUSH1 0x00 -> push memory offset 0 + 0x52, // MSTORE -> store 32-byte word at memory[offset] + 0x60, 0x00, // PUSH1 0x00 -> push memory offset 0 (to read back) + 0x51, // MLOAD -> load 32-byte word from memory[offset] + 0x00, // STOP -> halt + ]; + + // Block environment and memory initialization let block_env = BlockEnv::default(); - let memory = Memory::default(); + // initialize memory with 1 KiB (1024 bytes) so it has a default size before growth + let memory: Memory = Memory::new_with_data(vec![0u8; 1024]); let stack = Stack::default(); let storage = EvmStorage::default(); - - let tx: Transaction = Transaction::default(); - - + + // Transaction: put our bytecode into `data` so the EVM can load/process it. + // If you want this to behave like contract creation code, you can set `tx.to` to Address::ZERO + // (already the case here) and the VM code that seeds memory from `tx.data` will place these + // bytes into memory for execution. + let tx: Transaction = Transaction { + from: Address::from_slice(&[1]), + to: Address::ZERO, + value: U256::ZERO, + nonce: U256::ZERO, + data: call_data.clone(), // transaction payload contains our raw opcodes + gas_limit: U256::from(100000), + }; + + // Create the EVM instance with the prepared environment and transaction. let mut evm = Evm::new(block_env, tx, memory, stack, storage); - // println!("Hello, world!"); + + // NOTE: this example only constructs the EVM state and populates tx.data. + // To actually execute the bytecode you need to call `evm.execute()` or `evm.run()` + // depending on how you want to drive execution. Execution will depend on how the + // VM is implemented to load the tx.data into memory and interpret it. 
+ // + // Example: if the VM expects contract code in memory when tx.to == Address::ZERO, + // calling `evm.execute()` (or `evm.run()`) should iterate through the opcodes and + // perform the pushes, arithmetic, MSTORE/MLOAD, and STOP as described above. + // + // You can print the transaction data for verification: + // println!(\"tx.data = {:x?}\", evm.tx.data); + + // println!(\"Hello, world!\"); } diff --git a/crates/evm_core/src/jump_tables.rs b/crates/evm_core/src/jump_tables.rs index c50c030..3185338 100644 --- a/crates/evm_core/src/jump_tables.rs +++ b/crates/evm_core/src/jump_tables.rs @@ -17,7 +17,6 @@ pub fn build_jump_table() -> [OpcodeFn; 256] { // jump_table[Opcode::MLOAD as usize] = m_load; // jump_table[Opcode::CHAINID as usize] = chain_id; // jump_table[Opcode::COINBASE as usize] = coin_base; - jump_table } diff --git a/crates/evm_core/src/lib.rs b/crates/evm_core/src/lib.rs index f1f1738..da7c45e 100644 --- a/crates/evm_core/src/lib.rs +++ b/crates/evm_core/src/lib.rs @@ -13,6 +13,10 @@ use primitives::{ use crate::{jump_tables::build_jump_table, opcodes::Opcode}; +/// Program exit states for the VM. The VM loop (`run`) uses this to determine when to stop. +/// - `Success` indicates the program stopped successfully (for example via `STOP` opcode). +/// - `Failure` indicates a trap/exception (e.g. invalid opcode, maybe an out-of-gas ). +/// - `Default` means "still running" or uninitialized status; the run loop continues while status is `Default`. #[derive(Debug, Clone, Default, PartialEq)] pub enum ProgramExitStatus { Success, @@ -21,6 +25,46 @@ pub enum ProgramExitStatus { Default, } +/// The EVM runtime structure. +/// +/// This struct aggregates all pieces of state the interpreter needs to execute bytecode: +/// - `block_env`: on-chain block/environment information used by environment opcodes (timestamp, number, coinbase,) +/// - `tx`: the transaction context (caller, callee, call value, calldata). 
This crate uses `tx.data` for code when +/// `tx.to == Address::ZERO` (contract creation / init-style behavior). +/// - `memory`: linear byte-addressable memory used by MSTORE/MLOAD and other memory ops. +/// - `stack`: the 1024-deep evaluation stack used by all stack-based opcodes. +/// - `storage`: persistent per-account contract storage accessible via SLOAD/SSTORE (map keyed by Address). +/// - `pc`: program counter (index into `memory` where current instruction is read). +/// - `status`: current program exit status (controls `run()` loop). +/// +/// - `block_env: BlockEnv` +/// - Contains block-scoped values such as `number`, `timestamp`, `coinbase`, `gas_limit`, `base_fee`, `block_hash`, `chain_id`. +/// - Necessary for opcodes like `TIMESTAMP`, `NUMBER`, `COINBASE`, `GASLIMIT`, `CHAINID`, `BASEFEE` and `BLOCKHASH`. +/// - Example: `TIMESTAMP` returns `block_env.timestamp`. If you simulate a block at time `t`, set `block_env.timestamp = U256::from(t)`. +/// - `tx: Transaction` +/// - Transaction-level context: `from` (caller), `to` (destination), `value` (wei), `nonce`, `data` (calldata or init code), and `gas_limit`. +/// - Used by CALL* opcodes, `CALLVALUE`, `CALLER`, `CALLDATALOAD`, and for contract creation the `data` can be treated as creation code. +/// - Example: when testing a contract call that sends 1 ether, set `tx.value = U256::from(1_000_000_000_000_000_000u128)` and `tx.from` to the caller address. +/// - `memory: Memory` +/// - Linear, zero-indexed byte array that is transient during execution (not persisted between transactions). +/// - Used by `MSTORE`, `MLOAD`, `CALLDATACOPY`, `CODECOPY`, etc. +/// - Important: this implementation expects memory to have sufficient length before reads/writes. +/// - Example: to store a 32-byte word at offset 0 call `MSTORE` with offset `0` and the word; `memory.store_word(0, word)` writes 32 bytes starting at `memory.data[0]`. +/// - `stack: Stack` +/// - LIFO stack that holds 256-bit values (`U256`). 
EVM opcodes push/pop values here. +/// - Typical opcodes: `PUSH1..PUSH32` push values, arithmetic opcodes `ADD,SUB` pop operands and push results, `POP` discards top value. +/// - Example: after `PUSH1 0x05; PUSH1 0x03; ADD`, the top of the stack contains `0x08`. +/// - `storage: EvmStorage` +/// - Persistent mapping from account address -> account storage (account fields include `code`, `balance`, `word` map). +/// - Used by `SLOAD`/`SSTORE` to persist contract state across transactions. Must be keyed by the contract address that is being executed. +/// - Example: after `SSTORE` of key `k` to value `v` for contract address `A`, subsequent calls to the same contract can read it with `SLOAD` and get `v`. +/// - `pc: usize` +/// - Program counter (index into `memory.data` where the next opcode byte is read). +/// - `pc` must point at the first byte of an opcode. `step()` does not increment `pc` (its `self.pc += 1` is commented out), so every handler must advance `pc` past its own opcode byte; `PUSHn` handlers must additionally skip the immediate bytes they consumed (or set `pc` appropriately). +/// - `status: ProgramExitStatus` +/// - Controls the `run()` loop. When a handler sets `status` to `Success` or `Failure`, `run()` will stop. +/// +/// The EVM struct stores all of the runtime state required to fetch, decode, and execute opcodes. #[derive(Debug, Clone, Default)] pub struct Evm { pub block_env: BlockEnv, @@ -33,6 +77,19 @@ pub struct Evm { } impl Evm { + /// Construct a new EVM instance. + + /// - `block_env`: pass the block environment you want opcodes to observe. For tests set values explicitly: + /// BlockEnv { number: U256::from(123), timestamp: U256::from(1_700_000_000), coinbase: addr, gas_limit: U256::from(30_000_000), ... } + /// - `tx`: transaction payload. For contract creation put creation bytecode in `tx.data` and `tx.to = Address::ZERO`. + /// - `memory`: linear memory buffer. It's acceptable to provide a pre-allocated buffer (e.g. 
1 KiB) for convenience. + /// - `stack`: initial stack - normally empty, but tests may pre-populate it for synthetic runs. + /// - `storage`: the node's account storage map. Provide pre-existing accounts if needed (e.g. balances, code). + /// + /// Example usage: + /// ```ignore + /// let evm = Evm::new(block_env, tx, Memory::new_with_data(vec![0u8;1024]), Stack::default(), EvmStorage::default()); + /// ``` pub fn new( block_env: BlockEnv, tx: Transaction, @@ -51,15 +108,25 @@ impl Evm { } } + /// What this `execute()` currently does (implementation-specific): + /// - If `tx.to == Address::ZERO` (contract creation / deployment), it attempts to copy `tx.data` into memory so the init code is available for execution. + /// - If `tx.to != Address::ZERO`, it attempts to load the touched contract's `code` from `storage` into memory. + + /// - NB: the creation branch also requires `!self.stack.data.is_empty()`, so with an empty stack nothing is copied; this does not reflect real-world checks. + /// - This implementation calls `unwrap()` on the storage lookup, so a missing account panics instead of being handled as an error. pub fn execute(&mut self) { + // If transaction is a contract creation (to == ZERO), copy tx.data into memory as initial code. if self.tx.to == Address::ZERO && !self.stack.data.is_empty() { for (i, value) in self.tx.data.iter().enumerate() { - // Process each value in the stack + // Writing each byte of tx.data into memory at its corresponding offset. println!("Value at index {}: {}", i, value); self.memory.store_byte(i, *value); } } else if self.tx.to != Address::ZERO { + // If tx.to is set, we are calling an existing contract: load its code into memory. let touched_contract: Address = self.tx.to; + // The code is expected to be found in storage.data[address].code + // NOTE: .unwrap() will panic if address not present; not production friendly for (i, v) in self .storage .data @@ -75,13 +142,34 @@ impl Evm { } } + /// Execute a single instruction at the current `pc`. + /// + /// 1. 
`raw_instruction = self.memory.load_byte(self.pc)`: + /// - The VM reads a single byte from linear `memory` at position `pc`. + /// - This byte is the opcode (0x00..0xff). For example 0x60 is `PUSH1`. + /// - Ensure `memory` has been seeded with code (via `execute()`) and `pc` points to the correct start. + /// 2. `instruction = Opcode::from_u8(raw_instruction).unwrap()`: + /// - Convert the raw byte into the typed `Opcode` enum. If the byte is unknown, `from_u8` returns `None`. + /// - `unwrap()` will panic on undefined bytes — not production friendly. + /// 3. `let jump_tables = build_jump_table()`: + /// - Builds (currently on every `step`) a 256-entry table that maps opcode numeric values to handler functions (`fn(&mut Evm)`). + /// 4. `jump_tables[instruction as usize](self)`: + /// - Call the handler function for the current opcode. Handlers mutate `stack`, `memory`, `pc`, `status`, and other parts of the EVM as needed. + /// - Handlers that consume immediate bytes (e.g., `PUSH1..PUSH32`) must also advance `pc` past those bytes; `step()` does not advance `pc` on their behalf. + pub fn step(&mut self) { + // Fetch the byte at the program counter from memory. let raw_instruction = self.memory.load_byte(self.pc); + + // Decode: map the raw byte into a strongly typed Opcode enum. let instruction: Opcode = Opcode::from_u8(raw_instruction).unwrap(); + // Build dispatch table and call the handler for the decoded instruction. + // Note: building the table on every step is simple but inefficient; use a cached static table for performance. 
let jump_tables: [fn(&mut Evm); 256] = build_jump_table(); jump_tables[instruction as usize](self); - self.pc += 1; + + // self.pc += 1; // to be handled in the opcode handler } pub fn run(&mut self) { diff --git a/crates/evm_core/src/operations/ariths.rs b/crates/evm_core/src/operations/ariths.rs index 6556592..43ff0ac 100644 --- a/crates/evm_core/src/operations/ariths.rs +++ b/crates/evm_core/src/operations/ariths.rs @@ -1,30 +1,54 @@ -use alloy::primitives::{Address, I256, U64, U256}; +use alloy::primitives::{Address, I256, U256}; use crate::{Evm, ProgramExitStatus}; // ref == https://www.evm.codes/ + +/// STOP opcode handler +/// - Semantics: halt execution and set program status to Success. +/// - Stack effects: none. pub fn stop(evm: &mut Evm) { evm.status = ProgramExitStatus::Success; } +/// ADD opcode handler +/// NB: No check made for overflow. +/// - Semantics: pop two 256-bit values from the stack (call them `a` and `b`) and push `a + b`. +/// - Stack order in this implementation: +/// * `let a = evm.stack.pop().unwrap();` // top of stack +/// * `let b = evm.stack.pop().unwrap();` // next item +/// Result pushed: `a + b`. +/// - Example: stack before [0x02, 0x03] (top = 0x03) after `add` -> [0x05] (top = 0x05). pub fn add(evm: &mut Evm) { let a = evm.stack.pop().unwrap(); let b = evm.stack.pop().unwrap(); evm.stack.push(a + b).unwrap(); } +/// SUB opcode handler +/// - Semantics: pop `a`, pop `b`, push `a - b` (using unsigned U256 subtraction semantics). +/// - Note on order: because we pop `a` then `b`, the computed value is `a - b` where `a` is the top value. +/// - Example: stack [0x05, 0x02] (top=0x02) -> after `sub` push (0x02 - 0x05) mod 2^256. +/// - Caveat: the implementation uses `U256` arithmetic; negatives wrap around in unsigned interpretation. 
pub fn sub(evm: &mut Evm) { let a = evm.stack.pop().unwrap(); let b = evm.stack.pop().unwrap(); evm.stack.push(a - b).unwrap(); } +/// MUL opcode handler +/// - Semantics: pop `a`, pop `b`, push `a * b`. +/// - Example: [2, 3] -> push 6. pub fn mul(evm: &mut Evm) { let a = evm.stack.pop().unwrap(); let b = evm.stack.pop().unwrap(); evm.stack.push(a * b).unwrap(); } +/// DIV opcode handler (unsigned) +/// - Semantics: pops `a` and `b`, if `b == 0` push 0, else push `a / b`. +/// - Edge-case: Division by zero returns zero per EVM semantics implemented here. +/// - Example: [10, 2] -> push 5. [10, 0] -> push 0. pub fn div(evm: &mut Evm) { let a = evm.stack.pop().unwrap(); let b = evm.stack.pop().unwrap(); @@ -35,6 +59,10 @@ pub fn div(evm: &mut Evm) { } } +/// SDIV opcode handler (signed division) +/// - Semantics: treat stack values as signed 256-bit integers, divide, then push unsigned representation of result +/// * Converts `U256` limbs into `I256` for signed arithmetic and converts result back to `U256`. +/// * Division by zero pushes `U256::ZERO`. pub fn sdiv(evm: &mut Evm) { let a = evm.stack.pop().unwrap(); let b = evm.stack.pop().unwrap(); @@ -51,6 +79,10 @@ pub fn sdiv(evm: &mut Evm) { } } +/// ADDMOD opcode handler +/// - Semantics: pop `a`, `b`, `c`, compute `(a + b) % c`. If `c == 0` push 0. +/// - Notes: This implementation checks `b` for zero in the original code. +/// - Example: a=2,b=3,c=5 -> (2+3)%5 = 0. pub fn addmod(evm: &mut Evm) { let a = evm.stack.pop().unwrap(); let b = evm.stack.pop().unwrap(); @@ -65,6 +97,9 @@ pub fn addmod(evm: &mut Evm) { } } +/// MULMOD opcode handler +/// - Semantics: pop `a`, `b`, `c`, compute `(a * b) % c`. If `c == 0` push 0. +/// - Example: a=2,b=3,c=4 -> (2*3)%4 = 2. 
pub fn mulmod(evm: &mut Evm) { let a = evm.stack.pop().unwrap(); let b = evm.stack.pop().unwrap(); @@ -79,6 +114,9 @@ pub fn mulmod(evm: &mut Evm) { } } +/// MOD opcode handler (unsigned modulo) +/// - Semantics: pop `a`, pop `b`, if `b == 0` push 0 else push `a % b`. +/// - Example: [10,3] -> push 1. pub fn modulo(evm: &mut Evm) { let a = evm.stack.pop().unwrap(); let b = evm.stack.pop().unwrap(); @@ -89,6 +127,9 @@ pub fn modulo(evm: &mut Evm) { } } +/// SMOD opcode handler (signed modulo) +/// - Semantics: behaves similarly to MOD but for signed values. This implementation uses unsigned types directly. +/// - Note: This implementation currently delegates to unsigned modulo; adjust if full signed semantics are required. pub fn smod(evm: &mut Evm) { let a: U256 = evm.stack.pop().unwrap(); let b: U256 = evm.stack.pop().unwrap(); @@ -99,6 +140,10 @@ pub fn smod(evm: &mut Evm) { } } +/// EXP opcode handler (exponentiation) +/// - Semantics: pop base, pop exponent, compute base.pow(exponent) and push result. +/// - Warning: exponentiation may be very expensive; no gas accounting here. +/// - Example: base=2, exponent=3 -> push 8. pub fn exp(evm: &mut Evm) { let base: U256 = evm.stack.pop().unwrap(); let exponent: U256 = evm.stack.pop().unwrap(); @@ -120,6 +165,9 @@ pub fn signextend(evm: &mut Evm) { } } +/// LT opcode handler (unsigned less-than) +/// - Semantics: pop left, pop right, push 1 if left < right else 0. +/// - Example: [2,3] -> push 1. pub fn lt(evm: &mut Evm) { let left = evm.stack.pop().unwrap(); let right = evm.stack.pop().unwrap(); @@ -128,6 +176,8 @@ pub fn lt(evm: &mut Evm) { evm.stack.push(U256::from(result)).unwrap(); } +/// GT opcode handler (unsigned greater-than) +/// - Semantics: pop left, pop right, push 1 if left > right else 0. 
pub fn gt(evm: &mut Evm) { let left = evm.stack.pop().unwrap(); let right = evm.stack.pop().unwrap(); @@ -135,6 +185,8 @@ pub fn gt(evm: &mut Evm) { evm.stack.push(U256::from(result)).unwrap(); } +/// SLT opcode handler (signed less-than) +/// - Semantics: convert both operands to signed `I256`, compare, push 1 if left < right else 0. pub fn slt(evm: &mut Evm) { let left: U256 = evm.stack.pop().unwrap(); let right: U256 = evm.stack.pop().unwrap(); @@ -148,6 +200,8 @@ pub fn slt(evm: &mut Evm) { evm.stack.push(unsigned_result).unwrap(); } +/// SGT opcode handler (signed greater-than) +/// - Semantics: convert both operands to `I256` and compare. pub fn sgt(evm: &mut Evm) { let left: U256 = evm.stack.pop().unwrap(); let right: U256 = evm.stack.pop().unwrap(); @@ -161,6 +215,8 @@ pub fn sgt(evm: &mut Evm) { evm.stack.push(unsigned_result).unwrap(); } +/// EQ opcode handler (equality) +/// - Semantics: pop left, pop right, push 1 if equal else 0. pub fn eq(evm: &mut Evm) { let left = evm.stack.pop().unwrap(); let right = evm.stack.pop().unwrap(); @@ -169,6 +225,8 @@ pub fn eq(evm: &mut Evm) { evm.stack.push(U256::from(result)).unwrap(); } +/// ISZERO opcode handler +/// - Semantics: pop value, push 1 if value == 0 else 0. pub fn is_zero(evm: &mut Evm) { let value = evm.stack.pop().unwrap(); @@ -176,6 +234,8 @@ pub fn is_zero(evm: &mut Evm) { evm.stack.push(U256::from(result)).unwrap(); } +/// AND opcode handler (bitwise) +/// - Semantics: pop left, pop right, push bitwise-and result. 
pub fn and(evm: &mut Evm) { let left = evm.stack.pop().unwrap(); let right = evm.stack.pop().unwrap(); @@ -184,11 +244,10 @@ pub fn and(evm: &mut Evm) { evm.stack.push(result).unwrap(); } - pub fn byte(evm: &mut Evm) { let index = evm.stack.pop().unwrap(); let value = evm.stack.pop().unwrap(); - + if index.as_limbs()[0] > 32 { evm.stack.push(U256::ZERO).unwrap(); } else { @@ -198,207 +257,271 @@ pub fn byte(evm: &mut Evm) { } } +/// MSTORE opcode handler +/// - Semantics: pop offset, pop value, store 32-byte word `value` at memory[offset..offset+32]. +/// - Stack order: this handler pops `offset` first and then `value`, matching the call-site convention +/// where offset was pushed after value (e.g., push value; push offset; MSTORE). pub fn mstore(evm: &mut Evm) { let offset = evm.stack.pop().unwrap(); let value = evm.stack.pop().unwrap(); let offset = offset.as_limbs()[0] as usize; - - evm.memory.store_word(offset, value); + evm.memory.store_word(offset, value); } +/// ADDRESS opcode handler +/// - Semantics: push the current executing contract's address (tx.to) as a 32-byte left-padded value. +/// - Implementation: pads the 20-byte address into a 32-byte big-endian word and pushes it. pub fn address(evm: &mut Evm) { let address: Address = evm.tx.to; - + let mut padded = [0u8; 32]; // length is 32 bytes - - // the address is 20bytes long, hence, padded with zero - padded[12..].copy_from_slice(address.as_slice()); // - + + // the address is 20bytes long, hence, padded with zero + padded[12..].copy_from_slice(address.as_slice()); // + let value = U256::from_be_bytes(padded); evm.stack.push(value).unwrap(); } +/// BALANCE opcode handler +/// - Semantics: push the balance of the account (usually the account specified by `evm.tx.from` here). 
+/// - Note: this implementation unwraps the account entry and panics if that account is missing from `storage`. pub fn balance(evm: &mut Evm) { let address: Address = evm.tx.from; - + let address_account = evm.storage.data.get(&address).unwrap(); let balance: U256 = address_account.balance; evm.stack.push(balance).unwrap(); } +/// ORIGIN opcode handler +/// - Semantics: push the transaction origin address (tx.from) padded to 32 bytes. +/// - Implementation mirrors `address` logic but uses `tx.from`. pub fn origin(evm: &mut Evm) { let address: Address = evm.tx.from; - + let mut padded = [0u8; 32]; // length is 32 bytes - - // the address is 20bytes long, hence, padded with zero - padded[12..].copy_from_slice(address.as_slice()); // - + + // the address is 20bytes long, hence, padded with zero + padded[12..].copy_from_slice(address.as_slice()); // + let value = U256::from_be_bytes(padded); evm.stack.push(value).unwrap(); } - +/// CALLVALUE opcode handler +/// - Semantics: push the `tx.value` (amount of wei sent with the call). pub fn call_value(evm: &mut Evm) { let value = evm.tx.value; evm.stack.push(value).unwrap(); } +/// CALLDATALOAD partial handler +/// - Semantics: intended to pop offset and push 32 bytes starting from `tx.data[offset]`. +/// - Implementation note: this function reads the offset and prepares to use `tx.data` but the final conversion +/// into a `U256` is left commented out. This must be completed to match EVM semantics and handle out-of-bounds reads. pub fn call_data_load(evm: &mut Evm) { let offset = evm.stack.pop().unwrap(); let offset = offset.as_limbs()[0] as usize; - + let data = evm.tx.data.as_slice(); // let value = U256::from_be_bytes(data[offset..offset + 32].); // evm.stack.push(value).unwrap(); } +/// GASPRICE opcode handler (simplified) +/// - Implementation pushes `tx.gas_limit` as a stand-in for gas price (this is not the usual meaning). +/// - In EVM semantics GASPRICE should push `tx.gas_price` or chain gas price; adjust accordingly. 
pub fn gas_price(evm: &mut Evm) { let gas_price = evm.tx.gas_limit; evm.stack.push(gas_price).unwrap(); } +/// BLOCKHASH opcode handler (partial) +/// - Semantics: pop block number `n`, if `n` is within the last 256 blocks return blockhash(n) else 0. +/// - Implementation: checks if requested block number is greater than current block number and pushes 0 if so. +/// - Note: full historical block-hash semantics are not implemented here. pub fn block_hash(evm: &mut Evm) { // get the request block number from the stack let block_number = evm.stack.pop().unwrap(); - + // get the current block number from the block environment let current_block_number = evm.block_env.number; - - // check if the requested block number + + // check if the requested block number // is within the range of the current block number if block_number > current_block_number { evm.stack.push(U256::ZERO).unwrap(); } else { - // get the block hash from the block environment let block_hash = evm.block_env.block_hash.as_limbs()[0]; - + // evm.stack.push(block_hash).unwrap(); } - } +/// COINBASE opcode handler +/// - Semantics: push the block coinbase/miner address as 32 bytes. pub fn coin_base(evm: &mut Evm) { let coin_base = evm.block_env.coinbase; - - evm.stack.push(U256::from_be_slice(coin_base.as_slice())).unwrap(); + + evm.stack + .push(U256::from_be_slice(coin_base.as_slice())) + .unwrap(); } +/// TIMESTAMP opcode handler +/// - Semantics: push current block timestamp. pub fn timestamp(evm: &mut Evm) { let timestamp = evm.block_env.timestamp; - + evm.stack.push(timestamp).unwrap(); } +/// NUMBER opcode handler +/// - Semantics: push current block number. pub fn number(evm: &mut Evm) { let number = evm.block_env.number; - + evm.stack.push(number).unwrap(); } +/// GASLIMIT opcode handler +/// - Semantics: push current block gas limit. 
pub fn gas_limit(evm: &mut Evm) { let gas_limit = evm.block_env.gas_limit; - + evm.stack.push(gas_limit).unwrap(); } +/// CHAINID opcode handler +/// - Semantics: push chain id. pub fn chain_id(evm: &mut Evm) { let chain_id = evm.block_env.chain_id; - + evm.stack.push(chain_id).unwrap(); } +/// POP opcode handler +/// - Semantics: remove the top stack element and discard it. pub fn pop(evm: &mut Evm) { evm.stack.pop().unwrap(); } +/// MLOAD opcode handler +/// - Semantics: pop offset, load 32-byte word from memory starting at offset, push that word. +/// - Note: `load_word` assumes memory has enough bytes; ensure memory is grown appropriately. pub fn m_load(evm: &mut Evm) { let offset = evm.stack.pop().unwrap(); - + let word = evm.memory.load_word(offset.as_limbs()[0] as usize); - + evm.stack.push(word).unwrap(); } +/// MSTORE opcode handler (alternate) +/// - Semantics: pop offset, pop value, store the 32-byte word at memory[offset]. +/// - Note: similar to `mstore` above; ensure memory length suffices. pub fn m_store(evm: &mut Evm) { let offset = evm.stack.pop().unwrap(); let value = evm.stack.pop().unwrap(); - + evm.memory.store_word(offset.as_limbs()[0] as usize, value); } +/// MSTORE8 opcode handler +/// - Semantics: pop offset, pop value, store the least-significant byte of value at memory[offset]. pub fn m_store8(evm: &mut Evm) { let offset = evm.stack.pop().unwrap(); let value = evm.stack.pop().unwrap(); - - evm.memory.store_byte(offset.as_limbs()[0] as usize, value.as_limbs()[0] as u8); + + evm.memory + .store_byte(offset.as_limbs()[0] as usize, value.as_limbs()[0] as u8); } -pub fn s_load(evm: &mut Evm){ +/// SLOAD opcode handler (partial) +/// - Semantics: pop storage slot key, load value from persistent storage for the executing contract address. +/// - Note: this implementation reads from `storage` using `evm.tx.to` as the contract address; callers must ensure +/// that `storage` contains an account entry for that address. 
+pub fn s_load(evm: &mut Evm) { let offset = evm.stack.pop().unwrap(); - + let locator: Address = evm.tx.to; - + let word = evm.storage.s_load(locator, offset); - + // evm.stack.push(word).unwrap(); } -pub fn s_store(evm: &mut Evm){ +/// SSTORE opcode handler (partial) +/// - Semantics: pop offset, pop value, store value into persistent storage at slot `offset` for the current contract address. +pub fn s_store(evm: &mut Evm) { let offset = evm.stack.pop().unwrap(); let value = evm.stack.pop().unwrap(); - + let locator: Address = evm.tx.to; - + evm.storage.s_store(locator, offset, value); } +/// JUMP opcode handler +/// - Semantics: pop target and set `pc` to that value (absolute jump). +/// - Note: real EVM requires target to be a valid `JUMPDEST`; validation is not performed here. pub fn jump(evm: &mut Evm) { let target = evm.stack.pop().unwrap(); - + evm.pc = target.as_limbs()[0] as usize; } +/// JUMPI opcode handler +/// - Semantics: pop target, pop condition. If condition != 0, set `pc = target` (conditional jump). pub fn jumpi(evm: &mut Evm) { let target = evm.stack.pop().unwrap(); let condition = evm.stack.pop().unwrap(); - + if condition.as_limbs()[0] != 0 { evm.pc = target.as_limbs()[0] as usize; } } +/// JUMPDEST handler (no-op in many implementations) +/// - Semantics: marks a valid destination for `JUMP`/`JUMPI`. Here it does nothing. pub fn jump_dest(evm: &mut Evm) { let pc = evm.pc; } - +/// PC opcode handler +/// - Semantics: push current program counter. This implementation currently reads `evm.pc` but doesn't push it. pub fn pc(evm: &mut Evm) { evm.pc; } +/// MSIZE opcode handler +/// - Semantics: push memory size in bytes. This implementation reads `memory.data.len()` but doesn't push it. pub fn m_size(evm: &mut Evm) { evm.memory.data.len(); } +/// GAS opcode handler (partial) +/// - Semantics: push remaining gas. This implementation returns block_env.gas_limit which is not correct gas accounting. 
pub fn gas(evm: &mut Evm) { evm.block_env.gas_limit; } +/// MCOPY opcode handler (partial) +/// - Semantics: copy memory region; this implementation reads stack operands but the actual copy is commented out. pub fn m_copy(evm: &mut Evm) { let offset = evm.stack.pop().unwrap(); let length = evm.stack.pop().unwrap(); let dest = evm.stack.pop().unwrap(); - + // evm.memory.copy(offset.as_limbs()[0] as usize, dest.as_limbs()[0] as usize, length.as_limbs()[0] as usize); - } +/// PUSH0 opcode handler (special PUSH of zero) +/// - Semantics: push zero onto the stack. pub fn push_0(evm: &mut Evm) { evm.stack.push(U256::ZERO).unwrap(); } - - diff --git a/crates/primitives/src/evm_types.rs b/crates/primitives/src/evm_types.rs index 46e76fb..b63de06 100644 --- a/crates/primitives/src/evm_types.rs +++ b/crates/primitives/src/evm_types.rs @@ -28,7 +28,7 @@ pub struct BlockEnv { pub struct EvmAccount { pub balance: U256, pub nonce: U256, - pub code: Vec, + pub code: Vec, pub word: HashMap, } @@ -36,4 +36,3 @@ pub struct EvmAccount { pub struct EvmStorage { pub data: HashMap, } - diff --git a/crates/primitives/src/lib.rs b/crates/primitives/src/lib.rs index 21ec4bb..0c05174 100644 --- a/crates/primitives/src/lib.rs +++ b/crates/primitives/src/lib.rs @@ -1,9 +1,9 @@ +pub mod constants; pub mod errors; pub mod evm_types; pub mod memory; pub mod stack; pub mod storage; -pub mod constants; pub fn add(left: u64, right: u64) -> u64 { left + right diff --git a/crates/primitives/src/memory.rs b/crates/primitives/src/memory.rs index 8cd8d5d..f2f0008 100644 --- a/crates/primitives/src/memory.rs +++ b/crates/primitives/src/memory.rs @@ -23,9 +23,9 @@ impl Memory { } pub fn load_word(&self, offset: usize) -> U256 { - let bytes = &self.data[offset..offset + 32]; - - U256::from_be_slice(bytes.try_into().unwrap()) + let bytes = &self.data[offset..offset + 32]; + + U256::from_be_slice(bytes.try_into().unwrap()) } pub fn store_byte(&mut self, offset: usize, byte: u8) { @@ -35,7 +35,7 @@ impl 
Memory { pub fn load_byte(&self, offset: usize) -> u8 { self.data[offset] } - + pub fn copy(&mut self, offset: usize, dest: usize, length: usize) -> u8 { let data = &self.data[offset..offset + length]; 0 diff --git a/crates/primitives/src/storage.rs b/crates/primitives/src/storage.rs index 72f5560..b815eb0 100644 --- a/crates/primitives/src/storage.rs +++ b/crates/primitives/src/storage.rs @@ -1,24 +1,28 @@ - - use std::collections::HashMap; -use alloy::primitives::{U256, Address}; +use alloy::primitives::{Address, U256}; use crate::evm_types::{EvmAccount, EvmStorage}; impl EvmStorage { - pub fn default() -> Self { - EvmStorage { data: HashMap::new() } + EvmStorage { + data: HashMap::new(), + } } - + pub fn s_load(&mut self, address: Address, key: U256) -> U256 { - self.data.get(&address).and_then(|evm_account: &EvmAccount| { - evm_account.word.get(&key).copied() - }).unwrap() + self.data + .get(&address) + .and_then(|evm_account: &EvmAccount| evm_account.word.get(&key).copied()) + .unwrap() } - + pub fn s_store(&mut self, address: Address, key: U256, value: U256) { - self.data.entry(address).or_insert_with(EvmAccount::default).word.insert(key, value); + self.data + .entry(address) + .or_insert_with(EvmAccount::default) + .word + .insert(key, value); } -} \ No newline at end of file +}