From b4c76e013c726830972304cb93f8ad0e9018b585 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sat, 25 Apr 2026 17:30:28 +0200 Subject: [PATCH 01/45] Mark r10 and r11 as pre-defined registers In some games these registers are used for local variables while not being callee-saved. If those local variables also happen to be used without an initial value, then dsd would assume the instruction is illegal. This is undefined behavior, but the instruction is not illegal. --- lib/src/analysis/functions.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index b695655..8a1312c 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -661,6 +661,9 @@ impl<'a> ParseFunctionContext<'a> { defined_registers.insert(Register::Pc); // Could be used as a scratch register defined_registers.insert(Register::R12); + // Sometimes not callee-saved + defined_registers.insert(Register::R10); + defined_registers.insert(Register::R11); Self { name, From 2bc1c02923d7fd7b3c10e7f6d5cc82bbdfbba1a8 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sat, 25 Apr 2026 17:31:51 +0200 Subject: [PATCH 02/45] Support Thumb jump table case `bx {dest}` It seems older versions of the compiler don't use `add pc, {offset}` for the branching instruction --- lib/src/analysis/jump_table.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/lib/src/analysis/jump_table.rs b/lib/src/analysis/jump_table.rs index d4cd9d7..a020f07 100644 --- a/lib/src/analysis/jump_table.rs +++ b/lib/src/analysis/jump_table.rs @@ -233,8 +233,12 @@ pub enum JumpTableStateThumb { SignExtendAsr { jump: Register, table_address: u32, limit: u32 }, /// `add pc, jump` do the jump + /// `add jump, pc` calculate the jump destination AddPcReg { jump: Register, table_address: u32, limit: u32 }, + /// `bx jump` jump to the destination + BxJump { jump: Register, table_address: u32, limit: u32 }, + /// valid table detected, starts from `table_address` 
with a size of `limit` ValidJumpTable { table_address: u32, limit: u32 }, } @@ -412,6 +416,27 @@ impl JumpTableStateThumb { }); Self::ValidJumpTable { table_address, limit } } + ( + "add", + Argument::Reg(Reg { reg, .. }), + Argument::Reg(Reg { reg: Register::Pc, .. }), + Argument::None, + ) if reg == jump => Self::BxJump { jump, table_address, limit }, + _ => Self::default(), + } + } + Self::BxJump { jump, table_address, limit } => { + match (parsed_ins.mnemonic, args[0], args[1]) { + ("bx", Argument::Reg(Reg { reg, .. }), Argument::None) if reg == jump => { + let table_address = table_address - 2; + let size = (limit + 1) * 2; + jump_tables.insert(table_address, JumpTable { + address: table_address, + size, + code: false, + }); + Self::ValidJumpTable { table_address, limit } + } _ => Self::default(), } } From 566c47de548afb90ba2e0709134c37a69dbe9a0f Mon Sep 17 00:00:00 2001 From: Aetias Date: Sat, 25 Apr 2026 18:17:23 +0200 Subject: [PATCH 03/45] Add `eor pc, ...` as a possible return instruction --- lib/src/analysis/functions.rs | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 8a1312c..620f4e9 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -7,7 +7,7 @@ use std::{ use snafu::Snafu; use unarm::{ ArmVersion, Endian, Ins, ParseFlags, ParseMode, ParsedIns, Parser, - args::{Argument, Reg, Register}, + args::{Argument, Reg, Register, Shift, ShiftReg}, arm, thumb, }; @@ -1043,17 +1043,21 @@ impl<'a> ParseFunctionContext<'a> { } let args = &parsed_ins.args; - match (parsed_ins.mnemonic, args[0], args[1]) { + match (parsed_ins.mnemonic, args[0], args[1], args[2], args[3]) { // bx * - ("bx", _, _) => true, + ("bx", _, _, _, _) => true, // mov pc, * - ("mov", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true, + ("mov", Argument::Reg(Reg { reg: Register::Pc, .. 
}), _, _, _) => true, // ldmia *, {..., pc} - ("ldmia", _, Argument::RegList(reg_list)) if reg_list.contains(Register::Pc) => true, + ("ldmia", _, Argument::RegList(reg_list), _, _) if reg_list.contains(Register::Pc) => { + true + } // pop {..., pc} - ("pop", Argument::RegList(reg_list), _) if reg_list.contains(Register::Pc) => true, + ("pop", Argument::RegList(reg_list), _, _, _) if reg_list.contains(Register::Pc) => { + true + } // backwards branch - ("b", Argument::BranchDest(offset), _) if offset < 0 => { + ("b", Argument::BranchDest(offset), _, _, _) if offset < 0 => { // Branch must be within current function (infinite loop) or outside current module (tail call) Function::is_branch(ins, parsed_ins, address) .map(|destination| { @@ -1068,9 +1072,21 @@ impl<'a> ParseFunctionContext<'a> { "subs", Argument::Reg(Reg { reg: Register::Pc, .. }), Argument::Reg(Reg { reg: Register::Lr, .. }), + _, + _, ) => true, // ldr pc, * - ("ldr", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true, + ("ldr", Argument::Reg(Reg { reg: Register::Pc, .. }), _, _, _) => true, + // eor pc, r*, r*, ror r* + // Yeah this makes no sense but it's real and exists at 0x020d2888 of ov022 in the + // European version of Mario & Luigi: Bowser's Inside Story + ( + "eor", + Argument::Reg(Reg { reg: Register::Pc, .. 
}), + Argument::Reg(_), + Argument::Reg(_), + Argument::ShiftReg(ShiftReg { op: Shift::Ror, reg: _ }), + ) => true, _ => false, } } From bd6602c5f897047cdc74678779c520ecece53be5 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 11:02:13 +0200 Subject: [PATCH 04/45] Only set last conditional destination if branch was conditional --- lib/src/analysis/functions.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 620f4e9..6d0beee 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -828,7 +828,14 @@ impl<'a> ParseFunctionContext<'a> { } } else { // Normal branch instruction, insert a label - if let Some(state) = self.handle_label(destination, address, parser, ins_size) { + if let Some(state) = self.handle_label( + destination, + address, + parser, + ins_size, + ins, + in_conditional_block, + ) { return state; } } @@ -947,10 +954,14 @@ impl<'a> ParseFunctionContext<'a> { address: u32, parser: &mut Parser, ins_size: u32, + ins: Ins, + in_conditional_block: bool, ) -> Option { self.labels.insert(destination); - self.last_conditional_destination = - self.last_conditional_destination.max(Some(destination)); + if in_conditional_block || ins.is_conditional() { + self.last_conditional_destination = + self.last_conditional_destination.max(Some(destination)); + } let next_address = address + ins_size; if self.pool_constants.contains(&next_address) { From 7fd33bd5c199ad03a79d68b700b7970d6bd1ffa4 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 11:02:55 +0200 Subject: [PATCH 05/45] Add `add pc, ...` as a possible return instruction --- lib/src/analysis/functions.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 6d0beee..0e306e8 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -7,7 +7,7 @@ use 
std::{ use snafu::Snafu; use unarm::{ ArmVersion, Endian, Ins, ParseFlags, ParseMode, ParsedIns, Parser, - args::{Argument, Reg, Register, Shift, ShiftReg}, + args::{Argument, Reg, Register, Shift, ShiftImm, ShiftReg}, arm, thumb, }; @@ -1098,6 +1098,15 @@ impl<'a> ParseFunctionContext<'a> { Argument::Reg(_), Argument::ShiftReg(ShiftReg { op: Shift::Ror, reg: _ }), ) => true, + // add pc, r*, r*, lsl #* + // Another weird one from Bowser's Inside Story's ITCM module (0x01ff84f8 in EU version) + ( + "add", + Argument::Reg(Reg { reg: Register::Pc, .. }), + Argument::Reg(_), + Argument::Reg(_), + Argument::ShiftImm(ShiftImm { op: Shift::Lsl, imm: _ }), + ) => true, _ => false, } } From 48f92b9b45e1f5ccf015e3318317251bcd038df5 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 11:03:53 +0200 Subject: [PATCH 06/45] Add all unknown function symbols and external labels before adding function call relocations --- cli/src/analysis/data.rs | 230 +++++++++++++++++++++++++------------- cli/src/config/program.rs | 2 +- 2 files changed, 154 insertions(+), 78 deletions(-) diff --git a/cli/src/analysis/data.rs b/cli/src/analysis/data.rs index c7df7a7..3eb0e2c 100644 --- a/cli/src/analysis/data.rs +++ b/cli/src/analysis/data.rs @@ -1,10 +1,10 @@ use ds_decomp::{ - analysis::functions::Function, + analysis::functions::{CalledFunction, Function}, config::{ module::{AnalysisOptions, Module, ModuleKind}, relocations::{Relocation, RelocationFromModulesError, RelocationModule}, section::{SectionCodeError, SectionIndex, SectionKind}, - symbol::{SymbolMapError, SymbolMaps}, + symbol::{SymFunction, SymLabel, SymbolKind, SymbolMapError, SymbolMaps}, }, }; use snafu::Snafu; @@ -21,6 +21,15 @@ pub enum AnalyzeExternalReferencesError { "Local function call from {from:#010x} in {module_kind} to {to:#010x} leads to no function" ))] LocalFunctionNotFound { from: u32, to: u32, module_kind: ModuleKind }, + #[snafu(display( + "Function call from {from:#010x} in {from_module} to {to:#010x} 
in {to_module} leads to a non-function symbol" + ))] + InvalidCallDestinationSymbol { + from: u32, + to: u32, + from_module: ModuleKind, + to_module: ModuleKind, + }, #[snafu(transparent)] SymbolMap { source: SymbolMapError }, #[snafu(transparent)] @@ -30,18 +39,12 @@ pub enum AnalyzeExternalReferencesError { } pub fn analyze_external_references( - options: AnalyzeExternalReferencesOptions, + options: &mut AnalyzeExternalReferencesOptions, analysis_options: &AnalysisOptions, ) -> Result { - let AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps } = options; - let mut result = RelocationResult::new(); - find_relocations_in_functions( - &mut result, - AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps }, - analysis_options, - )?; - find_external_references_in_sections(modules, module_index, &mut result)?; + find_relocations_in_functions(&mut result, options, analysis_options)?; + find_external_references_in_sections(options.modules, options.module_index, &mut result)?; Ok(result) } @@ -68,107 +71,180 @@ fn find_external_references_in_sections( fn find_relocations_in_functions( result: &mut RelocationResult, - options: AnalyzeExternalReferencesOptions, + options: &mut AnalyzeExternalReferencesOptions, analysis_options: &AnalysisOptions, +) -> Result<(), AnalyzeExternalReferencesError> { + for section in options.modules[options.module_index].sections().iter() { + for function in section.functions().values() { + if analysis_options.allow_unknown_function_calls { + insert_unknown_function_symbols(function, options)?; + } + add_external_labels(function, options)?; + add_function_calls_as_relocations(function, result, options)?; + find_external_data_from_pools(options.modules, options.module_index, function, result)?; + } + } + Ok(()) +} + +fn iter_function_calls(function: &Function) -> impl Iterator { + function + .function_calls() + .iter() + // TODO: Condition code resets to AL for relocated call instructions + .filter(|(_, 
called_function)| !called_function.ins.is_conditional()) +} + +fn insert_unknown_function_symbols( + function: &Function, + options: &mut AnalyzeExternalReferencesOptions, ) -> Result<(), AnalyzeExternalReferencesError> { let AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps } = options; - for section in modules[module_index].sections().iter() { - for function in section.functions().values() { - add_function_calls_as_relocations( - function, - result, - AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps }, - analysis_options, - )?; - find_external_data_from_pools(modules, module_index, function, result)?; + for (&address, &called_function) in iter_function_calls(function) { + let local_module = &modules[*module_index]; + let is_local = + local_module.sections().get_by_contained_address(called_function.address).is_some(); + if !is_local { + continue; + } + + let module_kind = local_module.kind(); + let symbol_map = symbol_maps.get_mut(module_kind); + if symbol_map.get_function_containing(called_function.address).is_none() { + log::warn!( + "Local function call from {:#010x} in {} to {:#010x} leads to no function, inserting an unknown function symbol", + address, + module_kind, + called_function.address + ); + + let thumb_bit = if called_function.thumb { 1 } else { 0 }; + let function_address = called_function.address | thumb_bit; + + if symbol_map.get_function(function_address)?.is_none() { + let name = + format!("{}{:08x}_unk", local_module.default_func_prefix, function_address); + symbol_map.add_unknown_function(name, function_address, called_function.thumb); + } } } Ok(()) } -fn add_function_calls_as_relocations( +fn add_external_labels( function: &Function, - result: &mut RelocationResult, - options: AnalyzeExternalReferencesOptions, - analysis_options: &AnalysisOptions, + options: &mut AnalyzeExternalReferencesOptions, ) -> Result<(), AnalyzeExternalReferencesError> { let AnalyzeExternalReferencesOptions { modules, 
module_index, symbol_maps } = options; - for (&address, &called_function) in function.function_calls() { - if called_function.ins.is_conditional() { - // Dumb mwld linker bug removes the condition code from relocated call instructions + for (&address, &called_function) in iter_function_calls(function) { + let local_module = &modules[*module_index]; + let is_local = + local_module.sections().get_by_contained_address(called_function.address).is_some(); + if !is_local { continue; } - let local_module = &modules[module_index]; + let module_kind = local_module.kind(); + let symbol_map = symbol_maps.get_mut(module_kind); + let symbol = match symbol_map.get_function_containing(called_function.address) { + Some((_, symbol)) => symbol, + None => { + let error = LocalFunctionNotFoundSnafu { + from: address, + to: called_function.address, + module_kind, + } + .build(); + log::error!("{error}"); + return Err(error); + } + }; + if called_function.address != symbol.addr { + log::warn!( + "Local function call from {:#010x} in {} to {:#010x} goes to middle of function '{}' at {:#010x}, adding an external label symbol", + address, + module_kind, + called_function.address, + symbol.name, + symbol.addr + ); + symbol_map.add_external_label(called_function.address, called_function.thumb)?; + } + } + Ok(()) +} + +fn add_function_calls_as_relocations( + function: &Function, + result: &mut RelocationResult, + options: &mut AnalyzeExternalReferencesOptions, +) -> Result<(), AnalyzeExternalReferencesError> { + let AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps } = options; + + for (&address, &called_function) in iter_function_calls(function) { + let local_module = &modules[*module_index]; let is_local = local_module.sections().get_by_contained_address(called_function.address).is_some(); let module: RelocationModule = if is_local { let module_kind = local_module.kind(); let symbol_map = symbol_maps.get_mut(module_kind); - let symbol = match 
symbol_map.get_function_containing(called_function.address) { + let symbol = match symbol_map.by_address(called_function.address)? { Some((_, symbol)) => symbol, None => { - if !analysis_options.allow_unknown_function_calls { - let error = LocalFunctionNotFoundSnafu { - from: address, - to: called_function.address, - module_kind, - } - .build(); - log::error!("{error}"); - return Err(error); - } else { - log::warn!( - "Local function call from {:#010x} in {} to {:#010x} leads to no function, inserting an unknown function symbol", - address, - module_kind, - called_function.address - ); - - let thumb_bit = if called_function.thumb { 1 } else { 0 }; - let function_address = called_function.address | thumb_bit; - - if let Some((_, symbol)) = symbol_map.get_function(function_address)? { - symbol - } else { - let name = format!( - "{}{:08x}_unk", - local_module.default_func_prefix, function_address - ); - let (_, symbol) = symbol_map.add_unknown_function( - name, - function_address, - called_function.thumb, - ); - symbol - } + let error = LocalFunctionNotFoundSnafu { + from: address, + to: called_function.address, + module_kind, } + .build(); + log::error!("{error}"); + return Err(error); } }; - if called_function.address != symbol.addr { - log::warn!( - "Local function call from {:#010x} in {} to {:#010x} goes to middle of function '{}' at {:#010x}, adding an external label symbol", - address, - module_kind, - called_function.address, - symbol.name, - symbol.addr - ); - symbol_map.add_external_label(called_function.address, called_function.thumb)?; + match &symbol.kind { + SymbolKind::Function(_) | SymbolKind::Label(SymLabel { external: true, .. }) => {} + + SymbolKind::Label(SymLabel { external: false, .. 
}) + | SymbolKind::Undefined + | SymbolKind::PoolConstant + | SymbolKind::JumpTable(_) + | SymbolKind::Data(_) + | SymbolKind::Bss(_) => { + return InvalidCallDestinationSymbolSnafu { + from: address, + to: called_function.address, + from_module: module_kind, + to_module: module_kind, + } + .fail(); + } } module_kind.into() } else { let candidates = modules.iter().filter(|&module| { let symbol_map = symbol_maps.get(module.kind()).unwrap(); - let Some((function, _)) = symbol_map.get_function(called_function.address).unwrap() + let Some((_, symbol)) = symbol_map.by_address(called_function.address).unwrap() else { return false; }; - function.mode.into_thumb() == Some(called_function.thumb) + + let mode = match &symbol.kind { + SymbolKind::Function(SymFunction { mode, .. }) + | SymbolKind::Label(SymLabel { external: true, mode }) => mode, + + SymbolKind::Label(SymLabel { external: false, .. }) + | SymbolKind::Undefined + | SymbolKind::PoolConstant + | SymbolKind::JumpTable(_) + | SymbolKind::Data(_) + | SymbolKind::Bss(_) => return false, + }; + + mode.into_thumb() == Some(called_function.thumb) }); RelocationModule::from_modules(candidates)? 
}; @@ -176,7 +252,7 @@ fn add_function_calls_as_relocations( if module == RelocationModule::None { log::warn!( "No functions from {address:#010x} in {} to {:#010x}:", - modules[module_index].kind(), + modules[*module_index].kind(), called_function.address ); } diff --git a/cli/src/config/program.rs b/cli/src/config/program.rs index d6a539d..01c18e9 100644 --- a/cli/src/config/program.rs +++ b/cli/src/config/program.rs @@ -80,7 +80,7 @@ impl Program { for module_index in 0..self.modules.len() { let RelocationResult { relocations, external_symbols } = data::analyze_external_references( - AnalyzeExternalReferencesOptions { + &mut AnalyzeExternalReferencesOptions { modules: &self.modules, module_index, symbol_maps: &mut self.symbol_maps, From e9b9f5e4202fb23ccc232a1a2768df7cd6c80a37 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 11:10:41 +0200 Subject: [PATCH 07/45] Don't create relocations to local symbols --- cli/src/analysis/data.rs | 56 +++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/cli/src/analysis/data.rs b/cli/src/analysis/data.rs index 3eb0e2c..71f94fa 100644 --- a/cli/src/analysis/data.rs +++ b/cli/src/analysis/data.rs @@ -44,26 +44,26 @@ pub fn analyze_external_references( ) -> Result { let mut result = RelocationResult::new(); find_relocations_in_functions(&mut result, options, analysis_options)?; - find_external_references_in_sections(options.modules, options.module_index, &mut result)?; + find_external_references_in_sections(options, &mut result)?; Ok(result) } fn find_external_references_in_sections( - modules: &[Module], - module_index: usize, + options: &mut AnalyzeExternalReferencesOptions, result: &mut RelocationResult, ) -> Result<(), AnalyzeExternalReferencesError> { - for section in modules[module_index].sections().iter() { + let o = options; + for section in o.modules[o.module_index].sections().iter() { match section.kind() { SectionKind::Data | SectionKind::Rodata => {} 
SectionKind::Code | SectionKind::Bss => continue, } let code = section - .code(modules[module_index].code(), modules[module_index].base_address())? + .code(o.modules[o.module_index].code(), o.modules[o.module_index].base_address())? .unwrap(); for word in section.iter_words(code, None) { - find_external_data(modules, module_index, word.address, word.value, result)?; + find_external_data(o, word.address, word.value, result)?; } } Ok(()) @@ -81,7 +81,7 @@ fn find_relocations_in_functions( } add_external_labels(function, options)?; add_function_calls_as_relocations(function, result, options)?; - find_external_data_from_pools(options.modules, options.module_index, function, result)?; + find_external_data_from_pools(options, function, result)?; } } Ok(()) @@ -277,44 +277,38 @@ fn add_function_calls_as_relocations( } fn find_external_data_from_pools( - modules: &[Module], - module_index: usize, + options: &mut AnalyzeExternalReferencesOptions, function: &Function, result: &mut RelocationResult, ) -> Result<(), AnalyzeExternalReferencesError> { - let module = &modules[module_index]; + let module = &options.modules[options.module_index]; for pool_constant in function.iter_pool_constants(module.code(), module.base_address()) { - find_external_data( - modules, - module_index, - pool_constant.address, - pool_constant.value, - result, - )?; + find_external_data(options, pool_constant.address, pool_constant.value, result)?; } Ok(()) } fn find_external_data( - modules: &[Module], - module_index: usize, + options: &mut AnalyzeExternalReferencesOptions, address: u32, pointer: u32, result: &mut RelocationResult, ) -> Result<(), AnalyzeExternalReferencesError> { - let local_module = &modules[module_index]; + let o = options; + + let local_module = &o.modules[o.module_index]; let is_local = local_module.sections().get_by_contained_address(pointer).is_some(); if is_local { return Ok(()); } - let candidates = find_symbol_candidates(modules, module_index, pointer); + let candidates = 
find_symbol_candidates(o, pointer); if candidates.is_empty() { // Probably not a pointer return Ok(()); } - let candidate_modules = candidates.iter().map(|c| &modules[c.module_index]); + let candidate_modules = candidates.iter().map(|c| &o.modules[c.module_index]); let module = RelocationModule::from_modules(candidate_modules)?; result.relocations.push(Relocation::new_load(address, pointer, 0, module)); @@ -323,15 +317,15 @@ fn find_external_data( } fn find_symbol_candidates( - modules: &[Module], - module_index: usize, + options: &mut AnalyzeExternalReferencesOptions, pointer: u32, ) -> Vec { - modules + options + .modules .iter() .enumerate() .filter_map(|(index, module)| { - if index == module_index { + if index == options.module_index { return None; } let (section_index, section) = module.sections().get_by_contained_address(pointer)?; @@ -342,7 +336,15 @@ fn find_symbol_candidates( return None; } } - Some(SymbolCandidate { module_index: index, section_index }) + let symbol_map = options.symbol_maps.get(module.kind()).unwrap(); + if let Some((_, symbol)) = symbol_map.by_address(pointer).unwrap() + && symbol.local + { + // Existing symbol is local, so it can't be referred to by a relocation + None + } else { + Some(SymbolCandidate { module_index: index, section_index }) + } }) .collect::>() } From fb0d8e481f1375da8d1af92f46cd00bc12672a7c Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 11:29:29 +0200 Subject: [PATCH 08/45] Define disjoint overlays as static overlays The base address of a static overlay does not come directly after the end address of another overlay. Previously, we did not allow such gaps between overlays. But since some games have gaps, those overlays are now allowed and will be placed at an exact address by the LCF. 
--- cli/src/analysis/overlay_groups.rs | 35 +++++++++++++++++++++++++----- cli/src/cmd/lcf.rs | 30 +++++++++++++------------ 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/cli/src/analysis/overlay_groups.rs b/cli/src/analysis/overlay_groups.rs index 3cdff2e..9dc8aa3 100644 --- a/cli/src/analysis/overlay_groups.rs +++ b/cli/src/analysis/overlay_groups.rs @@ -1,4 +1,4 @@ -use anyhow::{Result, bail}; +use anyhow::Result; use ds_rom::rom::Overlay; pub struct OverlayGroups { @@ -12,7 +12,13 @@ pub struct OverlayGroup { pub start_address: u32, pub end_address: u32, pub overlays: Vec, - pub after: Vec, + pub location: OverlayGroupLocation, +} + +pub enum OverlayGroupLocation { + AfterStatic, // after ARM9 and custom autoloads + After(Vec), // after other overlays + Static, // static address } impl OverlayGroups { @@ -42,14 +48,33 @@ impl OverlayGroups { start_address: static_end_address, end_address: first_group_end, overlays: first_group, - after: vec![], + location: OverlayGroupLocation::AfterStatic, }]; let mut new_group = vec![]; let mut groups_to_connect = vec![0u16]; // list of groups (indices) which may be preceded by ungrouped overlays while !ungrouped_overlays.is_empty() { let Some(connect_index) = groups_to_connect.pop() else { - bail!("No more overlay groups to connect to, are there gaps between overlays?"); + log::warn!( + "No more overlay groups to connect to after {:#010x} as there are gaps between overlays. 
Adding remaining overlays as static overlays: {}", + groups.last().unwrap().end_address, + ungrouped_overlays + .iter() + .map(|id| id.to_string()) + .collect::>() + .join(", ") + ); + for id in ungrouped_overlays { + let overlay = &overlays[id as usize]; + groups.push(OverlayGroup { + index: groups.len() as u16, + start_address: overlay.base_address(), + end_address: overlay.end_address(), + overlays: vec![id], + location: OverlayGroupLocation::Static, + }); + } + break; }; let connect_index = connect_index as usize; @@ -82,7 +107,7 @@ impl OverlayGroups { start_address: overlay_end, end_address: group_end, overlays: new_group, - after, + location: OverlayGroupLocation::After(after), }); groups_to_connect.push(index); diff --git a/cli/src/cmd/lcf.rs b/cli/src/cmd/lcf.rs index fb85b6c..fdd5ef6 100644 --- a/cli/src/cmd/lcf.rs +++ b/cli/src/cmd/lcf.rs @@ -16,7 +16,7 @@ use strum::IntoEnumIterator as _; use tinytemplate::TinyTemplate; use crate::{ - analysis::overlay_groups::OverlayGroups, + analysis::overlay_groups::{OverlayGroupLocation, OverlayGroups}, config::{ delinks::{DelinksMap, DelinksMapOptions}, section::SectionExt, @@ -358,20 +358,22 @@ impl LinkModules { log::debug!("Static end address: {static_end_address:#010x}"); let overlay_groups = OverlayGroups::analyze(static_end_address, rom.arm9_overlays())?; for group in overlay_groups.iter() { - let origin = if group.after.is_empty() { - let last_static_module = link_modules.last_static_module(); - format!("AFTER({})", last_static_module.name) - } else { - format!( - "AFTER({})", - group - .after - .iter() - .map(|id| format!("OV{id:03}")) - .collect::>() - .join(", ") - ) + let origin = match &group.location { + OverlayGroupLocation::AfterStatic => { + let last_static_module = link_modules.last_static_module(); + format!("AFTER({})", last_static_module.name) + } + OverlayGroupLocation::After(ids) => { + format!( + "AFTER({})", + ids.iter().map(|id| format!("OV{id:03}")).collect::>().join(", ") + ) + } + 
OverlayGroupLocation::Static => { + format!("{:#010x}", group.start_address) + } }; + for &overlay_id in &group.overlays { let kind = ModuleKind::Overlay(overlay_id); link_modules.modules.push(LcfModule::new( From ed344e19734f1f096cba893af2f847c402023b34 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 13:00:10 +0200 Subject: [PATCH 09/45] Find external labels and unknown functions earlier --- cli/src/analysis/data.rs | 91 +---------------------- cli/src/config/program.rs | 13 ++-- lib/src/analysis/data.rs | 149 ++++++++++++++++++++++++++++++++++---- lib/src/config/module.rs | 11 ++- 4 files changed, 150 insertions(+), 114 deletions(-) diff --git a/cli/src/analysis/data.rs b/cli/src/analysis/data.rs index 71f94fa..3b5f352 100644 --- a/cli/src/analysis/data.rs +++ b/cli/src/analysis/data.rs @@ -1,7 +1,7 @@ use ds_decomp::{ analysis::functions::{CalledFunction, Function}, config::{ - module::{AnalysisOptions, Module, ModuleKind}, + module::{Module, ModuleKind}, relocations::{Relocation, RelocationFromModulesError, RelocationModule}, section::{SectionCodeError, SectionIndex, SectionKind}, symbol::{SymFunction, SymLabel, SymbolKind, SymbolMapError, SymbolMaps}, @@ -40,10 +40,9 @@ pub enum AnalyzeExternalReferencesError { pub fn analyze_external_references( options: &mut AnalyzeExternalReferencesOptions, - analysis_options: &AnalysisOptions, ) -> Result { let mut result = RelocationResult::new(); - find_relocations_in_functions(&mut result, options, analysis_options)?; + find_relocations_in_functions(&mut result, options)?; find_external_references_in_sections(options, &mut result)?; Ok(result) } @@ -72,14 +71,9 @@ fn find_external_references_in_sections( fn find_relocations_in_functions( result: &mut RelocationResult, options: &mut AnalyzeExternalReferencesOptions, - analysis_options: &AnalysisOptions, ) -> Result<(), AnalyzeExternalReferencesError> { for section in options.modules[options.module_index].sections().iter() { for function in 
section.functions().values() { - if analysis_options.allow_unknown_function_calls { - insert_unknown_function_symbols(function, options)?; - } - add_external_labels(function, options)?; add_function_calls_as_relocations(function, result, options)?; find_external_data_from_pools(options, function, result)?; } @@ -95,87 +89,6 @@ fn iter_function_calls(function: &Function) -> impl Iterator Result<(), AnalyzeExternalReferencesError> { - let AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps } = options; - - for (&address, &called_function) in iter_function_calls(function) { - let local_module = &modules[*module_index]; - let is_local = - local_module.sections().get_by_contained_address(called_function.address).is_some(); - if !is_local { - continue; - } - - let module_kind = local_module.kind(); - let symbol_map = symbol_maps.get_mut(module_kind); - if symbol_map.get_function_containing(called_function.address).is_none() { - log::warn!( - "Local function call from {:#010x} in {} to {:#010x} leads to no function, inserting an unknown function symbol", - address, - module_kind, - called_function.address - ); - - let thumb_bit = if called_function.thumb { 1 } else { 0 }; - let function_address = called_function.address | thumb_bit; - - if symbol_map.get_function(function_address)?.is_none() { - let name = - format!("{}{:08x}_unk", local_module.default_func_prefix, function_address); - symbol_map.add_unknown_function(name, function_address, called_function.thumb); - } - } - } - Ok(()) -} - -fn add_external_labels( - function: &Function, - options: &mut AnalyzeExternalReferencesOptions, -) -> Result<(), AnalyzeExternalReferencesError> { - let AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps } = options; - - for (&address, &called_function) in iter_function_calls(function) { - let local_module = &modules[*module_index]; - let is_local = - local_module.sections().get_by_contained_address(called_function.address).is_some(); - if 
!is_local { - continue; - } - - let module_kind = local_module.kind(); - let symbol_map = symbol_maps.get_mut(module_kind); - let symbol = match symbol_map.get_function_containing(called_function.address) { - Some((_, symbol)) => symbol, - None => { - let error = LocalFunctionNotFoundSnafu { - from: address, - to: called_function.address, - module_kind, - } - .build(); - log::error!("{error}"); - return Err(error); - } - }; - if called_function.address != symbol.addr { - log::warn!( - "Local function call from {:#010x} in {} to {:#010x} goes to middle of function '{}' at {:#010x}, adding an external label symbol", - address, - module_kind, - called_function.address, - symbol.name, - symbol.addr - ); - symbol_map.add_external_label(called_function.address, called_function.thumb)?; - } - } - Ok(()) -} - fn add_function_calls_as_relocations( function: &Function, result: &mut RelocationResult, diff --git a/cli/src/config/program.rs b/cli/src/config/program.rs index 01c18e9..1b1f486 100644 --- a/cli/src/config/program.rs +++ b/cli/src/config/program.rs @@ -79,14 +79,11 @@ impl Program { pub fn analyze_cross_references(&mut self, options: &AnalysisOptions) -> Result<()> { for module_index in 0..self.modules.len() { let RelocationResult { relocations, external_symbols } = - data::analyze_external_references( - &mut AnalyzeExternalReferencesOptions { - modules: &self.modules, - module_index, - symbol_maps: &mut self.symbol_maps, - }, - options, - )?; + data::analyze_external_references(&mut AnalyzeExternalReferencesOptions { + modules: &self.modules, + module_index, + symbol_maps: &mut self.symbol_maps, + })?; let module_relocations = self.modules[module_index].relocations_mut(); for reloc in relocations { diff --git a/lib/src/analysis/data.rs b/lib/src/analysis/data.rs index e6d8892..b606985 100644 --- a/lib/src/analysis/data.rs +++ b/lib/src/analysis/data.rs @@ -3,16 +3,16 @@ use std::{collections::BTreeMap, ops::Range}; use snafu::Snafu; use crate::{ - 
analysis::functions::Function, + analysis::functions::{CalledFunction, Function}, config::{ Comments, - module::{AnalysisOptions, ModuleKind}, + module::{AnalysisOptions, Module, ModuleKind}, relocations::{ Relocation, RelocationKind, RelocationModule, RelocationOptions, Relocations, RelocationsError, }, section::{Section, SectionKind, Sections}, - symbol::{SymBss, SymData, SymbolMap, SymbolMapError}, + symbol::{SymBss, SymData, SymbolKind, SymbolMap, SymbolMapError}, }, function, }; @@ -31,6 +31,10 @@ pub struct FindLocalDataOptions<'a> { #[derive(Debug, Snafu)] pub enum FindLocalDataError { + #[snafu(display( + "Local function call from {from:#010x} in {module_kind} to {to:#010x} leads to no function" + ))] + LocalFunctionNotFound { from: u32, to: u32, module_kind: ModuleKind }, #[snafu(transparent)] SymbolMap { source: SymbolMapError }, #[snafu(transparent)] @@ -73,21 +77,38 @@ pub fn find_local_data_from_pools( // Not a pointer, or points to a different module continue; }; - let function = symbol_map.get_function(pointer & !1)?; + let symbol = symbol_map.by_address(pointer & !1)?; if section.kind() == SectionKind::Code - && let Some((function, _)) = function + && let Some((_, symbol)) = symbol { let thumb = (pointer & 1) != 0; - if function.mode.into_thumb() != Some(thumb) { - // Instruction mode must match - continue; - } + let symbol_thumb = match &symbol.kind { + SymbolKind::Function(function) => function.mode.into_thumb(), + SymbolKind::Label(label) => { + if label.external { + label.mode.into_thumb() + } else { + None + } + } + SymbolKind::Undefined + | SymbolKind::PoolConstant + | SymbolKind::JumpTable(_) + | SymbolKind::Data(_) + | SymbolKind::Bss(_) => None, + }; + if let Some(symbol_thumb) = symbol_thumb { + if symbol_thumb != thumb { + // Instruction mode must match + continue; + } - // Relocate function pointer - let reloc = - relocations.add_load(pool_constant.address, pointer, 0, module_kind.into())?; - if analysis_options.provide_reloc_source { - 
reloc.comments.post_comment = Some(function!().to_string()); + // Relocate function pointer + let reloc = + relocations.add_load(pool_constant.address, pointer, 0, module_kind.into())?; + if analysis_options.provide_reloc_source { + reloc.comments.post_comment = Some(function!().to_string()); + } } } else { add_symbol_from_pointer( @@ -209,3 +230,103 @@ fn add_symbol_from_pointer( Ok(()) } + +pub fn find_function_labels( + module: &Module, + symbol_map: &mut SymbolMap, + options: &AnalysisOptions, +) -> Result<(), FindLocalDataError> { + for section in module.sections().iter() { + for function in section.functions().values() { + if options.allow_unknown_function_calls { + insert_unknown_function_symbols(function, module, symbol_map)?; + } + add_external_labels(function, module, symbol_map)?; + } + } + Ok(()) +} + +fn iter_function_calls(function: &Function) -> impl Iterator { + function + .function_calls() + .iter() + // TODO: Condition code resets to AL for relocated call instructions + .filter(|(_, called_function)| !called_function.ins.is_conditional()) +} + +fn insert_unknown_function_symbols( + function: &Function, + module: &Module, + symbol_map: &mut SymbolMap, +) -> Result<(), FindLocalDataError> { + for (&address, &called_function) in iter_function_calls(function) { + let local_module = module; + let is_local = + local_module.sections().get_by_contained_address(called_function.address).is_some(); + if !is_local { + continue; + } + + let module_kind = local_module.kind(); + if symbol_map.get_function_containing(called_function.address).is_none() { + log::warn!( + "Local function call from {:#010x} in {} to {:#010x} leads to no function, inserting an unknown function symbol", + address, + module_kind, + called_function.address + ); + + let thumb_bit = if called_function.thumb { 1 } else { 0 }; + let function_address = called_function.address | thumb_bit; + + if symbol_map.get_function(function_address)?.is_none() { + let name = + format!("{}{:08x}_unk", 
local_module.default_func_prefix, function_address); + symbol_map.add_unknown_function(name, function_address, called_function.thumb); + } + } + } + Ok(()) +} + +fn add_external_labels( + function: &Function, + module: &Module, + symbol_map: &mut SymbolMap, +) -> Result<(), FindLocalDataError> { + for (&address, &called_function) in iter_function_calls(function) { + let is_local = + module.sections().get_by_contained_address(called_function.address).is_some(); + if !is_local { + continue; + } + + let module_kind = module.kind(); + let symbol = match symbol_map.get_function_containing(called_function.address) { + Some((_, symbol)) => symbol, + None => { + let error = LocalFunctionNotFoundSnafu { + from: address, + to: called_function.address, + module_kind, + } + .build(); + log::error!("{error}"); + return Err(error); + } + }; + if called_function.address != symbol.addr { + log::warn!( + "Local function call from {:#010x} in {} to {:#010x} goes to middle of function '{}' at {:#010x}, adding an external label symbol", + address, + module_kind, + called_function.address, + symbol.name, + symbol.addr + ); + symbol_map.add_external_label(called_function.address, called_function.thumb)?; + } + } + Ok(()) +} diff --git a/lib/src/config/module.rs b/lib/src/config/module.rs index dd24350..01d5560 100644 --- a/lib/src/config/module.rs +++ b/lib/src/config/module.rs @@ -22,7 +22,7 @@ use super::{ use crate::{ analysis::{ ctor::{CtorRange, CtorRangeError}, - data::{self, FindLocalDataOptions}, + data::{self, FindLocalDataOptions, find_function_labels}, exception::{ExceptionData, ExceptionDataError}, functions::{ FindFunctionsOptions, Function, FunctionAnalysisError, FunctionParseOptions, @@ -198,6 +198,7 @@ impl Module { let symbol_map = symbol_maps.get_mut(module.kind); module.find_sections_arm9(symbol_map, &ctor_range, exception_data, arm9)?; + find_function_labels(&module, symbol_map, options)?; module.find_data_from_pools( symbol_map, options, @@ -272,6 +273,7 @@ impl 
Module { start: overlay.ctor_start(), end: overlay.ctor_end(), })?; + find_function_labels(&module, symbol_map, options)?; module.find_data_from_pools(symbol_map, options, None)?; module.find_data_from_sections(symbol_map, options)?; @@ -330,6 +332,7 @@ impl Module { let symbol_map = symbol_maps.get_mut(module.kind); module.find_sections_itcm(symbol_map)?; + find_function_labels(&module, symbol_map, options)?; module.find_data_from_pools(symbol_map, options, None)?; Ok(module) @@ -385,8 +388,10 @@ impl Module { let symbol_map = symbol_maps.get_mut(module.kind); module.find_sections_unknown_autoload(symbol_map, autoload)?; - module.find_data_from_pools(symbol_maps.get_mut(module.kind), options, None)?; - module.find_data_from_sections(symbol_maps.get_mut(module.kind), options)?; + + find_function_labels(&module, symbol_map, options)?; + module.find_data_from_pools(symbol_map, options, None)?; + module.find_data_from_sections(symbol_map, options)?; Ok(module) } From c4a47c2d96ec51b2de938e569f1bbb3f657444eb Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 13:04:32 +0200 Subject: [PATCH 10/45] Mark branches into functions as return only if the instruction modes match --- lib/src/analysis/functions.rs | 51 ++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 0e306e8..13e9be0 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -155,9 +155,10 @@ impl Function { fn function_parser_loop( mut parser: Parser<'_>, options: FunctionParseOptions, + found_functions: &BTreeMap, ) -> Result { let thumb = parser.mode == ParseMode::Thumb; - let mut context = ParseFunctionContext::new(thumb, options); + let mut context = ParseFunctionContext::new(thumb, options, found_functions); let Some((address, ins, parsed_ins)) = parser.next() else { return Err(FunctionAnalysisError::IntoFunction { @@ -216,7 +217,7 @@ impl Function { let 
parser = Parser::new(parse_mode, *start_address, Endian::Little, PARSE_FLAGS, function_code); - Self::function_parser_loop(parser, options) + Self::function_parser_loop(parser, options, &BTreeMap::new()) } pub fn find_functions( @@ -272,18 +273,22 @@ impl Function { (format!("{default_name_prefix}{address:08x}"), true) }; - let function_result = Function::function_parser_loop(parser, FunctionParseOptions { - name, - start_address: address, - base_address, - module_code, - known_end_address: None, - module_start_address, - module_end_address, - existing_functions: search_options.existing_functions, - check_defs_uses: search_options.check_defs_uses, - parse_options: Default::default(), - }); + let function_result = Function::function_parser_loop( + parser, + FunctionParseOptions { + name, + start_address: address, + base_address, + module_code, + known_end_address: None, + module_start_address, + module_end_address, + existing_functions: search_options.existing_functions, + check_defs_uses: search_options.check_defs_uses, + parse_options: Default::default(), + }, + &functions, + ); let function = match function_result { Ok(function) => function, Err(FunctionAnalysisError::IntoFunction { @@ -605,6 +610,7 @@ struct ParseFunctionContext<'a> { module_start_address: u32, module_end_address: u32, existing_functions: Option<&'a BTreeMap>, + found_functions: &'a BTreeMap, /// Address of last conditional instruction, so we can detect the final return instruction last_conditional_destination: Option, @@ -637,7 +643,11 @@ pub enum IntoFunctionError { } impl<'a> ParseFunctionContext<'a> { - pub fn new(thumb: bool, options: FunctionParseOptions<'a>) -> Self { + pub fn new( + thumb: bool, + options: FunctionParseOptions<'a>, + found_functions: &'a BTreeMap, + ) -> Self { let FunctionParseOptions { name, start_address, @@ -680,6 +690,7 @@ impl<'a> ParseFunctionContext<'a> { module_start_address, module_end_address, existing_functions, + found_functions, 
last_conditional_destination: None, last_pool_address: None, @@ -1069,6 +1080,16 @@ impl<'a> ParseFunctionContext<'a> { } // backwards branch ("b", Argument::BranchDest(offset), _, _, _) if offset < 0 => { + if let Some(destination) = Function::is_branch(ins, parsed_ins, address) + && let Some((_, function)) = self.found_functions.range(..=destination).last() + && function.start_address >= destination + { + let thumb = matches!(ins, Ins::Thumb(_)); + if thumb != function.is_thumb() { + // Instruction mode must match + return false; + } + } // Branch must be within current function (infinite loop) or outside current module (tail call) Function::is_branch(ins, parsed_ins, address) .map(|destination| { From bf674c7a2348905b16b24346d25291b8bb25dbed Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 13:05:47 +0200 Subject: [PATCH 11/45] Add exception for `add pc, pc, r*, lsl #0x2` as return instruction --- lib/src/analysis/functions.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 13e9be0..9ca0d46 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -1121,13 +1121,14 @@ impl<'a> ParseFunctionContext<'a> { ) => true, // add pc, r*, r*, lsl #* // Another weird one from Bowser's Inside Story's ITCM module (0x01ff84f8 in EU version) + // An exception is `add pc, pc, r*, lsl #0x2` which is for jump tables and not a return ( "add", Argument::Reg(Reg { reg: Register::Pc, .. }), - Argument::Reg(_), + Argument::Reg(Reg { reg, .. 
}), Argument::Reg(_), Argument::ShiftImm(ShiftImm { op: Shift::Lsl, imm: _ }), - ) => true, + ) if reg != Register::Pc => true, _ => false, } } From efac4f574ea75f560397413320c5179ca3abf416 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 13:06:54 +0200 Subject: [PATCH 12/45] Always mark backwards branches as returns --- lib/src/analysis/functions.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 9ca0d46..409a8e9 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -1090,14 +1090,7 @@ impl<'a> ParseFunctionContext<'a> { return false; } } - // Branch must be within current function (infinite loop) or outside current module (tail call) - Function::is_branch(ins, parsed_ins, address) - .map(|destination| { - destination >= function_start - || destination < module_start_address - || destination >= module_end_address - }) - .unwrap_or(false) + true } // subs pc, lr, * ( From 149dad567bfa5d38cba5002e0e03f90488a5f440 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 26 Apr 2026 15:07:42 +0200 Subject: [PATCH 13/45] Add illegal code pattern --- lib/src/analysis/functions.rs | 9 +++++++++ lib/src/analysis/illegal_code.rs | 2 ++ 2 files changed, 11 insertions(+) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 409a8e9..2faa1c2 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -20,6 +20,7 @@ use super::{ secure_area::SecureAreaState, }; use crate::{ + analysis::illegal_code::ILLEGAL_CODE_PATTERNS, config::symbol::{SymbolMap, SymbolMapError}, util::bytes::FromSlice, }; @@ -261,6 +262,14 @@ impl Function { while !function_code.is_empty() && address <= *upper_bounds.first().unwrap_or(&last_function_address) { + for illegal_pattern in ILLEGAL_CODE_PATTERNS { + if function_code.starts_with(illegal_pattern) { + address += illegal_pattern.len() as u32; + function_code = 
&module_code[(address - base_address) as usize..]; + continue; + } + } + let thumb = Function::is_thumb_function(address, function_code); let parse_mode = if thumb { ParseMode::Thumb } else { ParseMode::Arm }; diff --git a/lib/src/analysis/illegal_code.rs b/lib/src/analysis/illegal_code.rs index b01dcdf..31bd0e5 100644 --- a/lib/src/analysis/illegal_code.rs +++ b/lib/src/analysis/illegal_code.rs @@ -59,3 +59,5 @@ impl IllegalCodeState { self == Self::Illegal } } + +pub const ILLEGAL_CODE_PATTERNS: &[&[u8]] = &[&[0x00, 0x02, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00]]; From 2f926da37024ea4a5db485891f54d2af48b0933b Mon Sep 17 00:00:00 2001 From: Aetias Date: Mon, 27 Apr 2026 19:37:47 +0200 Subject: [PATCH 14/45] Distinguish error messages There was a duplicate error message since I copied over data analysis code from CLI to library --- cli/src/analysis/data.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/analysis/data.rs b/cli/src/analysis/data.rs index 3b5f352..ecd5900 100644 --- a/cli/src/analysis/data.rs +++ b/cli/src/analysis/data.rs @@ -18,11 +18,11 @@ pub struct AnalyzeExternalReferencesOptions<'a> { #[derive(Debug, Snafu)] pub enum AnalyzeExternalReferencesError { #[snafu(display( - "Local function call from {from:#010x} in {module_kind} to {to:#010x} leads to no function" + "Failed to add relocation for local function call from {from:#010x} in {module_kind} to {to:#010x} as it leads to no function" ))] LocalFunctionNotFound { from: u32, to: u32, module_kind: ModuleKind }, #[snafu(display( - "Function call from {from:#010x} in {from_module} to {to:#010x} in {to_module} leads to a non-function symbol" + "Failed to add relocation for function call from {from:#010x} in {from_module} to {to:#010x} in {to_module} as it leads to a non-function symbol" ))] InvalidCallDestinationSymbol { from: u32, From e8734d3958308ee569f349cabb8653f714e3b6b1 Mon Sep 17 00:00:00 2001 From: Aetias Date: Mon, 27 Apr 2026 19:38:30 +0200 Subject: [PATCH 15/45] 
Support external label symbols when creating relocations for pool constants --- cli/src/analysis/data.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cli/src/analysis/data.rs b/cli/src/analysis/data.rs index ecd5900..d826376 100644 --- a/cli/src/analysis/data.rs +++ b/cli/src/analysis/data.rs @@ -4,7 +4,7 @@ use ds_decomp::{ module::{Module, ModuleKind}, relocations::{Relocation, RelocationFromModulesError, RelocationModule}, section::{SectionCodeError, SectionIndex, SectionKind}, - symbol::{SymFunction, SymLabel, SymbolKind, SymbolMapError, SymbolMaps}, + symbol::{InstructionMode, SymFunction, SymLabel, SymbolKind, SymbolMapError, SymbolMaps}, }, }; use snafu::Snafu; @@ -242,14 +242,24 @@ fn find_symbol_candidates( return None; } let (section_index, section) = module.sections().get_by_contained_address(pointer)?; + let symbol_map = options.symbol_maps.get(module.kind()).unwrap(); if section.kind() == SectionKind::Code { - let function = section.functions().get(&(pointer & !1))?; + let (_, symbol) = symbol_map.by_address(pointer & !1).unwrap()?; + let symbol_is_thumb = match &symbol.kind { + SymbolKind::Function(function) => function.mode == InstructionMode::Thumb, + SymbolKind::Label(label) => label.mode == InstructionMode::Thumb, + SymbolKind::Undefined + | SymbolKind::PoolConstant + | SymbolKind::JumpTable(_) + | SymbolKind::Data(_) + | SymbolKind::Bss(_) => return None, + }; + let thumb = (pointer & 1) != 0; - if function.is_thumb() != thumb { + if symbol_is_thumb != thumb { return None; } } - let symbol_map = options.symbol_maps.get(module.kind()).unwrap(); if let Some((_, symbol)) = symbol_map.by_address(pointer).unwrap() && symbol.local { From eab42d636aa476660f2d42002243bbdafceafa0c Mon Sep 17 00:00:00 2001 From: Aetias Date: Mon, 27 Apr 2026 19:39:48 +0200 Subject: [PATCH 16/45] check symbols: Skip label symbols Some dsd projects complain here when checking locations of external labels, but label symbols aren't 
imported by SymbolMapsExt::from_object so there's no reason to check them --- cli/src/cmd/check/symbols.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cli/src/cmd/check/symbols.rs b/cli/src/cmd/check/symbols.rs index f7beea7..95e00ba 100644 --- a/cli/src/cmd/check/symbols.rs +++ b/cli/src/cmd/check/symbols.rs @@ -88,6 +88,11 @@ impl CheckSymbols { break; } + if matches!(target_symbol.kind, SymbolKind::Label(_)) { + // Label symbols are not imported by SymbolMapsExt::from_object + continue; + } + let Some(symbol_iter) = object.for_address(target_symbol.addr) else { num_mismatches += 1; log::error!( From 3027c508b5093ca4a739e6a6acef36815de542a0 Mon Sep 17 00:00:00 2001 From: Aetias Date: Mon, 27 Apr 2026 19:39:59 +0200 Subject: [PATCH 17/45] check symbols: Clearer output --- cli/src/cmd/check/symbols.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cli/src/cmd/check/symbols.rs b/cli/src/cmd/check/symbols.rs index 95e00ba..b0ab558 100644 --- a/cli/src/cmd/check/symbols.rs +++ b/cli/src/cmd/check/symbols.rs @@ -96,7 +96,7 @@ impl CheckSymbols { let Some(symbol_iter) = object.for_address(target_symbol.addr) else { num_mismatches += 1; log::error!( - "Symbol '{}' in {} at {:#010x} not found in linked binary", + "Symbol '{}' in {} at {:#010x} not found by address in linked binary", target_symbol.name, module_kind, target_symbol.addr @@ -105,6 +105,8 @@ impl CheckSymbols { for (_, candidate) in candidates { log::error!(" Matching name found at {:#010x}", candidate.addr); } + } else { + log::error!(" No other symbols found with the same name"); } continue; }; @@ -116,7 +118,7 @@ impl CheckSymbols { else { num_mismatches += 1; log::error!( - "Symbol '{}' in {} at {:#010x} not found in linked binary", + "Symbol '{}' in {} at {:#010x} not found by fuzzy name in linked binary", target_symbol.name, module_kind, target_symbol.addr @@ -125,6 +127,8 @@ impl CheckSymbols { for (_, candidate) in candidates { log::error!(" Matching name found at 
{:#010x}", candidate.addr); } + } else { + log::error!(" No other symbols found with the same name"); } if let Some(candidates) = object.for_address(target_symbol.addr) { for (_, candidate) in candidates { From 0d96ef434113d2807d01f5b7ded70749f32553ca Mon Sep 17 00:00:00 2001 From: Aetias Date: Mon, 27 Apr 2026 19:43:10 +0200 Subject: [PATCH 18/45] test: Update error type for allowing unknown function calls It was moved from CLI to library and that changed which error type we want to look for --- cli/tests/test_roundtrip.rs | 12 ++++++++---- lib/src/analysis/mod.rs | 2 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cli/tests/test_roundtrip.rs b/cli/tests/test_roundtrip.rs index 0d62f9e..a08ad9b 100644 --- a/cli/tests/test_roundtrip.rs +++ b/cli/tests/test_roundtrip.rs @@ -9,9 +9,11 @@ use std::{ }; use anyhow::Result; -use ds_decomp::config::config::Config; +use ds_decomp::{ + analysis::FindLocalDataError, + config::{config::Config, module::ModuleError}, +}; use ds_decomp_cli::{ - analysis::data::AnalyzeExternalReferencesError, cmd::{CheckModules, CheckSymbols, ConfigRom, Delink, Disassemble, Init, JsonDelinks, Lcf}, util::io::{create_dir_all, read_to_string}, }; @@ -58,8 +60,10 @@ fn test_roundtrip() -> Result<()> { // Init dsd project let dsd_config_dir = dsd_init(&project_path, &rom_config, false).or_else(|e| { - match e.downcast_ref::() { - Some(AnalyzeExternalReferencesError::LocalFunctionNotFound { .. }) => { + match e.downcast_ref::() { + Some(ModuleError::FindLocalData { + source: FindLocalDataError::LocalFunctionNotFound { .. 
}, + }) => { log::info!("dsd init failed, trying again with unknown function calls"); dsd_init(&project_path, &rom_config, true) } diff --git a/lib/src/analysis/mod.rs b/lib/src/analysis/mod.rs index dbc161a..ae3fbe0 100644 --- a/lib/src/analysis/mod.rs +++ b/lib/src/analysis/mod.rs @@ -9,3 +9,5 @@ mod inline_table; pub(crate) mod jump_table; pub(crate) mod main; pub mod secure_area; + +pub use data::FindLocalDataError; From f13c92b94994de51d31631ba558b81a3ebff1475 Mon Sep 17 00:00:00 2001 From: Aetias Date: Mon, 27 Apr 2026 19:43:19 +0200 Subject: [PATCH 19/45] Treat jump table branches as conditional --- lib/src/analysis/functions.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 2faa1c2..7f15771 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -627,6 +627,7 @@ struct ParseFunctionContext<'a> { last_pool_address: Option, /// State machine for detecting jump tables and adding them as symbols jump_table_state: JumpTableState, + jump_table_end_address: Option, /// State machine for detecting branches (B, not BL) to other functions function_branch_state: FunctionBranchState, /// State machine for detecting inline data tables within the function @@ -708,6 +709,7 @@ impl<'a> ParseFunctionContext<'a> { } else { JumpTableState::Arm(Default::default()) }, + jump_table_end_address: None, function_branch_state: Default::default(), inline_table_state: Default::default(), illegal_code_state: Default::default(), @@ -739,8 +741,11 @@ impl<'a> ParseFunctionContext<'a> { self.jump_table_state = self.jump_table_state.handle(address, ins, parsed_ins, &mut self.jump_tables); - self.last_conditional_destination = - self.last_conditional_destination.max(self.jump_table_state.table_end_address()); + if let Some(table_end_address) = self.jump_table_state.table_end_address() { + self.last_conditional_destination = + 
self.last_conditional_destination.max(Some(table_end_address)); + self.jump_table_end_address = Some(table_end_address); + } if let Some(label) = self.jump_table_state.get_label(address, ins) { self.labels.insert(label); self.last_conditional_destination = self.last_conditional_destination.max(Some(label)); @@ -978,7 +983,8 @@ impl<'a> ParseFunctionContext<'a> { in_conditional_block: bool, ) -> Option { self.labels.insert(destination); - if in_conditional_block || ins.is_conditional() { + let is_table_jump = self.jump_table_end_address.map(|end| address < end).unwrap_or(false); + if in_conditional_block || ins.is_conditional() || is_table_jump { self.last_conditional_destination = self.last_conditional_destination.max(Some(destination)); } From 7b2baff1829ca21901acb975a3edc980ebaa2c29 Mon Sep 17 00:00:00 2001 From: Aetias Date: Mon, 27 Apr 2026 19:43:27 +0200 Subject: [PATCH 20/45] Add ARM jump table case `ldmiahi` to return early if no case matched --- lib/src/analysis/jump_table.rs | 37 +++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/lib/src/analysis/jump_table.rs b/lib/src/analysis/jump_table.rs index a020f07..129b2fc 100644 --- a/lib/src/analysis/jump_table.rs +++ b/lib/src/analysis/jump_table.rs @@ -64,16 +64,20 @@ pub enum JumpTableStateArm { /// `...` other non-comparing instructions /// `addls pc, pc, index, lsl #0x2` jump to nearby branch instruction, OR /// `bgt @skip` skip jump table if SIGNED index is out of bounds + /// `ldmiahi sp!, {...}` return if index is out of bounds JumpOrBranchSigned { index: Register, limit: u32 }, /// if index is signed: - /// `cmp index, #0x0` check that the index is non-negative + /// `cmp index, #0x0` check that the index is non-negative SignedBaseline { index: Register, limit: u32 }, /// if index is signed: /// `addge pc, pc, index, lsl #0x2` jump to nearby branch instruction JumpSigned { index: Register, limit: u32 }, + /// `add pc, pc, index, lsl #0x2` jump to nearby 
branch instruction + JumpAfterReturn { index: Register, limit: u32 }, + /// valid table detected, starts from `table_address` with a size of `limit` ValidJumpTable { table_address: u32, limit: u32 }, } @@ -137,6 +141,14 @@ impl JumpTableStateArm { Argument::None, Argument::None, ) => Self::SignedBaseline { index, limit }, + ( + "ldmhiia", + Argument::Reg(Reg { reg: Register::Sp, writeback: true, .. }), + Argument::RegList(_), + Argument::None, + Argument::None, + Argument::None, + ) => Self::JumpAfterReturn { index, limit }, _ if ins.updates_condition_flags() => Self::default(), _ => self, } @@ -174,6 +186,29 @@ impl JumpTableStateArm { _ => self, } } + Self::JumpAfterReturn { index, limit } => { + match (parsed_ins.mnemonic, args[0], args[1], args[2], args[3], args[4]) { + ( + "add", + Argument::Reg(Reg { reg: Register::Pc, .. }), + Argument::Reg(Reg { reg: Register::Pc, .. }), + Argument::Reg(Reg { reg, .. }), + Argument::ShiftImm(ShiftImm { imm: 2, op: Shift::Lsl }), + Argument::None, + ) if reg == index => { + let table_address = address + 8; + let size = (limit + 1) * 4; + jump_tables.insert(table_address, JumpTable { + address: table_address, + size, + code: true, + }); + Self::ValidJumpTable { table_address: address + 8, limit } + } + _ if ins.updates_condition_flags() => Self::default(), + _ => self, + } + } Self::ValidJumpTable { table_address, limit } => { let end = table_address + limit * 4; if address > end { Self::default() } else { self } From d215a1b6c2f56d9067dcecb0a34b4c3582d53e53 Mon Sep 17 00:00:00 2001 From: Aetias Date: Mon, 27 Apr 2026 19:43:54 +0200 Subject: [PATCH 21/45] Mark existing labels as external on `SymbolMap::add_external_label` --- lib/src/config/symbol.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/src/config/symbol.rs b/lib/src/config/symbol.rs index 8ad2b60..1015932 100644 --- a/lib/src/config/symbol.rs +++ b/lib/src/config/symbol.rs @@ -489,7 +489,22 @@ impl SymbolMap { thumb: bool, ) 
-> Result<(SymbolId, &Symbol), SymbolMapError> { let name = Self::label_name(addr); - self.add_if_new_address(Symbol::new_external_label(name, addr, thumb)) + if let Some((existing_id, _)) = self.by_address(addr)? { + let existing_symbol = self.get_mut(existing_id).unwrap(); + if let SymbolKind::Label(existing_label) = &mut existing_symbol.kind { + existing_label.external = true; + Ok((existing_id, existing_symbol)) + } else { + MultipleSymbolsSnafu { + address: addr, + name, + other_name: existing_symbol.name.clone(), + } + .fail() + } + } else { + Ok(self.add(Symbol::new_external_label(name, addr, thumb))) + } } pub fn get_label(&self, addr: u32) -> Result, SymbolMapError> { From 76e42e6ce9447b59650dddd4c16b98e959b3cabe Mon Sep 17 00:00:00 2001 From: Aetias Date: Tue, 28 Apr 2026 17:28:25 +0200 Subject: [PATCH 22/45] Skip consecutive pointers --- lib/src/analysis/functions.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 7f15771..6c965fa 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -270,6 +270,25 @@ impl Function { } } + // Skip if more than 10 consecutive valid pointer values, as that is most certainly not + // valid code at that point + let mut function_code_iter = function_code; + let mut pointer_count = 0; + while function_code_iter.len() > 4 { + let word: u32 = u32::from_le_slice(function_code_iter); + function_code_iter = &function_code_iter[4..]; + if (0x01ff8000..0x02400000).contains(&word) { + pointer_count += 1; + } else { + break; + } + } + if pointer_count >= 10 { + address += pointer_count * 4; + function_code = &module_code[(address - base_address) as usize..]; + continue; + } + let thumb = Function::is_thumb_function(address, function_code); let parse_mode = if thumb { ParseMode::Thumb } else { ParseMode::Arm }; From 8bde2f65b87eda747b1fbdc902042c1d573051a8 Mon Sep 17 00:00:00 2001 From: Aetias Date: Tue, 28 Apr 2026 
19:58:56 +0200 Subject: [PATCH 23/45] Handle branch to wrong instruction mode as illegal instruction --- lib/src/analysis/functions.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 6c965fa..8877ddd 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -796,6 +796,18 @@ impl<'a> ParseFunctionContext<'a> { return ParseFunctionState::IllegalIns { address, ins }; } + if let Some(destination) = Function::is_branch(ins, parsed_ins, address) + && destination < self.start_address + && let Some((_, function)) = self.found_functions.range(..=destination).last() + && function.start_address < destination + { + let thumb = matches!(ins, Ins::Thumb(_)); + if thumb != function.is_thumb() { + // Instruction mode must match + return ParseFunctionState::IllegalIns { address, ins }; + } + } + let in_conditional_block = Some(address) < self.last_conditional_destination; let is_return = self.is_return( ins, @@ -1113,19 +1125,7 @@ impl<'a> ParseFunctionContext<'a> { true } // backwards branch - ("b", Argument::BranchDest(offset), _, _, _) if offset < 0 => { - if let Some(destination) = Function::is_branch(ins, parsed_ins, address) - && let Some((_, function)) = self.found_functions.range(..=destination).last() - && function.start_address >= destination - { - let thumb = matches!(ins, Ins::Thumb(_)); - if thumb != function.is_thumb() { - // Instruction mode must match - return false; - } - } - true - } + ("b", Argument::BranchDest(offset), _, _, _) if offset < 0 => true, // subs pc, lr, * ( "subs", From 9d1b571f221765c86ea206c509b2de8c44bb5d05 Mon Sep 17 00:00:00 2001 From: Aetias Date: Tue, 28 Apr 2026 20:24:56 +0200 Subject: [PATCH 24/45] Don't add relocations to non-external labels --- cli/src/analysis/data.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cli/src/analysis/data.rs b/cli/src/analysis/data.rs index 
d826376..75ff483 100644 --- a/cli/src/analysis/data.rs +++ b/cli/src/analysis/data.rs @@ -247,8 +247,11 @@ fn find_symbol_candidates( let (_, symbol) = symbol_map.by_address(pointer & !1).unwrap()?; let symbol_is_thumb = match &symbol.kind { SymbolKind::Function(function) => function.mode == InstructionMode::Thumb, - SymbolKind::Label(label) => label.mode == InstructionMode::Thumb, - SymbolKind::Undefined + SymbolKind::Label(SymLabel { external: true, mode }) => { + *mode == InstructionMode::Thumb + } + SymbolKind::Label(SymLabel { external: false, .. }) + | SymbolKind::Undefined | SymbolKind::PoolConstant | SymbolKind::JumpTable(_) | SymbolKind::Data(_) From 298aa5f18427e049f5173c123e5c7aa4ae6c71c7 Mon Sep 17 00:00:00 2001 From: Aetias Date: Tue, 28 Apr 2026 20:35:01 +0200 Subject: [PATCH 25/45] Treat Thumb NOP as illegal --- lib/src/analysis/illegal_code.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/src/analysis/illegal_code.rs b/lib/src/analysis/illegal_code.rs index 31bd0e5..bdce543 100644 --- a/lib/src/analysis/illegal_code.rs +++ b/lib/src/analysis/illegal_code.rs @@ -20,6 +20,16 @@ impl IllegalCodeState { return Self::Illegal; } + if matches!(ins, Ins::Thumb(_)) + && parsed_ins.mnemonic == "lsl" + && let Arg::Reg(Reg { reg: Register::R0, .. }) = parsed_ins.args[0] + && let Arg::Reg(Reg { reg: Register::R0, .. 
}) = parsed_ins.args[1] + && let Arg::UImm(0) = parsed_ins.args[2] + { + // In Thumb with divided syntax, 0000 disassembles into lsl r0, r0, #0x0 and is a no-op + return Self::Illegal; + } + let args = &parsed_ins.args; match (self, ins.mnemonic(), args[0], args[1], args[2]) { // Find registers with shifted value From c4cd2ea1cad0f232267b9662c9ac37498e9a8b81 Mon Sep 17 00:00:00 2001 From: Aetias Date: Tue, 28 Apr 2026 20:44:33 +0200 Subject: [PATCH 26/45] Force ITCM .text to start at ITCM base address --- lib/src/config/module.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/src/config/module.rs b/lib/src/config/module.rs index 01d5560..4570615 100644 --- a/lib/src/config/module.rs +++ b/lib/src/config/module.rs @@ -860,7 +860,7 @@ impl Module { } fn find_sections_itcm(&mut self, symbol_map: &mut SymbolMap) -> Result<(), ModuleError> { - let text_functions = self + let mut text_functions = self .find_functions( symbol_map, FunctionSearchOptions { @@ -873,6 +873,8 @@ impl Module { &self.default_func_prefix.clone(), )? 
.ok_or_else(|| NoItcmFunctionsSnafu.build())?; + // Force .text start to base address for cases where first function is not at the base address + text_functions.start = self.base_address; let text_end = text_functions.end; self.add_text_section(text_functions)?; From ce3dcaec7eae241aea34bb940cb9999682da364c Mon Sep 17 00:00:00 2001 From: Aetias Date: Tue, 28 Apr 2026 21:39:52 +0200 Subject: [PATCH 27/45] Mark branches outside of program as illegal --- lib/src/analysis/functions.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 8877ddd..25dc89f 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -796,14 +796,20 @@ impl<'a> ParseFunctionContext<'a> { return ParseFunctionState::IllegalIns { address, ins }; } - if let Some(destination) = Function::is_branch(ins, parsed_ins, address) - && destination < self.start_address - && let Some((_, function)) = self.found_functions.range(..=destination).last() - && function.start_address < destination - { - let thumb = matches!(ins, Ins::Thumb(_)); - if thumb != function.is_thumb() { - // Instruction mode must match + if let Some(destination) = Function::is_branch(ins, parsed_ins, address) { + if destination < self.start_address + && let Some((_, function)) = self.found_functions.range(..=destination).last() + && function.start_address < destination + { + let thumb = matches!(ins, Ins::Thumb(_)); + if thumb != function.is_thumb() { + // Instruction mode must match + return ParseFunctionState::IllegalIns { address, ins }; + } + } + + if !(0x01ff8000..0x03000000).contains(&destination) { + // Branch goes outside of program return ParseFunctionState::IllegalIns { address, ins }; } } From 82566ee372ce0ef06c47e8dcf9bcd45fa306bc96 Mon Sep 17 00:00:00 2001 From: Aetias Date: Tue, 28 Apr 2026 21:40:26 +0200 Subject: [PATCH 28/45] Shorten max search distance for .text functions in ARM9 main --- 
lib/src/config/module.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/config/module.rs b/lib/src/config/module.rs index 4570615..207f4a1 100644 --- a/lib/src/config/module.rs +++ b/lib/src/config/module.rs @@ -768,7 +768,7 @@ impl Module { start_address: Some(main_start), end_address: Some(text_max), // Skips over segments of strange EOR instructions which are never executed - max_function_start_search_distance: u32::MAX, + max_function_start_search_distance: 0x2000, use_data_as_upper_bound: true, // There are some handwritten assembly functions in ARM9 main that don't follow the procedure call standard check_defs_uses: false, From 34b99869d6c71f55b2d07d492f30fb67e0d1cefd Mon Sep 17 00:00:00 2001 From: Aetias Date: Sat, 16 May 2026 09:57:36 +0200 Subject: [PATCH 29/45] Force ARM9 .text to end at next section start --- lib/src/config/module.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/src/config/module.rs b/lib/src/config/module.rs index 207f4a1..a93a57d 100644 --- a/lib/src/config/module.rs +++ b/lib/src/config/module.rs @@ -761,7 +761,7 @@ impl Module { let exception_start = exception_data.as_ref().and_then(ExceptionData::exception_start); let text_max = exception_start.unwrap_or(read_only_end); let main_start = self.find_build_info_end_address(arm9); - let FoundFunctions { functions: text_functions, end: mut text_end, .. } = self + let FoundFunctions { functions: text_functions, end: text_end, .. 
} = self .find_functions( symbol_map, FunctionSearchOptions { @@ -782,7 +782,7 @@ impl Module { self.add_text_section(FoundFunctions { functions, start: text_start, end: text_end })?; // Add .exception and .exceptix sections if they exist - if let Some(exception_data) = exception_data { + let text_exceptix_end = if let Some(exception_data) = exception_data { if let Some(exception_start) = exception_data.exception_start() { self.sections.add(Section::new(SectionOptions { name: ".exception".to_string(), @@ -805,11 +805,13 @@ impl Module { comments: Comments::new(), })?)?; - text_end = exception_data.exceptix_end(); - } + exception_data.exceptix_end() + } else { + text_end + }; // .rodata - let rodata_start = rodata_start.unwrap_or(text_end); + let rodata_start = rodata_start.unwrap_or(text_exceptix_end); self.add_rodata_section(rodata_start, ctor.start)?; // .data and .bss @@ -823,12 +825,16 @@ impl Module { if let Some(section_after_text) = section_after_text && text_end != section_after_text.start_address() { + let next_start = section_after_text.start_address(); log::warn!( - "Expected .text to end ({:#010x}) where {} starts ({:#010x})", + "Expected .text to end ({:#010x}) where {} starts ({:#010x}), extending .text to remove the gap", text_end, section_after_text.name(), section_after_text.start_address() ); + + let (_, text_section) = self.sections.by_name_mut(".text").unwrap(); + text_section.set_end_address(next_start); } Ok(()) From b1b39918161b57e931b246c363c61a67e8cba0bd Mon Sep 17 00:00:00 2001 From: Aetias Date: Sat, 16 May 2026 09:58:09 +0200 Subject: [PATCH 30/45] Add `GetExceptix` function case in Thumb --- lib/src/analysis/exception.rs | 38 ++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/lib/src/analysis/exception.rs b/lib/src/analysis/exception.rs index 479732f..49b8241 100644 --- a/lib/src/analysis/exception.rs +++ b/lib/src/analysis/exception.rs @@ -19,18 +19,32 @@ struct GetExceptixFunction { 
end_offset: u32, } -const GET_EXCEPTIX_FUNCTIONS: [GetExceptixFunction; 1] = [GetExceptixFunction { - code: &[ - 0x10, 0x20, 0x9f, 0xe5, // ldr r2, [pc, #0x10] - 0x10, 0x10, 0x9f, 0xe5, // ldr r1, [pc, #0x10] - 0x0c, 0x20, 0x80, 0xe5, // str r2, [r0, #0xc] - 0x10, 0x10, 0x80, 0xe5, // str r1, [r0, #0x10] - 0x01, 0x00, 0xa0, 0xe3, // mov r0, #1 - 0x1e, 0xff, 0x2f, 0xe1, // bx lr - ], - start_offset: 0x18, - end_offset: 0x1c, -}]; +const GET_EXCEPTIX_FUNCTIONS: &[GetExceptixFunction] = &[ + GetExceptixFunction { + code: &[ + 0x10, 0x20, 0x9f, 0xe5, // ldr r2, [pc, #0x10] + 0x10, 0x10, 0x9f, 0xe5, // ldr r1, [pc, #0x10] + 0x0c, 0x20, 0x80, 0xe5, // str r2, [r0, #0xc] + 0x10, 0x10, 0x80, 0xe5, // str r1, [r0, #0x10] + 0x01, 0x00, 0xa0, 0xe3, // mov r0, #1 + 0x1e, 0xff, 0x2f, 0xe1, // bx lr + ], + start_offset: 0x18, + end_offset: 0x1c, + }, + GetExceptixFunction { + code: &[ + 0x02, 0x49, // ldr r1, [pc, #0x8] + 0xc1, 0x60, // str r1, [r0, #0xc] + 0x02, 0x49, // ldr r1, [pc, #0x8] + 0x01, 0x61, // str r1, [r0, #0x10] + 0x01, 0x20, // movs r0, #1 + 0x70, 0x47, // bx lr + ], + start_offset: 0xc, + end_offset: 0x10, + }, +]; #[repr(C)] #[derive(Zeroable, Pod, Clone, Copy)] From dcdf2df00ed0c17ba11b52abc0f077d620ea7aca Mon Sep 17 00:00:00 2001 From: Aetias Date: Sat, 16 May 2026 09:58:57 +0200 Subject: [PATCH 31/45] test: Print whether `--allow-unknown-function-calls` was used --- cli/tests/test_roundtrip.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/cli/tests/test_roundtrip.rs b/cli/tests/test_roundtrip.rs index a08ad9b..8aad352 100644 --- a/cli/tests/test_roundtrip.rs +++ b/cli/tests/test_roundtrip.rs @@ -59,11 +59,13 @@ fn test_roundtrip() -> Result<()> { let rom_config = extract_path.join("config.yaml"); // Init dsd project + let mut allowed_unknown_function_calls = false; let dsd_config_dir = dsd_init(&project_path, &rom_config, false).or_else(|e| { match e.downcast_ref::() { Some(ModuleError::FindLocalData { source: 
FindLocalDataError::LocalFunctionNotFound { .. }, }) => { + allowed_unknown_function_calls = true; log::info!("dsd init failed, trying again with unknown function calls"); dsd_init(&project_path, &rom_config, true) } @@ -73,10 +75,17 @@ fn test_roundtrip() -> Result<()> { let dsd_config_yaml = dsd_config_dir.join("arm9/config.yaml"); let dsd_config = Config::from_file(&dsd_config_yaml)?; let target_config_dir = configs_dir.join(base_name); - assert!( - target_config_dir.exists(), - "Init succeeded, copy the config directory to tests/configs/ to compare future runs" - ); + if allowed_unknown_function_calls { + assert!( + target_config_dir.exists(), + "Init succeeded with unknown function calls, copy the config directory to tests/configs/ to compare future runs" + ); + } else { + assert!( + target_config_dir.exists(), + "Init succeeded, copy the config directory to tests/configs/ to compare future runs" + ); + } assert!(directory_equals(&target_config_dir, &dsd_config_dir)?); From 118fb28884eec74861ee42992511b742576259e1 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sat, 16 May 2026 09:59:48 +0200 Subject: [PATCH 32/45] lcf: Allow spaces in directories leading up to .o files --- cli/src/cmd/lcf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/cmd/lcf.rs b/cli/src/cmd/lcf.rs index fdd5ef6..456f82b 100644 --- a/cli/src/cmd/lcf.rs +++ b/cli/src/cmd/lcf.rs @@ -190,7 +190,7 @@ impl Lcf { &config_dir.join(&config.delinks_path) }; let file = base_path.join(file_path).with_extension("o").clean(); - writeln!(writer, "{}", file.display())?; + writeln!(writer, "\"{}\"", file.display())?; } Ok(()) } From 6bde2091f043d82064e275dcfdcb74a6be4fdaeb Mon Sep 17 00:00:00 2001 From: Aetias Date: Sat, 16 May 2026 10:00:27 +0200 Subject: [PATCH 33/45] dis: Refuse functions in .bss sections --- cli/src/cmd/dis.rs | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/cli/src/cmd/dis.rs b/cli/src/cmd/dis.rs index 62836d1..811f1df 
100644 --- a/cli/src/cmd/dis.rs +++ b/cli/src/cmd/dis.rs @@ -10,7 +10,7 @@ use ds_decomp::config::{ config::Config, delinks::{DelinkFile, Delinks}, module::Module, - section::Section, + section::{Section, SectionKind}, symbol::{InstructionMode, Symbol, SymbolKind, SymbolMaps}, }; use ds_rom::rom::{Rom, RomLoadOptions}; @@ -150,10 +150,27 @@ impl Disassemble { ); match symbol.kind { SymbolKind::Function(sym_function) => { + if section.kind() == SectionKind::Bss { + log::error!( + "Can't disassemble function at {:#010x} in {} because it's in uninitialized section {}", + symbol.addr, + module.kind(), + section.name() + ); + continue; + } + + let code = code.with_context(|| { + format!( + "No code to dump for function at {:#010x} in {}", + symbol.addr, + module.kind() + ) + })?; if sym_function.unknown { let function_offset = symbol.addr - section.start_address(); if offset < function_offset { - Self::dump_bytes(code.unwrap(), offset, function_offset, writer)?; + Self::dump_bytes(code, offset, function_offset, writer)?; writeln!(writer)?; offset = function_offset; } @@ -178,7 +195,7 @@ impl Disassemble { let function_offset = function.start_address() - section.start_address(); if offset < function_offset { - Self::dump_bytes(code.unwrap(), offset, function_offset, writer)?; + Self::dump_bytes(code, offset, function_offset, writer)?; writeln!(writer)?; } From 5efbe8f5e285d860ad788f8977db5c36aebdb89d Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 12:58:11 +0200 Subject: [PATCH 34/45] Handle jump table case using `ldrb` instead of `ldrh` --- cli/src/analysis/functions.rs | 101 +++++++++++++++++------ cli/src/config/symbol.rs | 29 +++---- lib/src/analysis/functions.rs | 7 +- lib/src/analysis/jump_table.rs | 146 +++++++++++++++++++++++++-------- lib/src/analysis/mod.rs | 2 +- lib/src/config/symbol.rs | 13 +-- 6 files changed, 216 insertions(+), 82 deletions(-) diff --git a/cli/src/analysis/functions.rs b/cli/src/analysis/functions.rs index 0dd7236..7abdbff 
100644 --- a/cli/src/analysis/functions.rs +++ b/cli/src/analysis/functions.rs @@ -1,7 +1,13 @@ use std::io; use anyhow::{Result, bail}; -use ds_decomp::analysis::functions::Function; +use ds_decomp::{ + analysis::{ + functions::Function, + jump_table::{JumpTableKind, ThumbJumpTableKind}, + }, + config::symbol::SymJumpTable, +}; use unarm::{ArmVersion, DisplayOptions, Endian, ParseFlags, ParseMode, Parser, RegNames}; use crate::config::symbol::{SymDataExt, SymbolLookup}; @@ -88,21 +94,38 @@ impl FunctionExt for Function { // write instruction match jump_table { - Some((table, sym)) if !table.code => { - let (directive, value) = if self.is_thumb() { - (".short", i32::from(ins.code() as i16)) - } else { - (".word", ins.code().cast_signed()) - }; - let label_address = (sym.addr.cast_signed() + value + 2).cast_unsigned(); - let Some(label) = symbols.symbol_map.get_label(label_address)? else { - log::error!( - "Expected label for jump table destination {label_address:#010x}" - ); - bail!("Expected label for jump table destination {label_address:#010x}"); - }; - write!(w, " {directive} {} - {} - 2", label.name, sym.name)?; - } + Some((SymJumpTable { kind: JumpTableKind::Thumb(kind), .. 
}, sym)) => match kind { + ThumbJumpTableKind::Halfword => { + let value = i32::from(ins.code() as i16); + write_numerical_jump_table_entry( + w, symbols, sym, value, ".short", address, + )?; + } + ThumbJumpTableKind::Byte => { + let code = ins.code() as i16; + let [first_value, second_value] = code.to_le_bytes(); + let first_value = first_value as i8 as i32; + let second_value = second_value as i8 as i32; + write_numerical_jump_table_entry( + w, + symbols, + sym, + first_value, + ".byte", + address, + )?; + write_jump_table_case(w, jump_table, 1, address)?; + write_numerical_jump_table_entry( + w, + symbols, + sym, + second_value, + ".byte", + address + 1, + )?; + write_jump_table_case(w, jump_table, 1, address + 1)?; + } + }, _ => { if parser.mode != ParseMode::Data { write!(w, " ")?; @@ -127,22 +150,15 @@ impl FunctionExt for Function { { symbols.write_ambiguous_symbols_comment(w, address, reference)?; } + write_jump_table_case(w, jump_table, ins_size, address)?; } } - // write jump table case - if let Some((_table, sym)) = jump_table { - let case = (address - sym.addr) / ins_size; - writeln!(w, " ; case {case}")?; - } else { - writeln!(w)?; - } - // write pool constants let next_address = address + ins_size; for i in 0.. 
{ let pool_address = next_address + i * 4; - if self.pool_constants().contains(&pool_address) { + if self.pool_constants().contains_key(&pool_address) { let start = pool_address - base_address; let bytes = &module_code[start as usize..]; let const_value = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); @@ -189,3 +205,38 @@ impl FunctionExt for Function { Ok(()) } } + +fn write_jump_table_case( + w: &mut W, + jump_table: Option<(SymJumpTable, &ds_decomp::config::symbol::Symbol)>, + ins_size: u32, + address: u32, +) -> std::result::Result<(), io::Error> { + if let Some((_table, sym)) = jump_table { + let case = (address - sym.addr) / ins_size; + writeln!(w, " ; case {case}") + } else { + writeln!(w) + } +} + +fn write_numerical_jump_table_entry( + w: &mut W, + symbols: &SymbolLookup<'_>, + sym: &ds_decomp::config::symbol::Symbol, + value: i32, + directive: &str, + address: u32, +) -> Result<(), anyhow::Error> { + let label_address = (sym.addr.cast_signed() + value + 2).cast_unsigned(); + let Some(label) = symbols.symbol_map.get_label(label_address)? 
else { + log::error!( + "Expected label for jump table destination from {address:#010x} to {label_address:#010x}" + ); + bail!( + "Expected label for jump table destination from {address:#010x} to {label_address:#010x}" + ); + }; + write!(w, " {} {} - {} - 2", directive, label.name, sym.name)?; + Ok(()) +} diff --git a/cli/src/config/symbol.rs b/cli/src/config/symbol.rs index 711694a..5931fca 100644 --- a/cli/src/config/symbol.rs +++ b/cli/src/config/symbol.rs @@ -1,12 +1,16 @@ use std::{collections::BTreeMap, io}; -use anyhow::{Result, anyhow, bail}; -use ds_decomp::config::{ - Comments, - module::ModuleKind, - relocations::Relocations, - symbol::{ - InstructionMode, SymData, SymFunction, SymLabel, Symbol, SymbolKind, SymbolMap, SymbolMaps, +use anyhow::{Result, bail}; +use ds_decomp::{ + analysis::jump_table::JumpTableKind, + config::{ + Comments, + module::ModuleKind, + relocations::Relocations, + symbol::{ + InstructionMode, SymData, SymFunction, SymLabel, Symbol, SymbolKind, SymbolMap, + SymbolMaps, + }, }, }; use ds_rom::rom::raw::AutoloadKind; @@ -120,13 +124,10 @@ impl SymbolExt for Symbol { InstructionMode::Thumb => Some("$t"), }, SymbolKind::PoolConstant => Some("$d"), - SymbolKind::JumpTable(jump_table) => { - if jump_table.code { - Some("$a") - } else { - Some("$d") - } - } + SymbolKind::JumpTable(jump_table) => match jump_table.kind { + JumpTableKind::Arm => Some("$a"), + JumpTableKind::Thumb(_) => Some("$d"), + }, SymbolKind::Data(_) => Some("$d"), SymbolKind::Bss(_) => None, } diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 25dc89f..4b6400e 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -765,9 +765,14 @@ impl<'a> ParseFunctionContext<'a> { self.last_conditional_destination.max(Some(table_end_address)); self.jump_table_end_address = Some(table_end_address); } - if let Some(label) = self.jump_table_state.get_label(address, ins) { + if let Some((label, second_label)) = 
self.jump_table_state.get_labels(address, ins) { self.labels.insert(label); self.last_conditional_destination = self.last_conditional_destination.max(Some(label)); + if let Some(second_label) = second_label { + self.labels.insert(second_label); + self.last_conditional_destination = + self.last_conditional_destination.max(Some(second_label)); + } } if self.jump_table_state.is_numerical_jump_offset() { diff --git a/lib/src/analysis/jump_table.rs b/lib/src/analysis/jump_table.rs index 129b2fc..68357a7 100644 --- a/lib/src/analysis/jump_table.rs +++ b/lib/src/analysis/jump_table.rs @@ -9,8 +9,13 @@ use super::functions::JumpTables; pub struct JumpTable { pub address: u32, pub size: u32, - /// If true, the jump table entries are instructions. Otherwise, they are data. - pub code: bool, + pub kind: JumpTableKind, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum JumpTableKind { + Arm, + Thumb(ThumbJumpTableKind), } #[derive(Clone, Copy, Debug)] @@ -40,10 +45,10 @@ impl JumpTableState { } } - pub fn get_label(&self, address: u32, ins: Ins) -> Option { + pub fn get_labels(&self, address: u32, ins: Ins) -> Option<(u32, Option)> { match self { Self::Arm(_) => None, - Self::Thumb(state) => state.get_label(address, ins), + Self::Thumb(state) => state.get_labels(address, ins), } } @@ -129,7 +134,7 @@ impl JumpTableStateArm { jump_tables.insert(table_address, JumpTable { address: table_address, size, - code: true, + kind: JumpTableKind::Arm, }); Self::ValidJumpTable { table_address: address + 8, limit } } @@ -178,7 +183,7 @@ impl JumpTableStateArm { jump_tables.insert(table_address, JumpTable { address: table_address, size, - code: true, + kind: JumpTableKind::Arm, }); Self::ValidJumpTable { table_address: address + 8, limit } } @@ -201,7 +206,7 @@ impl JumpTableStateArm { jump_tables.insert(table_address, JumpTable { address: table_address, size, - code: true, + kind: JumpTableKind::Arm, }); Self::ValidJumpTable { table_address: address + 8, limit } } @@ -253,29 
+258,37 @@ pub enum JumpTableStateThumb { BranchNegative { index: Register, limit: u32 }, /// `add offset, index, index` multiply index by 2 to calculate jump table offset + /// `mov offset, index` multiply index by 1 (8-bit table items) AddRegReg { index: Register, limit: u32 }, /// `add offset, pc` turn jump table offset into a PC-relative address AddRegPc { offset: Register, limit: u32 }, /// `ldrh jump, [offset, #imm]` load 16-bit jump value from table + /// `ldrb jump, [offset, #imm]` load 8-bit jump value from table LoadOffset { offset: Register, limit: u32, pc_base: u32 }, /// `lsl jump, jump, #0x10` sign extend - SignExtendLsl { jump: Register, table_address: u32, limit: u32 }, + SignExtendLsl { jump: Register, table_address: u32, limit: u32, kind: ThumbJumpTableKind }, /// `asr jump, jump, #0x10` sign extend - SignExtendAsr { jump: Register, table_address: u32, limit: u32 }, + SignExtendAsr { jump: Register, table_address: u32, limit: u32, kind: ThumbJumpTableKind }, /// `add pc, jump` do the jump /// `add jump, pc` calculate the jump destination - AddPcReg { jump: Register, table_address: u32, limit: u32 }, + AddPcReg { jump: Register, table_address: u32, limit: u32, kind: ThumbJumpTableKind }, /// `bx jump` jump to the destination - BxJump { jump: Register, table_address: u32, limit: u32 }, + BxJump { jump: Register, table_address: u32, limit: u32, kind: ThumbJumpTableKind }, /// valid table detected, starts from `table_address` with a size of `limit` - ValidJumpTable { table_address: u32, limit: u32 }, + ValidJumpTable { table_address: u32, limit: u32, kind: ThumbJumpTableKind }, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ThumbJumpTableKind { + Halfword, + Byte, } impl JumpTableStateThumb { @@ -299,9 +312,12 @@ impl JumpTableStateThumb { parsed_ins: &ParsedIns, jump_tables: &mut JumpTables, ) -> Self { - if let Some(start) = self.check_start(parsed_ins) { + if let Some(end_address) = self.table_end_address() + && address < end_address 
+ { + } else if let Some(start) = self.check_start(parsed_ins) { return start; - }; + } let args = &parsed_ins.args; match self { @@ -372,6 +388,19 @@ impl JumpTableStateThumb { Self::default() } } + ( + "mov", + Argument::Reg(Reg { reg: table_offset, .. }), + Argument::Reg(Reg { reg, .. }), + Argument::None, + Argument::None, + ) => { + if reg == index { + Self::AddRegPc { offset: table_offset, limit } + } else { + Self::default() + } + } _ => Self::default(), } } @@ -402,12 +431,32 @@ impl JumpTableStateThumb { Argument::None, ) if reg == base_reg => { let table_start = (pc_base as i32 - 2 + value * 2) as u32; - Self::SignExtendLsl { jump: offset, table_address: table_start, limit } + Self::SignExtendLsl { + jump: offset, + table_address: table_start, + limit, + kind: ThumbJumpTableKind::Halfword, + } + } + ( + "ldrb", + Argument::Reg(Reg { reg, .. }), + Argument::Reg(Reg { reg: base_reg, deref: true, .. }), + Argument::OffsetImm(OffsetImm { post_indexed: false, value }), + Argument::None, + ) if reg == base_reg => { + let table_start = (pc_base as i32 - 2 + value * 2) as u32; + Self::SignExtendLsl { + jump: offset, + table_address: table_start, + limit, + kind: ThumbJumpTableKind::Byte, + } } _ => Self::default(), } } - Self::SignExtendLsl { jump, table_address, limit } => { + Self::SignExtendLsl { jump, table_address, limit, kind } => { match (parsed_ins.mnemonic, args[0], args[1], args[2], args[3]) { ( "lsl", @@ -416,12 +465,12 @@ impl JumpTableStateThumb { Argument::UImm(value), Argument::None, ) if dest_reg == src_reg && dest_reg == jump && value == 0x10 => { - Self::SignExtendAsr { jump, table_address, limit } + Self::SignExtendAsr { jump, table_address, limit, kind } } _ => Self::default(), } } - Self::SignExtendAsr { jump, table_address, limit } => { + Self::SignExtendAsr { jump, table_address, limit, kind } => { match (parsed_ins.mnemonic, args[0], args[1], args[2], args[3]) { ( "asr", @@ -430,12 +479,12 @@ impl JumpTableStateThumb { 
Argument::UImm(value), Argument::None, ) if dest_reg == src_reg && dest_reg == jump && value == 0x10 => { - Self::AddPcReg { jump, table_address, limit } + Self::AddPcReg { jump, table_address, limit, kind } } _ => Self::default(), } } - Self::AddPcReg { jump, table_address, limit } => { + Self::AddPcReg { jump, table_address, limit, kind } => { match (parsed_ins.mnemonic, args[0], args[1], args[2]) { ( "add", @@ -443,60 +492,76 @@ impl JumpTableStateThumb { Argument::Reg(Reg { reg, .. }), Argument::None, ) if reg == jump => { - let size = (limit + 1) * 2; + let size = (limit + 1) * kind.item_size(); jump_tables.insert(table_address, JumpTable { address: table_address, size, - code: false, + kind: JumpTableKind::Thumb(kind), }); - Self::ValidJumpTable { table_address, limit } + Self::ValidJumpTable { table_address, limit, kind } } ( "add", Argument::Reg(Reg { reg, .. }), Argument::Reg(Reg { reg: Register::Pc, .. }), Argument::None, - ) if reg == jump => Self::BxJump { jump, table_address, limit }, + ) if reg == jump => Self::BxJump { jump, table_address, limit, kind }, _ => Self::default(), } } - Self::BxJump { jump, table_address, limit } => { + Self::BxJump { jump, table_address, limit, kind } => { match (parsed_ins.mnemonic, args[0], args[1]) { ("bx", Argument::Reg(Reg { reg, .. 
}), Argument::None) if reg == jump => { let table_address = table_address - 2; - let size = (limit + 1) * 2; + let size = (limit + 1) * kind.item_size(); jump_tables.insert(table_address, JumpTable { address: table_address, size, - code: false, + kind: JumpTableKind::Thumb(kind), }); - Self::ValidJumpTable { table_address, limit } + Self::ValidJumpTable { table_address, limit, kind } } _ => Self::default(), } } - Self::ValidJumpTable { table_address, limit } => { - let end = table_address + limit * 2; - if address > end { Self::default() } else { self } + Self::ValidJumpTable { table_address, limit, kind } => { + let end = table_address + (limit + 1) * kind.item_size(); + if address >= end { Self::default() } else { self } } } } pub fn table_end_address(&self) -> Option { match self { - Self::ValidJumpTable { table_address, limit } => Some(table_address + (limit + 1) * 2), + Self::ValidJumpTable { table_address, limit, kind } => { + Some(table_address + (limit + 1) * kind.item_size()) + } _ => None, } } - pub fn get_label(&self, address: u32, ins: Ins) -> Option { + pub fn get_labels(&self, address: u32, ins: Ins) -> Option<(u32, Option)> { match self { - Self::ValidJumpTable { table_address, limit } => { - let end = table_address + limit * 2; + Self::ValidJumpTable { table_address, limit, kind } => { + let end = table_address + limit * kind.item_size(); if address < *table_address || address > end { None } else { - Some((*table_address as i32 + ins.code() as i16 as i32 + 2) as u32) + let code = ins.code() as i16; + match kind { + ThumbJumpTableKind::Halfword => { + Some(((*table_address as i32 + code as i32 + 2) as u32, None)) + } + ThumbJumpTableKind::Byte => { + let [first_value, second_value] = code.to_le_bytes(); + let first_value = first_value as i8 as i32; + let second_value = second_value as i8 as i32; + Some(( + (*table_address as i32 + first_value + 2) as u32, + Some((*table_address as i32 + second_value + 2) as u32), + )) + } + } } } _ => None, @@ -507,3 
+572,12 @@ impl JumpTableStateThumb { matches!(self, JumpTableStateThumb::ValidJumpTable { .. }) } } + +impl ThumbJumpTableKind { + fn item_size(self) -> u32 { + match self { + ThumbJumpTableKind::Halfword => 2, + ThumbJumpTableKind::Byte => 1, + } + } +} diff --git a/lib/src/analysis/mod.rs b/lib/src/analysis/mod.rs index ae3fbe0..0e00830 100644 --- a/lib/src/analysis/mod.rs +++ b/lib/src/analysis/mod.rs @@ -6,7 +6,7 @@ mod function_start; pub mod functions; mod illegal_code; mod inline_table; -pub(crate) mod jump_table; +pub mod jump_table; pub(crate) mod main; pub mod secure_area; diff --git a/lib/src/config/symbol.rs b/lib/src/config/symbol.rs index 1015932..a582f3d 100644 --- a/lib/src/config/symbol.rs +++ b/lib/src/config/symbol.rs @@ -13,7 +13,10 @@ use snafu::{Snafu, ensure}; use super::{ParseContext, config::Config, iter_attributes, module::ModuleKind}; use crate::{ - analysis::{functions::Function, jump_table::JumpTable}, + analysis::{ + functions::Function, + jump_table::{JumpTable, JumpTableKind}, + }, config::{CommentedLine, Comments}, util::{ io::{FileError, create_file}, @@ -563,7 +566,7 @@ impl SymbolMap { table: &JumpTable, ) -> Result<(SymbolId, &Symbol), SymbolMapError> { let name = Self::label_name(table.address); - self.add_if_new_address(Symbol::new_jump_table(name, table.address, table.size, table.code)) + self.add_if_new_address(Symbol::new_jump_table(name, table.address, table.size, table.kind)) } pub fn add_data( @@ -985,10 +988,10 @@ impl Symbol { } } - pub fn new_jump_table(name: String, addr: u32, size: u32, code: bool) -> Self { + pub fn new_jump_table(name: String, addr: u32, size: u32, kind: JumpTableKind) -> Self { Self { name, - kind: SymbolKind::JumpTable(SymJumpTable { size, code }), + kind: SymbolKind::JumpTable(SymJumpTable { size, kind }), addr, ambiguous: false, local: true, @@ -1301,7 +1304,7 @@ impl Display for InstructionMode { #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub struct SymJumpTable { pub size: u32, - pub 
code: bool, + pub kind: JumpTableKind, } #[derive(Clone, Copy, PartialEq, Eq, Debug)] From 5ed48f53c088887240334119840d887c805b2e1a Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 13:00:35 +0200 Subject: [PATCH 35/45] init: Don't set upper bound when pool constant is used for tail calls --- cli/src/analysis/data.rs | 3 +- cli/src/analysis/signature.rs | 4 +- cli/src/cmd/fix/thumb_nop.rs | 2 +- lib/src/analysis/ctor.rs | 4 +- lib/src/analysis/data.rs | 2 +- lib/src/analysis/functions.rs | 156 ++++++++++++++++++++++++++-------- lib/src/analysis/main.rs | 11 ++- lib/src/config/module.rs | 5 +- 8 files changed, 136 insertions(+), 51 deletions(-) diff --git a/cli/src/analysis/data.rs b/cli/src/analysis/data.rs index 75ff483..ec46d74 100644 --- a/cli/src/analysis/data.rs +++ b/cli/src/analysis/data.rs @@ -194,8 +194,7 @@ fn find_external_data_from_pools( function: &Function, result: &mut RelocationResult, ) -> Result<(), AnalyzeExternalReferencesError> { - let module = &options.modules[options.module_index]; - for pool_constant in function.iter_pool_constants(module.code(), module.base_address()) { + for pool_constant in function.iter_pool_constants() { find_external_data(options, pool_constant.address, pool_constant.value, result)?; } Ok(()) diff --git a/cli/src/analysis/signature.rs b/cli/src/analysis/signature.rs index 8a51298..fa5321d 100644 --- a/cli/src/analysis/signature.rs +++ b/cli/src/analysis/signature.rs @@ -92,7 +92,7 @@ impl Signatures { for (address, ins, parsed_ins) in parser { let mut ins_bitmask: u32 = 0xffffffff; - if function.pool_constants().contains(&address) { + if function.pool_constants().contains_key(&address) { // TODO: Only mask out pool constants which are pointers? 
parser.seek_forward(address + 4); // Skip pool constants bitmask.extend_from_slice(&[0x00, 0x00, 0x00, 0x00]); @@ -181,7 +181,7 @@ impl Signatures { relocations.push(SignatureRelocation { offset, name, kind, addend }); } - for &address in function.pool_constants() { + for &address in function.pool_constants().keys() { let offset = (address - function.start_address()) as usize; bitmask[offset..offset + 4].fill(0); pattern[offset..offset + 4].fill(0); diff --git a/cli/src/cmd/fix/thumb_nop.rs b/cli/src/cmd/fix/thumb_nop.rs index af0a507..17f6c02 100644 --- a/cli/src/cmd/fix/thumb_nop.rs +++ b/cli/src/cmd/fix/thumb_nop.rs @@ -89,7 +89,7 @@ impl FixThumbNop { } let last_instruction_address = function.end_address() - 2; - if function.pool_constants().contains(&(last_instruction_address & !3)) { + if function.pool_constants().contains_key(&(last_instruction_address & !3)) { continue; } // Function is Thumb and does not end with a pool constant diff --git a/lib/src/analysis/ctor.rs b/lib/src/analysis/ctor.rs index e9f2969..d218cc2 100644 --- a/lib/src/analysis/ctor.rs +++ b/lib/src/analysis/ctor.rs @@ -135,9 +135,9 @@ impl CtorRange { Err(e) => return Err(e.into()), }; - let p_ctor_start = run_inits_func + let (p_ctor_start, _) = run_inits_func .pool_constants() - .first() + .first_key_value() .ok_or_else(|| NoInitPoolConstantsSnafu.build())?; let ctor_start_data = &run_inits_code[(p_ctor_start - run_inits_addr) as usize..]; let ctor_start = u32::from_le_bytes([ diff --git a/lib/src/analysis/data.rs b/lib/src/analysis/data.rs index b606985..d91d59f 100644 --- a/lib/src/analysis/data.rs +++ b/lib/src/analysis/data.rs @@ -60,7 +60,7 @@ pub fn find_local_data_from_pools( } = options; let address_range = None; - for pool_constant in function.iter_pool_constants(code, base_address) { + for pool_constant in function.iter_pool_constants() { let pointer = pool_constant.value; if let Some(reloc_kind) = relocation_overrides.get(&pointer) { 
relocations.add(Relocation::new(RelocationOptions { diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 4b6400e..5105eb0 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -27,7 +27,7 @@ use crate::{ // All keys in the types below are instruction addresses pub type Labels = BTreeSet; -pub type PoolConstants = BTreeSet; +pub type PoolConstants = BTreeMap; pub type JumpTables = BTreeMap; pub type InlineTables = BTreeMap; pub type FunctionCalls = BTreeMap; @@ -109,7 +109,12 @@ impl Function { Some((address as i32 + dest).try_into().unwrap()) } - fn is_pool_load(ins: Ins, parsed_ins: &ParsedIns, address: u32, thumb: bool) -> Option { + fn is_pool_load( + ins: Ins, + parsed_ins: &ParsedIns, + address: u32, + thumb: bool, + ) -> Option<(u32, Register)> { if ins.mnemonic() != "ldr" { return None; } @@ -125,7 +130,7 @@ impl Function { // ldr *, [pc + *] let load_address = (address as i32 + offset.value) as u32 & !3; let load_address = load_address + if thumb { 4 } else { 8 }; - Some(load_address) + Some((load_address, dest.reg)) } } _ => None, @@ -187,7 +192,7 @@ impl Function { } }; - if let Some(first_pool_address) = function.pool_constants.first() + if let Some((first_pool_address, _)) = function.pool_constants.first_key_value() && *first_pool_address < function.start_address { log::info!( @@ -416,7 +421,7 @@ impl Function { // Look for pointers to data in this module, to use as an upper bound for finding functions if search_options.use_data_as_upper_bound { - for pool_constant in function.iter_pool_constants(module_code, base_address) { + for pool_constant in function.iter_pool_constants() { let pointer_value = pool_constant.value & !1; if upper_bounds.contains(&pointer_value) { continue; @@ -425,22 +430,30 @@ impl Function { continue; } - let offset = (pointer_value - base_address) as usize; - if offset >= module_code.len() { - continue; - } + match &pool_constant.usage { + // Not data, skip + 
PoolConstantUsage::Call => continue, + // Maybe data, run basic check for whether it is code + PoolConstantUsage::Other => { + let offset = (pointer_value - base_address) as usize; + if offset >= module_code.len() { + continue; + } - let thumb = Function::is_thumb_function(pointer_value, &module_code[offset..]); - let mut parser = Parser::new( - if thumb { ParseMode::Thumb } else { ParseMode::Arm }, - pointer_value, - Endian::Little, - PARSE_FLAGS, - &module_code[offset..], - ); - let (address, ins, parsed_ins) = parser.next().unwrap(); - if is_valid_function_start(address, ins, &parsed_ins) { - continue; + let thumb = + Function::is_thumb_function(pointer_value, &module_code[offset..]); + let mut parser = Parser::new( + if thumb { ParseMode::Thumb } else { ParseMode::Arm }, + pointer_value, + Endian::Little, + PARSE_FLAGS, + &module_code[offset..], + ); + let (address, ins, parsed_ins) = parser.next().unwrap(); + if is_valid_function_start(address, ins, &parsed_ins) { + continue; + } + } } // The pool constant points to data, limit the upper bound @@ -466,7 +479,7 @@ impl Function { for address in self.labels.iter() { symbol_map.add_label(*address, self.thumb)?; } - for address in self.pool_constants.iter() { + for (address, _) in self.pool_constants.iter() { symbol_map.add_pool_constant(*address)?; } for jump_table in self.jump_tables() { @@ -578,16 +591,8 @@ impl Function { &self.pool_constants } - pub fn iter_pool_constants<'a>( - &'a self, - module_code: &'a [u8], - base_address: u32, - ) -> impl Iterator + 'a { - self.pool_constants.iter().map(move |&address| { - let start = (address - base_address) as usize; - let bytes = &module_code[start..]; - PoolConstant { address, value: u32::from_le_slice(bytes) } - }) + pub fn iter_pool_constants(&self) -> impl Iterator { + self.pool_constants.values() } pub fn function_calls(&self) -> &FunctionCalls { @@ -639,6 +644,9 @@ struct ParseFunctionContext<'a> { module_end_address: u32, existing_functions: Option<&'a 
BTreeMap>, found_functions: &'a BTreeMap, + base_address: u32, + /// The code for this module, starting at `base_address` + code: &'a [u8], /// Address of last conditional instruction, so we can detect the final return instruction last_conditional_destination: Option, @@ -657,12 +665,18 @@ struct ParseFunctionContext<'a> { /// Whether to check that all registers used in the instruction are defined check_defs_uses: bool, defined_registers: BTreeSet, + register_values: [Option<(u32, RegValueSrc)>; 16], prev_ins: Option, prev_parsed_ins: Option, prev_address: Option, } +#[derive(Clone, Copy)] +enum RegValueSrc { + PoolConstant(u32), +} + #[derive(Debug, Snafu)] pub enum IntoFunctionError { #[snafu(display("Cannot turn parse context into function before parsing is done"))] @@ -680,11 +694,13 @@ impl<'a> ParseFunctionContext<'a> { let FunctionParseOptions { name, start_address, + base_address, known_end_address, module_start_address, module_end_address, existing_functions, check_defs_uses, + module_code, .. } = options; @@ -720,6 +736,8 @@ impl<'a> ParseFunctionContext<'a> { module_end_address, existing_functions, found_functions, + base_address, + code: module_code, last_conditional_destination: None, last_pool_address: None, @@ -735,6 +753,7 @@ impl<'a> ParseFunctionContext<'a> { check_defs_uses, defined_registers, + register_values: [None; 16], prev_ins: None, prev_parsed_ins: None, @@ -749,7 +768,7 @@ impl<'a> ParseFunctionContext<'a> { ins: Ins, parsed_ins: &ParsedIns, ) -> ParseFunctionState { - if self.pool_constants.contains(&address) { + if self.pool_constants.contains_key(&address) { parser.seek_forward(address + 4); return ParseFunctionState::Continue; } @@ -819,6 +838,43 @@ impl<'a> ParseFunctionContext<'a> { } } + // Check register usage + #[allow(clippy::single_match)] // Remove this line if more cases are added + match (parsed_ins.mnemonic, &parsed_ins.args[0]) { + ("bx", Argument::Reg(Reg { reg, .. 
})) => { + if let Some((_, src)) = &self.register_values[*reg as usize] { + match src { + RegValueSrc::PoolConstant(pool_address) => { + self.pool_constants.get_mut(pool_address).unwrap().usage = + PoolConstantUsage::Call; + } + } + } + } + _ => {} + } + + // Clear tracked register values + if let Some(defs) = match ins { + Ins::Arm(ins) => Some(ins.defs(&PARSE_FLAGS)), + Ins::Thumb(ins) => Some(ins.defs(&PARSE_FLAGS)), + Ins::Data => None, + } { + for def in defs { + match def { + Argument::Reg(reg) => { + self.register_values[reg.reg as usize] = None; + } + Argument::RegList(reg_list) => { + for reg in reg_list.iter() { + self.register_values[reg as usize] = None; + } + } + _ => {} + } + } + } + let in_conditional_block = Some(address) < self.last_conditional_destination; let is_return = self.is_return( ins, @@ -908,8 +964,28 @@ impl<'a> ParseFunctionContext<'a> { } } - if let Some(pool_address) = Function::is_pool_load(ins, parsed_ins, address, self.thumb) { - self.pool_constants.insert(pool_address); + if let Some((pool_address, register)) = + Function::is_pool_load(ins, parsed_ins, address, self.thumb) + { + let start = (pool_address - self.base_address) as usize; + let Some(bytes) = self.code.get(start..) 
else { + panic!( + "{:#010x} {:#010x} {} {:x?}", + address, + self.base_address, + pool_address as isize - self.base_address as isize, + &self.code[0..16] + ); + }; + let const_value = u32::from_le_slice(bytes); + self.register_values[register as usize] = + Some((const_value, RegValueSrc::PoolConstant(pool_address))); + + self.pool_constants.insert(pool_address, PoolConstant { + address: pool_address, + value: const_value, + usage: PoolConstantUsage::Other, + }); self.last_pool_address = self.last_pool_address.max(Some(pool_address)); } @@ -1032,7 +1108,7 @@ impl<'a> ParseFunctionContext<'a> { } let next_address = address + ins_size; - if self.pool_constants.contains(&next_address) { + if self.pool_constants.contains_key(&next_address) { let branch_backwards = destination <= address; // Load instructions in ARM mode can have an offset of up to ±4kB. Therefore, some functions must @@ -1059,7 +1135,7 @@ impl<'a> ParseFunctionContext<'a> { } else { let after_pools = (next_address..) .step_by(4) - .find(|addr| !self.pool_constants.contains(addr)) + .find(|addr| !self.pool_constants.contains_key(addr)) .unwrap(); log::warn!( "No label past constant pool at {:#x}, jumping to first address not occupied by a pool constant ({:#x})", @@ -1283,7 +1359,15 @@ pub struct CalledFunction { pub thumb: bool, } +#[derive(Debug, Clone)] pub struct PoolConstant { pub address: u32, pub value: u32, + pub usage: PoolConstantUsage, +} + +#[derive(Debug, Clone)] +pub enum PoolConstantUsage { + Call, + Other, } diff --git a/lib/src/analysis/main.rs b/lib/src/analysis/main.rs index 3c0c7fa..442b86b 100644 --- a/lib/src/analysis/main.rs +++ b/lib/src/analysis/main.rs @@ -37,9 +37,12 @@ impl MainFunction { let mut parser = function.parser(module_code, base_address); let ins_size = parser.mode.instruction_size(0) as u32; - let last_ins_addr = - function.pool_constants().first().ok_or_else(|| NoPoolConstantsSnafu.build())? 
- - ins_size; + let last_ins_addr = function + .pool_constants() + .first_key_value() + .map(|(pool_addr, _)| pool_addr) + .ok_or_else(|| NoPoolConstantsSnafu.build())? + - ins_size; parser.seek_forward(last_ins_addr); let (_, _, last_ins) = parser.next().unwrap(); @@ -51,7 +54,7 @@ impl MainFunction { let mut p_tail_call = None; for (address, _ins, parsed_ins) in function.parser(module_code, base_address) { - if function.pool_constants().contains(&address) { + if function.pool_constants().contains_key(&address) { break; } let args = &parsed_ins.args; diff --git a/lib/src/config/module.rs b/lib/src/config/module.rs index a93a57d..5e20a5a 100644 --- a/lib/src/config/module.rs +++ b/lib/src/config/module.rs @@ -407,13 +407,12 @@ impl Module { if sym_function.unknown { continue; } - let offset = symbol.addr - base_address; let size = sym_function.size; let parse_result = Function::parse_function(FunctionParseOptions { name: symbol.name.clone(), start_address: symbol.addr, - base_address: symbol.addr, - module_code: &code[offset as usize..], + base_address, + module_code: code, known_end_address: Some(symbol.addr + size), module_start_address: base_address, module_end_address: end_address, From e13d8fb82ffb37e52d9fcd72bf54a24dc97bda45 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 13:12:16 +0200 Subject: [PATCH 36/45] dump elf-function: New subcommand for inspecting functions after linking --- cli/src/cmd/dis.rs | 2 +- cli/src/cmd/dump/elf_function.rs | 194 +++++++++++++++++++++++++++++++ cli/src/cmd/dump/mod.rs | 4 + cli/src/config/mod.rs | 1 + cli/src/config/module.rs | 33 ++++++ cli/src/config/symbol.rs | 37 ++---- 6 files changed, 245 insertions(+), 26 deletions(-) create mode 100644 cli/src/cmd/dump/elf_function.rs create mode 100644 cli/src/config/module.rs diff --git a/cli/src/cmd/dis.rs b/cli/src/cmd/dis.rs index 811f1df..a25cb3e 100644 --- a/cli/src/cmd/dis.rs +++ b/cli/src/cmd/dis.rs @@ -140,7 +140,7 @@ impl Disassemble { module_kind: 
module.kind(), symbol_map, symbol_maps, - relocations: module.relocations(), + relocations: Some(module.relocations()), }; let mut symbol_iter = symbol_map.iter_by_address(section.address_range()).peekable(); diff --git a/cli/src/cmd/dump/elf_function.rs b/cli/src/cmd/dump/elf_function.rs new file mode 100644 index 0000000..16ff48f --- /dev/null +++ b/cli/src/cmd/dump/elf_function.rs @@ -0,0 +1,194 @@ +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; +use clap::Args; +use ds_decomp::{ + analysis::functions::{Function, FunctionParseOptions, ParseFunctionOptions}, + config::{config::Config, module::ModuleKind, symbol::SymbolMaps}, +}; +use object::{Object, ObjectSection, ObjectSymbol}; + +use crate::{ + analysis::functions::FunctionExt, + config::{ + module::ModuleKindExt, + symbol::{SymbolLookup, SymbolMapsExt}, + }, + util::io::read_file, +}; + +/// Dumps info about a function from the linked ELF file. +#[derive(Args, Clone)] +pub struct DumpElfFunction { + /// Path to config.yaml. + #[arg(long, short = 'c')] + config_path: PathBuf, + + // Name of the ELF file, defaults to arm9.o. + #[arg(long, short = 'e', default_value = "arm9.o")] + elf_name: String, + + /// Name of the function. 
+ #[arg(long, short = 'n')] + name: String, +} + +impl DumpElfFunction { + pub fn run(&self) -> anyhow::Result<()> { + let config = Config::from_file(&self.config_path)?; + let config_path = self.config_path.parent().unwrap(); + + let elf_data = self.read_elf(&config, config_path)?; + let object = self.parse_elf(&elf_data)?; + + let symbol = self.find_symbol(&object)?; + self.print_symbol(&symbol); + + let section = self.get_section_for_symbol(&object, &symbol)?; + let section_name = self.get_section_name(&section)?; + self.print_section(section_name); + + let module_kind = self.infer_module_kind(section_name)?; + self.print_module(module_kind); + + for section in object.sections() { + for (src_addr, relocation) in section.relocations() { + let object::RelocationTarget::Symbol(symbol_index) = relocation.target() else { + continue; + }; + if symbol_index != symbol.index() { + continue; + } + let section_name = self.get_section_name(&section)?; + let module_kind = self.infer_module_kind(section_name)?; + println!(" Relocation from: {:#010x} in {}", src_addr, module_kind); + } + } + + let data = self.read_data(&config, module_kind)?; + self.print_data(&symbol, &section, &data); + self.print_disassembly(&object, &symbol, &section, module_kind, &data)?; + + Ok(()) + } + + fn read_elf(&self, config: &Config, config_path: &Path) -> Result, anyhow::Error> { + let build_path = config_path.join(&config.build_path); + let elf_path = build_path.join(&self.elf_name); + Ok(read_file(&elf_path)?) + } + + fn parse_elf<'a>(&self, data: &'a [u8]) -> Result, anyhow::Error> { + Ok(object::File::parse(data)?)
+ } + + fn find_symbol<'a>(&self, object: &'a object::File<'_>) -> Result> { + object.symbol_by_name(&self.name).context("No function with that name was found") + } + + fn print_symbol(&self, symbol: &object::Symbol<'_, '_>) { + println!("{}:", self.name); + println!(" Address: {:#010x}", symbol.address()); + println!(" Size: {:#x}", symbol.size()); + } + + fn get_section_for_symbol<'a>( + &self, + object: &'a object::File<'_>, + symbol: &object::Symbol<'_, '_>, + ) -> Result> { + let section_index = symbol.section_index().context("Function symbol has no section")?; + object.section_by_index(section_index).context("Function's section not found") + } + + fn get_section_name<'a>(&self, section: &'a object::Section<'_, '_>) -> Result<&'a str> { + section.name().context("Failed to get section name") + } + + fn print_section(&self, section_name: &str) { + println!(" Section: {}", section_name); + } + + fn infer_module_kind(&self, section_name: &str) -> Result { + ModuleKind::from_linked_section_name(section_name) + .context("Failed to get module kind")? 
+ .context("Section name does not match any known module") + } + + fn print_module(&self, module_kind: ModuleKind) { + println!(" Module: {}", module_kind); + } + + fn read_data(&self, config: &Config, module_kind: ModuleKind) -> Result> { + let config_path = self.config_path.parent().unwrap(); + let config_module = config + .get_module_config_by_kind(module_kind) + .with_context(|| format!("{} not found in config.yaml", module_kind))?; + let bin_file_path = config_path.join(&config_module.object); + read_file(&bin_file_path).with_context(|| { + format!("Failed to read section data from {}", bin_file_path.display()) + }) + } + + fn print_data( + &self, + symbol: &object::Symbol<'_, '_>, + section: &object::Section<'_, '_>, + data: &[u8], + ) { + println!(" Data:"); + let start = (symbol.address() + 1).next_multiple_of(16) - 16; + let end = (symbol.address() + symbol.size()).next_multiple_of(16); + for row_address in (start..end).step_by(16) { + print!(" {:08x} ", row_address); + for i in 0..16 { + let address = row_address + i; + if (symbol.address()..symbol.address() + symbol.size()).contains(&address) { + print!(" {:02x}", data[(address - section.address()) as usize]); + } else { + print!(" .."); + } + } + println!(); + } + } + + fn print_disassembly( + &self, + object: &object::File<'_>, + symbol: &object::Symbol<'_, '_>, + section: &object::Section<'_, '_>, + module_kind: ModuleKind, + data: &[u8], + ) -> Result<(), anyhow::Error> { + let function = Function::parse_function(FunctionParseOptions { + name: self.name.clone(), + start_address: symbol.address() as u32, + base_address: section.address() as u32, + module_code: data, + known_end_address: None, + module_start_address: section.address() as u32, + module_end_address: section.address() as u32 + data.len() as u32, + existing_functions: None, + check_defs_uses: false, + parse_options: ParseFunctionOptions { thumb: None }, + }) + .context("Failed to parse function")?; + + let symbol_maps = + 
SymbolMaps::from_object(object).context("Failed to construct symbol maps from ELF")?; + let symbol_map = symbol_maps.get(module_kind).unwrap(); + let symbol_lookup = + SymbolLookup { module_kind, symbol_map, symbol_maps: &symbol_maps, relocations: None }; + + println!("Disassembly:"); + function.write_assembly( + &mut std::io::stdout().lock(), + &symbol_lookup, + data, + section.address() as u32, + true, + )?; + Ok(()) + } +} diff --git a/cli/src/cmd/dump/mod.rs b/cli/src/cmd/dump/mod.rs index ef4bee0..ba45128 100644 --- a/cli/src/cmd/dump/mod.rs +++ b/cli/src/cmd/dump/mod.rs @@ -1,8 +1,10 @@ mod ambig_relocs; +mod elf_function; mod elf_symbols; use ambig_relocs::*; use clap::{Args, Subcommand}; +use elf_function::*; use elf_symbols::*; /// Subcommands for dumping information from a dsd project. @@ -17,6 +19,7 @@ impl DumpArgs { match &self.command { DumpCommands::ElfSymbols(dump_elf_symbols) => dump_elf_symbols.run(), DumpCommands::AmbigRelocs(dump_ambig_relocs) => dump_ambig_relocs.run(), + DumpCommands::ElfFunction(dump_elf_function) => dump_elf_function.run(), } } } @@ -25,4 +28,5 @@ impl DumpArgs { enum DumpCommands { ElfSymbols(DumpElfSymbols), AmbigRelocs(DumpAmbigRelocs), + ElfFunction(DumpElfFunction), } diff --git a/cli/src/config/mod.rs b/cli/src/config/mod.rs index 07b76e1..9c401e4 100644 --- a/cli/src/config/mod.rs +++ b/cli/src/config/mod.rs @@ -1,4 +1,5 @@ pub mod delinks; +pub mod module; pub mod program; pub mod relocation; pub mod section; diff --git a/cli/src/config/module.rs b/cli/src/config/module.rs new file mode 100644 index 0000000..bef8973 --- /dev/null +++ b/cli/src/config/module.rs @@ -0,0 +1,33 @@ +use anyhow::{Result, anyhow}; +use ds_decomp::config::module::ModuleKind; +use ds_rom::rom::raw::AutoloadKind; + +pub trait ModuleKindExt +where + Self: Sized, +{ + fn from_linked_section_name(section_name: &str) -> Result>; +} + +impl ModuleKindExt for ModuleKind { + fn from_linked_section_name(section_name: &str) -> Result> { + match 
section_name { + "ARM9" => Ok(Some(ModuleKind::Arm9)), + "ITCM" => Ok(Some(ModuleKind::Autoload(AutoloadKind::Itcm))), + "DTCM" => Ok(Some(ModuleKind::Autoload(AutoloadKind::Dtcm))), + name if name.starts_with("OV") => { + let id = name[2..].parse::().map_err(|_| { + anyhow!("Invalid overlay ID in linked object section name '{section_name}'") + })?; + Ok(Some(ModuleKind::Overlay(id))) + } + name if name.starts_with("AUTOLOAD_") => { + let index = name[9..].parse::().map_err(|_| { + anyhow!("Invalid autoload index in linked object section name '{section_name}'") + })?; + Ok(Some(ModuleKind::Autoload(AutoloadKind::Unknown(index)))) + } + _ => Ok(None), + } + } +} diff --git a/cli/src/config/symbol.rs b/cli/src/config/symbol.rs index 5931fca..d316c32 100644 --- a/cli/src/config/symbol.rs +++ b/cli/src/config/symbol.rs @@ -13,12 +13,11 @@ use ds_decomp::{ }, }, }; -use ds_rom::rom::raw::AutoloadKind; use object::{Object, ObjectSection, ObjectSymbol}; use unarm::LookupSymbol; use super::relocation::RelocationModuleExt; -use crate::util::bytes::FromSlice; +use crate::{config::module::ModuleKindExt, util::bytes::FromSlice}; pub struct LookupSymbolMap(SymbolMap); @@ -69,25 +68,8 @@ impl SymbolMapsExt for SymbolMaps { } else { continue; }; - let module_kind = match section_name { - "ARM9" => ModuleKind::Arm9, - "ITCM" => ModuleKind::Autoload(AutoloadKind::Itcm), - "DTCM" => ModuleKind::Autoload(AutoloadKind::Dtcm), - name if name.starts_with("OV") => { - let id = name[2..].parse::().map_err(|_| { - anyhow!("Invalid overlay ID in linked object section name '{section_name}'") - })?; - ModuleKind::Overlay(id) - } - name if name.starts_with("AUTOLOAD_") => { - let index = name[9..].parse::().map_err(|_| { - anyhow!( - "Invalid autoload index in linked object section name '{section_name}'" - ) - })?; - ModuleKind::Autoload(AutoloadKind::Unknown(index)) - } - _ => continue, + let Some(module_kind) = ModuleKind::from_linked_section_name(section_name)? 
else { + continue; }; let symbol_map = symbol_maps.get_mut(module_kind); @@ -247,7 +229,7 @@ pub struct SymbolLookup<'a> { pub symbol_map: &'a SymbolMap, /// All symbol maps, including external modules pub symbol_maps: &'a SymbolMaps, - pub relocations: &'a Relocations, + pub relocations: Option<&'a Relocations>, } impl SymbolLookup<'_> { @@ -259,7 +241,10 @@ impl SymbolLookup<'_> { new_line: &mut bool, indent: &str, ) -> Result { - if let Some(relocation) = self.relocations.get(source) { + let Some(relocations) = self.relocations else { + return Ok(false); + }; + if let Some(relocation) = relocations.get(source) { let relocation_to = relocation.module(); if let Some(module_kind) = relocation_to.first_module() { let symbol_address = (destination as i64 - relocation.addend()) as u32; @@ -325,7 +310,8 @@ impl SymbolLookup<'_> { source: u32, destination: u32, ) -> Result<()> { - let Some(relocation) = self.relocations.get(source) else { return Ok(()) }; + let Some(relocations) = self.relocations else { return Ok(()) }; + let Some(relocation) = relocations.get(source) else { return Ok(()) }; if let Some(overlays) = relocation.module().other_modules() { write!(w, " ; ")?; @@ -365,7 +351,8 @@ impl LookupSymbol for SymbolLookup<'_> { if let Some((_, symbol)) = self.symbol_map.first_at_address(destination) { return Some(&symbol.name); } - if let Some(relocation) = self.relocations.get(source) { + let relocations = self.relocations?; + if let Some(relocation) = relocations.get(source) { let module_kind = relocation.module().first_module()?; let external_symbol_map = self.symbol_maps.get(module_kind).unwrap(); From db82e402f29dc1d386602aa177960deacda2fa0d Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 13:12:42 +0200 Subject: [PATCH 37/45] roundtrip: Build ROM and compare to base ROM --- cli/tests/test_roundtrip.rs | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/cli/tests/test_roundtrip.rs 
b/cli/tests/test_roundtrip.rs index 8aad352..454a895 100644 --- a/cli/tests/test_roundtrip.rs +++ b/cli/tests/test_roundtrip.rs @@ -8,7 +8,7 @@ use std::{ process::Command, }; -use anyhow::Result; +use anyhow::{Result, bail}; use ds_decomp::{ analysis::FindLocalDataError, config::{config::Config, module::ModuleError}, @@ -19,7 +19,7 @@ use ds_decomp_cli::{ }; use ds_rom::{ crypto::blowfish::BlowfishKey, - rom::{Rom, raw}, + rom::{Rom, RomLoadOptions, raw}, }; use log::LevelFilter; use zip::ZipArchive; @@ -55,7 +55,10 @@ fn test_roundtrip() -> Result<()> { // Extract ROM let base_name = path.with_extension("").file_name().unwrap().to_str().unwrap().to_string(); let project_path = roms_dir.join(&base_name); - let extract_path = extract_rom(&path, &project_path, &key)?; + let extract_path = project_path.join("extract"); + let raw_rom = raw::Rom::from_file(path)?; + let rom = Rom::extract(&raw_rom)?; + rom.save(&extract_path, Some(&key))?; let rom_config = extract_path.join("config.yaml"); // Init dsd project @@ -74,7 +77,7 @@ fn test_roundtrip() -> Result<()> { })?; let dsd_config_yaml = dsd_config_dir.join("arm9/config.yaml"); let dsd_config = Config::from_file(&dsd_config_yaml)?; - let target_config_dir = configs_dir.join(base_name); + let target_config_dir = configs_dir.join(&base_name); if allowed_unknown_function_calls { assert!( target_config_dir.exists(), @@ -147,6 +150,22 @@ fn test_roundtrip() -> Result<()> { ConfigRom { elf: linker_out_file.clone(), config: dsd_config_yaml.clone() }; config_rom.run()?; + // Build ROM + let rom_config_path = dsd_config_dir + .join("arm9") + .join(&dsd_config.main_module.object) + .with_file_name("rom_config.yaml"); + let rom_load_options = RomLoadOptions { key: Some(&key), ..Default::default() }; + let rom = Rom::load(&rom_config_path, rom_load_options)?; + let built_rom = rom.build(Some(&key))?; + let rom_path = project_path.join(format!("build_{base_name}.nds")); + built_rom.save(rom_path)?; + + // Compare ROMs + if 
built_rom.data() != raw_rom.data() { + bail!("Built ROM does not match base ROM"); + } + fs::remove_dir_all(project_path)?; } @@ -193,14 +212,6 @@ fn dsd_init( Ok(dsd_config_dir) } -fn extract_rom(path: &Path, project_path: &Path, key: &BlowfishKey) -> Result { - let extract_path = project_path.join("extract"); - let raw_rom = raw::Rom::from_file(path)?; - let rom = Rom::extract(&raw_rom)?; - rom.save(&extract_path, Some(key))?; - Ok(extract_path) -} - fn directory_equals(target: &Path, base: &Path) -> Result { log::debug!("Comparing target directory '{}' with base '{}'", target.display(), base.display()); From f51047a48e525a9a3c03dbd2d156d90c196733fb Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 13:13:07 +0200 Subject: [PATCH 38/45] check symbols: Update error message --- cli/src/cmd/check/symbols.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/cmd/check/symbols.rs b/cli/src/cmd/check/symbols.rs index b0ab558..2636ccf 100644 --- a/cli/src/cmd/check/symbols.rs +++ b/cli/src/cmd/check/symbols.rs @@ -118,7 +118,7 @@ impl CheckSymbols { else { num_mismatches += 1; log::error!( - "Symbol '{}' in {} at {:#010x} not found by fuzzy name in linked binary", + "Symbol '{}' in {} at {:#010x} not found by name in linked binary", target_symbol.name, module_kind, target_symbol.addr From 872b18c0bb034bb249e03821d52a69bbac769ed6 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 13:14:05 +0200 Subject: [PATCH 39/45] rom config: Use `object` name from config.yaml instead of assuming file names --- cli/src/cmd/rom/config.rs | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/cli/src/cmd/rom/config.rs b/cli/src/cmd/rom/config.rs index 9cd4d72..8241c18 100644 --- a/cli/src/cmd/rom/config.rs +++ b/cli/src/cmd/rom/config.rs @@ -152,7 +152,8 @@ impl ConfigRom { .with_context(|| format!("Failed to find overlay {} in ROM", overlay.id))?; let module_name = format!("OV{:03}", overlay.id); - let 
file_name = format!("arm9_ov{:03}.bin", overlay.id); + let file_name = + overlay.module.object.file_name().unwrap().to_string_lossy().to_string(); let ctor_start = object .symbol_by_name(&format!("{module_name}_CTOR_START")) @@ -236,12 +237,10 @@ impl ConfigRom { .find(|a| a.base_address() == base_address) .with_context(|| format!("Failed to find autoload {} in ROM", autoload.kind))?; - let (module_name, file_name) = match autoload.kind { - AutoloadKind::Itcm => ("ITCM".into(), "itcm.yaml".into()), - AutoloadKind::Dtcm => ("DTCM".into(), "dtcm.yaml".into()), - AutoloadKind::Unknown(index) => { - (format!("AUTOLOAD_{index}"), format!("autoload_{index}.yaml")) - } + let module_name = match autoload.kind { + AutoloadKind::Itcm => "ITCM".into(), + AutoloadKind::Dtcm => "DTCM".into(), + AutoloadKind::Unknown(index) => format!("AUTOLOAD_{index}"), }; let mut autoload_info = *rom_autoload.info(); @@ -261,8 +260,17 @@ impl ConfigRom { autoload_info.list_entry.code_size.next_multiple_of(text_section.alignment()); } + let yaml_file_name = autoload + .module + .object + .with_extension("yaml") + .file_name() + .unwrap() + .to_string_lossy() + .to_string(); + let binary_path = config_path.join(&autoload.module.object); - let yaml_path = binary_path.parent().unwrap().join(file_name); + let yaml_path = binary_path.parent().unwrap().join(yaml_file_name); serde_saphyr::to_io_writer(&mut create_file(&yaml_path)?, &autoload_info)?; match autoload.kind { From a5ee86ea6cf1e34c4e539ed28e4f6506e5614645 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 13:14:32 +0200 Subject: [PATCH 40/45] rom config: Provide file names for unknown autoloads --- cli/src/cmd/rom/config.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cli/src/cmd/rom/config.rs b/cli/src/cmd/rom/config.rs index 8241c18..31beca0 100644 --- a/cli/src/cmd/rom/config.rs +++ b/cli/src/cmd/rom/config.rs @@ -282,7 +282,17 @@ impl ConfigRom { rom_paths.dtcm.bin = 
Self::make_path(binary_path, rom_paths_dir); rom_paths.dtcm.config = Self::make_path(yaml_path, rom_paths_dir); } - AutoloadKind::Unknown(_) => {} + AutoloadKind::Unknown(index) => { + let autoload = rom_paths + .unknown_autoloads + .iter_mut() + .find(|a| a.index == index) + .with_context(|| { + format!("Failed to find autoload {} in ROM config", index) + })?; + autoload.files.bin = Self::make_path(binary_path, rom_paths_dir); + autoload.files.config = Self::make_path(yaml_path, rom_paths_dir); + } } } From bb72833ea49511a01cbbbb35e1fe5a1532bc1d19 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 13:14:46 +0200 Subject: [PATCH 41/45] rom config: Create bin files if linker doesn't generate one --- cli/src/cmd/rom/config.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cli/src/cmd/rom/config.rs b/cli/src/cmd/rom/config.rs index 31beca0..2d5bec8 100644 --- a/cli/src/cmd/rom/config.rs +++ b/cli/src/cmd/rom/config.rs @@ -273,6 +273,11 @@ impl ConfigRom { let yaml_path = binary_path.parent().unwrap().join(yaml_file_name); serde_saphyr::to_io_writer(&mut create_file(&yaml_path)?, &autoload_info)?; + if !binary_path.exists() { + // Linker does not create binary file if module is completely empty + create_file(&binary_path)?; + } + match autoload.kind { AutoloadKind::Itcm => { rom_paths.itcm.bin = Self::make_path(binary_path, rom_paths_dir); From 1807ab92bfe3ff867b981e07bdae8685959596b3 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 17:07:06 +0200 Subject: [PATCH 42/45] rom config: Optimize performance By default, `object::Object::symbol_by_name` scans through all symbols until it finds a match. `dsd rom config` uses it to find section addresses for each module, so it ends up doing a lot of passes through all symbols. Instead, we now pass through all symbols *once*, registering them in a HashMap so they can be queried much quicker later. 
--- cli/src/cmd/rom/config.rs | 69 +++++++++++++++++++++++---------------- cli/src/config/section.rs | 23 ++++++++----- cli/src/util/mod.rs | 1 + cli/src/util/object.rs | 24 ++++++++++++++ 4 files changed, 80 insertions(+), 37 deletions(-) create mode 100644 cli/src/util/object.rs diff --git a/cli/src/cmd/rom/config.rs b/cli/src/cmd/rom/config.rs index 2d5bec8..8914cf6 100644 --- a/cli/src/cmd/rom/config.rs +++ b/cli/src/cmd/rom/config.rs @@ -14,13 +14,16 @@ use ds_decomp::config::{ use ds_rom::rom::{ OverlayConfig, OverlayTableConfig, Rom, RomConfig, RomLoadOptions, raw::AutoloadKind, }; -use object::{Object, ObjectSection, ObjectSymbol}; +use object::{ObjectSection, ObjectSymbol}; use path_slash::PathExt; use pathdiff::diff_paths; use crate::{ config::section::SectionExt, - util::io::{create_file, open_file, read_file}, + util::{ + io::{create_file, open_file, read_file}, + object::ObjectCache, + }, }; /// Creates a configuration to build a ROM from linked binaries. @@ -61,11 +64,12 @@ impl ConfigRom { let file = read_file(&self.elf)?; let object = object::File::parse(&*file)?; + let object_cache = ObjectCache::new(&object); - self.config_arm9(&object, &config, &rom, &mut rom_paths, new_rom_paths_dir)?; - self.config_autoloads(&object, &config, &rom, &mut rom_paths, new_rom_paths_dir)?; + self.config_arm9(&object_cache, &config, &rom, &mut rom_paths, new_rom_paths_dir)?; + self.config_autoloads(&object_cache, &config, &rom, &mut rom_paths, new_rom_paths_dir)?; self.config_overlays( - &object, + &object_cache, &config, &rom, &mut rom_paths, @@ -130,7 +134,7 @@ impl ConfigRom { fn config_overlays( &self, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, config: &Config, rom: &Rom<'_>, rom_paths: &mut RomConfig, @@ -155,26 +159,30 @@ impl ConfigRom { let file_name = overlay.module.object.file_name().unwrap().to_string_lossy().to_string(); - let ctor_start = object - .symbol_by_name(&format!("{module_name}_CTOR_START")) + let ctor_start = 
object_cache + .symbols_by_name + .get(&format!("{module_name}_CTOR_START")) .with_context(|| format!("No CTOR_START in overlay {}", overlay.id))?; - let ctor_end = object - .symbol_by_name(&format!("{module_name}_CTOR_END")) + let ctor_end = object_cache + .symbols_by_name + .get(&format!("{module_name}_CTOR_END")) .with_context(|| format!("No CTOR_END in overlay {}", overlay.id))?; let base_address = - Self::section_ranges(&delinks.sections, &module_name, object, |_| true)? + Self::section_ranges(&delinks.sections, &module_name, object_cache, |_| true)? .unwrap() .start; let mut info = rom_overlay.info().clone(); info.base_address = base_address; - let code_range = Self::section_ranges(&delinks.sections, &module_name, object, |s| { - s.kind().is_initialized() - })?; - let bss_range = Self::section_ranges(&delinks.sections, &module_name, object, |s| { - !s.kind().is_initialized() - })?; + let code_range = + Self::section_ranges(&delinks.sections, &module_name, object_cache, |s| { + s.kind().is_initialized() + })?; + let bss_range = + Self::section_ranges(&delinks.sections, &module_name, object_cache, |s| { + !s.kind().is_initialized() + })?; let bss_range = bss_range .or(code_range.map(|r| r.end..r.end)) @@ -217,7 +225,7 @@ impl ConfigRom { fn config_autoloads( &self, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, config: &Config, rom: &Rom<'_>, rom_paths: &mut RomConfig, @@ -245,12 +253,12 @@ impl ConfigRom { let mut autoload_info = *rom_autoload.info(); autoload_info.list_entry.code_size = - Self::section_ranges(&delinks.sections, &module_name, object, |s| { + Self::section_ranges(&delinks.sections, &module_name, object_cache, |s| { s.kind().is_initialized() })? .map_or(0, |range| range.len() as u32); autoload_info.list_entry.bss_size = - Self::section_ranges(&delinks.sections, &module_name, object, |s| { + Self::section_ranges(&delinks.sections, &module_name, object_cache, |s| { !s.kind().is_initialized() })? 
.map_or(0, |range| range.len() as u32); @@ -306,7 +314,7 @@ impl ConfigRom { fn config_arm9( &self, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, config: &Config, rom: &Rom<'_>, rom_paths: &mut RomConfig, @@ -314,21 +322,24 @@ impl ConfigRom { ) -> Result<()> { let config_path = self.config.parent().unwrap(); - let arm9_section = object.section_by_name("ARM9").context("ARM9 section not found")?; + let arm9_section = + object_cache.sections_by_name.get("ARM9").context("ARM9 section not found")?; let build_info_symbol = - object.symbol_by_name("BuildInfo").context("BuildInfo symbol not found")?; - let autoload_callback_symbol = - object.symbol_by_name("AutoloadCallback").context("BuildInfo symbol not found")?; + object_cache.symbols_by_name.get("BuildInfo").context("BuildInfo symbol not found")?; + let autoload_callback_symbol = object_cache + .symbols_by_name + .get("AutoloadCallback") + .context("AutoloadCallback symbol not found")?; let delinks = Delinks::from_file(config_path.join(&config.main_module.delinks), ModuleKind::Arm9)?; - let bss_range = Self::section_ranges(&delinks.sections, "ARM9", object, |s| { + let bss_range = Self::section_ranges(&delinks.sections, "ARM9", object_cache, |s| { !s.kind().is_initialized() })?
.unwrap(); let mut arm9_build_config = rom.arm9_build_config()?; arm9_build_config.offsets.base_address = arm9_section.address() as u32; - arm9_build_config.offsets.entry_function = object.entry() as u32; + arm9_build_config.offsets.entry_function = object_cache.entry; arm9_build_config.offsets.build_info = (build_info_symbol.address() - arm9_section.address()) as u32; arm9_build_config.offsets.autoload_callback = autoload_callback_symbol.address() as u32; @@ -350,7 +361,7 @@ impl ConfigRom { fn section_ranges( sections: &Sections, module_name: &str, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, predicate: F, ) -> Result>> where @@ -359,7 +370,7 @@ impl ConfigRom { Ok(sections .iter() .filter(predicate) - .map(|s| s.range_from_object(module_name, object)) + .map(|s| s.range_from_object(module_name, object_cache)) .collect::>>()? .into_iter() .reduce(|a, b| a.start.min(b.start)..a.end.max(b.end))) diff --git a/cli/src/config/section.rs b/cli/src/config/section.rs index 1fbc6c8..fc97134 100644 --- a/cli/src/config/section.rs +++ b/cli/src/config/section.rs @@ -6,7 +6,9 @@ use ds_decomp::config::{ relocations::{Relocation, RelocationKind, RelocationModule}, section::Section, }; -use object::{Object, ObjectSymbol}; +use object::ObjectSymbol; + +use crate::util::object::ObjectCache; pub trait SectionExt { fn relocatable_code(&self, module: &Module) -> Result>>; @@ -14,8 +16,11 @@ pub trait SectionExt { /// Name of this section for creating section boundary symbols, e.g. 
`ARM9_BSS_START` fn boundary_name(&self) -> String; - fn range_from_object(&self, module_name: &str, object: &object::File<'_>) - -> Result>; + fn range_from_object( + &self, + module_name: &str, + object_cache: &ObjectCache<'_, '_>, + ) -> Result>; } impl SectionExt for Section { @@ -78,17 +83,19 @@ impl SectionExt for Section { fn range_from_object( &self, module_name: &str, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, ) -> Result> { let boundary_name = self.boundary_name(); let boundary_start = format!("{module_name}_{boundary_name}_START"); let boundary_end = format!("{module_name}_{boundary_name}_END"); - let start = object - .symbol_by_name(&boundary_start) + let start = object_cache + .symbols_by_name + .get(&boundary_start) .with_context(|| format!("Failed to find symbol {boundary_start}"))? .address() as u32; - let end = object - .symbol_by_name(&boundary_end) + let end = object_cache + .symbols_by_name + .get(&boundary_end) .with_context(|| format!("Failed to find symbol {boundary_end}"))? 
.address() as u32; Ok(start..end) diff --git a/cli/src/util/mod.rs b/cli/src/util/mod.rs index 77eaaa4..6efddd4 100644 --- a/cli/src/util/mod.rs +++ b/cli/src/util/mod.rs @@ -1,5 +1,6 @@ pub mod bytes; pub mod debug; pub mod io; +pub mod object; pub mod parse; pub mod path; diff --git a/cli/src/util/object.rs b/cli/src/util/object.rs new file mode 100644 index 0000000..8a84ef5 --- /dev/null +++ b/cli/src/util/object.rs @@ -0,0 +1,24 @@ +use std::collections::HashMap; + +use object::{Object as _, ObjectSection as _, ObjectSymbol as _}; + +pub struct ObjectCache<'data, 'file> { + pub symbols_by_name: HashMap>, + pub sections_by_name: HashMap>, + pub entry: u32, +} + +impl<'data, 'file> ObjectCache<'data, 'file> { + pub fn new(object: &'data object::File<'data>) -> Self { + let symbols_by_name = object + .symbols() + .filter_map(|symbol| symbol.name().ok().map(|name| (name.to_string(), symbol))) + .collect::>(); + let sections_by_name = object + .sections() + .filter_map(|section| section.name().ok().map(|name| (name.to_string(), section))) + .collect::>(); + let entry = object.entry() as u32; + Self { symbols_by_name, sections_by_name, entry } + } +} From 1e64ab60186aa1d8624550db310bc4f5a9167f70 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 17:07:26 +0200 Subject: [PATCH 43/45] init: Treat out-of-bounds pool reads as illegal instructions --- lib/src/analysis/functions.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index 5105eb0..09919b3 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -969,13 +969,11 @@ impl<'a> ParseFunctionContext<'a> { { let start = (pool_address - self.base_address) as usize; let Some(bytes) = self.code.get(start..) 
else { - panic!( - "{:#010x} {:#010x} {} {:x?}", - address, - self.base_address, - pool_address as isize - self.base_address as isize, - &self.code[0..16] + log::debug!( + "Illegal instruction at {:#010x}: Pool load goes outside module", + address ); + return ParseFunctionState::IllegalIns { address, ins }; }; let const_value = u32::from_le_slice(bytes); self.register_values[register as usize] = From 97ad8c1fce4c9e247557c431c6ed141885f45984 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 17:47:25 +0200 Subject: [PATCH 44/45] init: Fix `signed: true` never getting added to config.yaml --- Cargo.lock | 5 ++--- cli/Cargo.toml | 2 +- lib/Cargo.toml | 2 +- lib/src/config/module.rs | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05c09aa..76a52ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -556,9 +556,8 @@ dependencies = [ [[package]] name = "ds-rom" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "473d5a6494a1be90379bf6f346cce68ee36baa60a997eacf947071af82f973fe" +version = "0.7.1" +source = "git+https://github.com/AetiasHax/ds-rom?branch=0.7.1#9632465c3f911d4533d78ff5e120d2a7a0667894" dependencies = [ "bitfield-struct", "bitreader", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index e737c2b..ab027cd 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -20,7 +20,7 @@ base64 = "0.22" clap = { version = "4.5", features = ["derive"] } cpp_demangle = "0.5" ds-decomp = { path = "../lib" } -ds-rom = "0.7" +ds-rom = { git = "https://github.com/AetiasHax/ds-rom", branch = "0.7.1" } env_logger = "0.11" fxhash = "0.2" log = "0.4" diff --git a/lib/Cargo.toml b/lib/Cargo.toml index a4137da..fb1df80 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -10,7 +10,7 @@ description = "Library for ds-decomp, a DS decompilation toolkit." 
[dependencies] bytemuck = { version = "1.25", features = ["derive"] } -ds-rom = "0.7" +ds-rom = { git = "https://github.com/AetiasHax/ds-rom", branch = "0.7.1" } log = "0.4" serde = "1.0" serde-saphyr = "0.0" diff --git a/lib/src/config/module.rs b/lib/src/config/module.rs index 5e20a5a..4f54c84 100644 --- a/lib/src/config/module.rs +++ b/lib/src/config/module.rs @@ -264,7 +264,7 @@ impl Module { default_data_prefix: format!("data_ov{:03}_", overlay.id()), default_sinit_prefix: format!("__sinit_ov{:03}_", overlay.id()), sections: Sections::new(), - signed: overlay.is_signed(), + signed: overlay.originally_signed(), }; let symbol_map = symbol_maps.get_mut(module.kind); From 83da00d4bf4b149a3d81e05c321d5dd86aee1710 Mon Sep 17 00:00:00 2001 From: Aetias Date: Sun, 17 May 2026 20:22:43 +0200 Subject: [PATCH 45/45] lcf: Rewrite overlay grouping algorithm Some games have gaps between each overlay group, which seems to mean that they were manually placed at arbitrary addresses. Rather than add support for this in the current algorithm, I rewrote and shortened it. 
--- cli/src/analysis/overlay_groups.rs | 168 ++++++++++++++--------------- 1 file changed, 83 insertions(+), 85 deletions(-) diff --git a/cli/src/analysis/overlay_groups.rs b/cli/src/analysis/overlay_groups.rs index 9dc8aa3..dd3eec5 100644 --- a/cli/src/analysis/overlay_groups.rs +++ b/cli/src/analysis/overlay_groups.rs @@ -1,3 +1,5 @@ +use std::collections::{BTreeMap, btree_map}; + use anyhow::Result; use ds_rom::rom::Overlay; @@ -8,112 +10,108 @@ pub struct OverlayGroups { pub type OverlayIndex = u16; pub struct OverlayGroup { - pub index: u16, pub start_address: u32, pub end_address: u32, pub overlays: Vec, pub location: OverlayGroupLocation, } +#[derive(Clone)] pub enum OverlayGroupLocation { AfterStatic, // after ARM9 and custom autoloads After(Vec), // after other overlays Static, // static address } +struct OverlaySuccessors { + overlays: Vec, + precedes: OverlayGroupLocation, +} + impl OverlayGroups { - pub fn analyze(static_end_address: u32, overlays: &[Overlay]) -> Result { - // Find all overlays immediately after the static modules (main program and autoloads except ITCM/DTCM) - let (first_group, first_group_end, mut ungrouped_overlays) = overlays.iter().fold( - (vec![], 0, vec![]), - |(mut first_group, mut first_group_end, mut rest), overlay| { - if overlay.base_address() == static_end_address { - first_group.push(overlay.id()); - first_group_end = first_group_end.max(overlay.end_address()); - } else { - rest.push(overlay.id()); + pub fn analyze(static_end_address: u32, overlays: &[Overlay]) -> Result { + // Map end addresses to modules + let mut precedents: BTreeMap = BTreeMap::new(); + precedents.insert(static_end_address, OverlayGroupLocation::AfterStatic); + for overlay in overlays { + match precedents.entry(overlay.end_address()) { + btree_map::Entry::Vacant(entry) => { + entry.insert(OverlayGroupLocation::After(vec![overlay.id()])); } - (first_group, first_group_end, rest) - }, - ); - log::debug!( - "Found {} overlays after static modules, first 
group end address: {:#010x}", - first_group.len(), - first_group_end - ); - - // Create groups of overlays, starting with the first group found earlier, ordered by base address - let mut groups = vec![OverlayGroup { - index: 0, - start_address: static_end_address, - end_address: first_group_end, - overlays: first_group, - location: OverlayGroupLocation::AfterStatic, - }]; - - let mut new_group = vec![]; - let mut groups_to_connect = vec![0u16]; // list of groups (indices) which may be preceded by ungrouped overlays - while !ungrouped_overlays.is_empty() { - let Some(connect_index) = groups_to_connect.pop() else { - log::warn!( - "No more overlay groups to connect to after {:#010x} as there are gaps between overlays. Adding remaining overlays as static overlays: {}", - groups.last().unwrap().end_address, - ungrouped_overlays - .iter() - .map(|id| id.to_string()) - .collect::>() - .join(", ") - ); - for id in ungrouped_overlays { - let overlay = &overlays[id as usize]; - groups.push(OverlayGroup { - index: groups.len() as u16, - start_address: overlay.base_address(), - end_address: overlay.end_address(), - overlays: vec![id], - location: OverlayGroupLocation::Static, - }); + btree_map::Entry::Occupied(mut entry) => { + let OverlayGroupLocation::After(overlays) = entry.get_mut() else { + unreachable!(); + }; + overlays.push(overlay.id()); } - break; }; - let connect_index = connect_index as usize; - - for i in 0..groups[connect_index].overlays.len() { - let grouped_overlay = &overlays[groups[connect_index].overlays[i] as usize]; - let overlay_end = grouped_overlay.end_address(); - - let mut group_end = 0; - for j in (0..ungrouped_overlays.len()).rev() { - let overlay = &overlays[ungrouped_overlays[j] as usize]; - if overlay.base_address() == grouped_overlay.end_address() { - new_group.push(ungrouped_overlays.remove(j)); - group_end = group_end.max(overlay.end_address()); - } + } + let precedents = precedents; + + // Map base addresses to overlays and precedents + let 
mut successors_map: BTreeMap = BTreeMap::new(); + for overlay in overlays { + match successors_map.entry(overlay.base_address()) { + btree_map::Entry::Vacant(entry) => { + let precedes = if let Some(precedes) = precedents.get(&overlay.base_address()) { + precedes.clone() + } else { + OverlayGroupLocation::Static + }; + entry.insert(OverlaySuccessors { overlays: vec![overlay.id()], precedes }); } - - if !new_group.is_empty() { - let after = groups[connect_index] - .overlays + btree_map::Entry::Occupied(mut entry) => { + entry.get_mut().overlays.push(overlay.id()); + } + }; + } + let successors_map = successors_map; + + // Create overlay groups + let mut groups = Vec::new(); + let mut group_index_by_overlay = vec![None; overlays.len()]; + for (base_address, successors) in successors_map { + let end_address = successors + .overlays + .iter() + .map(|&id| overlays[id as usize].end_address()) + .max() + .unwrap(); + + let location = match successors.precedes { + OverlayGroupLocation::AfterStatic => OverlayGroupLocation::AfterStatic, + OverlayGroupLocation::Static => OverlayGroupLocation::Static, + OverlayGroupLocation::After(items) => { + let mut group_indices = items .iter() - .copied() - .filter(|&id| overlays[id as usize].end_address() <= overlay_end) + .map(|&id| group_index_by_overlay[id as usize].unwrap()) + .collect::>(); + group_indices.sort_unstable(); + group_indices.dedup(); + let preceding_overlays = group_indices + .iter() + .flat_map(|&group_index| { + let group: &OverlayGroup = &groups[group_index]; + group + .overlays + .iter() + .filter(|&&id| overlays[id as usize].end_address() <= base_address) + .copied() + }) .collect(); - - new_group.reverse(); - - let index = groups.len() as u16; - groups.push(OverlayGroup { - index, - start_address: overlay_end, - end_address: group_end, - overlays: new_group, - location: OverlayGroupLocation::After(after), - }); - groups_to_connect.push(index); - - new_group = vec![]; + 
OverlayGroupLocation::After(preceding_overlays) } + }; + + for &overlay in &successors.overlays { + group_index_by_overlay[overlay as usize] = Some(groups.len()); } + groups.push(OverlayGroup { + start_address: base_address, + end_address, + overlays: successors.overlays, + location, + }); } Ok(Self { groups })