diff --git a/Cargo.lock b/Cargo.lock index 05c09aa..76a52ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -556,9 +556,8 @@ dependencies = [ [[package]] name = "ds-rom" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "473d5a6494a1be90379bf6f346cce68ee36baa60a997eacf947071af82f973fe" +version = "0.7.1" +source = "git+https://github.com/AetiasHax/ds-rom?branch=0.7.1#9632465c3f911d4533d78ff5e120d2a7a0667894" dependencies = [ "bitfield-struct", "bitreader", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index e737c2b..ab027cd 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -20,7 +20,7 @@ base64 = "0.22" clap = { version = "4.5", features = ["derive"] } cpp_demangle = "0.5" ds-decomp = { path = "../lib" } -ds-rom = "0.7" +ds-rom = { git = "https://github.com/AetiasHax/ds-rom", branch = "0.7.1" } env_logger = "0.11" fxhash = "0.2" log = "0.4" diff --git a/cli/src/analysis/data.rs b/cli/src/analysis/data.rs index c7df7a7..ec46d74 100644 --- a/cli/src/analysis/data.rs +++ b/cli/src/analysis/data.rs @@ -1,10 +1,10 @@ use ds_decomp::{ - analysis::functions::Function, + analysis::functions::{CalledFunction, Function}, config::{ - module::{AnalysisOptions, Module, ModuleKind}, + module::{Module, ModuleKind}, relocations::{Relocation, RelocationFromModulesError, RelocationModule}, section::{SectionCodeError, SectionIndex, SectionKind}, - symbol::{SymbolMapError, SymbolMaps}, + symbol::{InstructionMode, SymFunction, SymLabel, SymbolKind, SymbolMapError, SymbolMaps}, }, }; use snafu::Snafu; @@ -18,9 +18,18 @@ pub struct AnalyzeExternalReferencesOptions<'a> { #[derive(Debug, Snafu)] pub enum AnalyzeExternalReferencesError { #[snafu(display( - "Local function call from {from:#010x} in {module_kind} to {to:#010x} leads to no function" + "Failed to add relocation for local function call from {from:#010x} in {module_kind} to {to:#010x} as it leads to no function" ))] LocalFunctionNotFound { from: u32, to: u32, module_kind: ModuleKind }, 
+ #[snafu(display( + "Failed to add relocation for function call from {from:#010x} in {from_module} to {to:#010x} in {to_module} as it leads to a non-function symbol" + ))] + InvalidCallDestinationSymbol { + from: u32, + to: u32, + from_module: ModuleKind, + to_module: ModuleKind, + }, #[snafu(transparent)] SymbolMap { source: SymbolMapError }, #[snafu(transparent)] @@ -30,37 +39,30 @@ pub enum AnalyzeExternalReferencesError { } pub fn analyze_external_references( - options: AnalyzeExternalReferencesOptions, - analysis_options: &AnalysisOptions, + options: &mut AnalyzeExternalReferencesOptions, ) -> Result { - let AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps } = options; - let mut result = RelocationResult::new(); - find_relocations_in_functions( - &mut result, - AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps }, - analysis_options, - )?; - find_external_references_in_sections(modules, module_index, &mut result)?; + find_relocations_in_functions(&mut result, options)?; + find_external_references_in_sections(options, &mut result)?; Ok(result) } fn find_external_references_in_sections( - modules: &[Module], - module_index: usize, + options: &mut AnalyzeExternalReferencesOptions, result: &mut RelocationResult, ) -> Result<(), AnalyzeExternalReferencesError> { - for section in modules[module_index].sections().iter() { + let o = options; + for section in o.modules[o.module_index].sections().iter() { match section.kind() { SectionKind::Data | SectionKind::Rodata => {} SectionKind::Code | SectionKind::Bss => continue, } let code = section - .code(modules[module_index].code(), modules[module_index].base_address())? + .code(o.modules[o.module_index].code(), o.modules[o.module_index].base_address())? 
.unwrap(); for word in section.iter_words(code, None) { - find_external_data(modules, module_index, word.address, word.value, result)?; + find_external_data(o, word.address, word.value, result)?; } } Ok(()) @@ -68,107 +70,94 @@ fn find_external_references_in_sections( fn find_relocations_in_functions( result: &mut RelocationResult, - options: AnalyzeExternalReferencesOptions, - analysis_options: &AnalysisOptions, + options: &mut AnalyzeExternalReferencesOptions, ) -> Result<(), AnalyzeExternalReferencesError> { - let AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps } = options; - - for section in modules[module_index].sections().iter() { + for section in options.modules[options.module_index].sections().iter() { for function in section.functions().values() { - add_function_calls_as_relocations( - function, - result, - AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps }, - analysis_options, - )?; - find_external_data_from_pools(modules, module_index, function, result)?; + add_function_calls_as_relocations(function, result, options)?; + find_external_data_from_pools(options, function, result)?; } } Ok(()) } +fn iter_function_calls(function: &Function) -> impl Iterator { + function + .function_calls() + .iter() + // TODO: Condition code resets to AL for relocated call instructions + .filter(|(_, called_function)| !called_function.ins.is_conditional()) +} + fn add_function_calls_as_relocations( function: &Function, result: &mut RelocationResult, - options: AnalyzeExternalReferencesOptions, - analysis_options: &AnalysisOptions, + options: &mut AnalyzeExternalReferencesOptions, ) -> Result<(), AnalyzeExternalReferencesError> { let AnalyzeExternalReferencesOptions { modules, module_index, symbol_maps } = options; - for (&address, &called_function) in function.function_calls() { - if called_function.ins.is_conditional() { - // Dumb mwld linker bug removes the condition code from relocated call instructions - continue; - } - - let 
local_module = &modules[module_index]; + for (&address, &called_function) in iter_function_calls(function) { + let local_module = &modules[*module_index]; let is_local = local_module.sections().get_by_contained_address(called_function.address).is_some(); let module: RelocationModule = if is_local { let module_kind = local_module.kind(); let symbol_map = symbol_maps.get_mut(module_kind); - let symbol = match symbol_map.get_function_containing(called_function.address) { + let symbol = match symbol_map.by_address(called_function.address)? { Some((_, symbol)) => symbol, None => { - if !analysis_options.allow_unknown_function_calls { - let error = LocalFunctionNotFoundSnafu { - from: address, - to: called_function.address, - module_kind, - } - .build(); - log::error!("{error}"); - return Err(error); - } else { - log::warn!( - "Local function call from {:#010x} in {} to {:#010x} leads to no function, inserting an unknown function symbol", - address, - module_kind, - called_function.address - ); - - let thumb_bit = if called_function.thumb { 1 } else { 0 }; - let function_address = called_function.address | thumb_bit; - - if let Some((_, symbol)) = symbol_map.get_function(function_address)? 
{ - symbol - } else { - let name = format!( - "{}{:08x}_unk", - local_module.default_func_prefix, function_address - ); - let (_, symbol) = symbol_map.add_unknown_function( - name, - function_address, - called_function.thumb, - ); - symbol - } + let error = LocalFunctionNotFoundSnafu { + from: address, + to: called_function.address, + module_kind, } + .build(); + log::error!("{error}"); + return Err(error); } }; - if called_function.address != symbol.addr { - log::warn!( - "Local function call from {:#010x} in {} to {:#010x} goes to middle of function '{}' at {:#010x}, adding an external label symbol", - address, - module_kind, - called_function.address, - symbol.name, - symbol.addr - ); - symbol_map.add_external_label(called_function.address, called_function.thumb)?; + match &symbol.kind { + SymbolKind::Function(_) | SymbolKind::Label(SymLabel { external: true, .. }) => {} + + SymbolKind::Label(SymLabel { external: false, .. }) + | SymbolKind::Undefined + | SymbolKind::PoolConstant + | SymbolKind::JumpTable(_) + | SymbolKind::Data(_) + | SymbolKind::Bss(_) => { + return InvalidCallDestinationSymbolSnafu { + from: address, + to: called_function.address, + from_module: module_kind, + to_module: module_kind, + } + .fail(); + } } module_kind.into() } else { let candidates = modules.iter().filter(|&module| { let symbol_map = symbol_maps.get(module.kind()).unwrap(); - let Some((function, _)) = symbol_map.get_function(called_function.address).unwrap() + let Some((_, symbol)) = symbol_map.by_address(called_function.address).unwrap() else { return false; }; - function.mode.into_thumb() == Some(called_function.thumb) + + let mode = match &symbol.kind { + SymbolKind::Function(SymFunction { mode, .. }) + | SymbolKind::Label(SymLabel { external: true, mode }) => mode, + + SymbolKind::Label(SymLabel { external: false, .. 
}) + | SymbolKind::Undefined + | SymbolKind::PoolConstant + | SymbolKind::JumpTable(_) + | SymbolKind::Data(_) + | SymbolKind::Bss(_) => return false, + }; + + mode.into_thumb() == Some(called_function.thumb) }); RelocationModule::from_modules(candidates)? }; @@ -176,7 +165,7 @@ fn add_function_calls_as_relocations( if module == RelocationModule::None { log::warn!( "No functions from {address:#010x} in {} to {:#010x}:", - modules[module_index].kind(), + modules[*module_index].kind(), called_function.address ); } @@ -201,44 +190,37 @@ fn add_function_calls_as_relocations( } fn find_external_data_from_pools( - modules: &[Module], - module_index: usize, + options: &mut AnalyzeExternalReferencesOptions, function: &Function, result: &mut RelocationResult, ) -> Result<(), AnalyzeExternalReferencesError> { - let module = &modules[module_index]; - for pool_constant in function.iter_pool_constants(module.code(), module.base_address()) { - find_external_data( - modules, - module_index, - pool_constant.address, - pool_constant.value, - result, - )?; + for pool_constant in function.iter_pool_constants() { + find_external_data(options, pool_constant.address, pool_constant.value, result)?; } Ok(()) } fn find_external_data( - modules: &[Module], - module_index: usize, + options: &mut AnalyzeExternalReferencesOptions, address: u32, pointer: u32, result: &mut RelocationResult, ) -> Result<(), AnalyzeExternalReferencesError> { - let local_module = &modules[module_index]; + let o = options; + + let local_module = &o.modules[o.module_index]; let is_local = local_module.sections().get_by_contained_address(pointer).is_some(); if is_local { return Ok(()); } - let candidates = find_symbol_candidates(modules, module_index, pointer); + let candidates = find_symbol_candidates(o, pointer); if candidates.is_empty() { // Probably not a pointer return Ok(()); } - let candidate_modules = candidates.iter().map(|c| &modules[c.module_index]); + let candidate_modules = candidates.iter().map(|c| 
&o.modules[c.module_index]); let module = RelocationModule::from_modules(candidate_modules)?; result.relocations.push(Relocation::new_load(address, pointer, 0, module)); @@ -247,26 +229,47 @@ fn find_external_data( } fn find_symbol_candidates( - modules: &[Module], - module_index: usize, + options: &mut AnalyzeExternalReferencesOptions, pointer: u32, ) -> Vec { - modules + options + .modules .iter() .enumerate() .filter_map(|(index, module)| { - if index == module_index { + if index == options.module_index { return None; } let (section_index, section) = module.sections().get_by_contained_address(pointer)?; + let symbol_map = options.symbol_maps.get(module.kind()).unwrap(); if section.kind() == SectionKind::Code { - let function = section.functions().get(&(pointer & !1))?; + let (_, symbol) = symbol_map.by_address(pointer & !1).unwrap()?; + let symbol_is_thumb = match &symbol.kind { + SymbolKind::Function(function) => function.mode == InstructionMode::Thumb, + SymbolKind::Label(SymLabel { external: true, mode }) => { + *mode == InstructionMode::Thumb + } + SymbolKind::Label(SymLabel { external: false, .. 
}) + | SymbolKind::Undefined + | SymbolKind::PoolConstant + | SymbolKind::JumpTable(_) + | SymbolKind::Data(_) + | SymbolKind::Bss(_) => return None, + }; + let thumb = (pointer & 1) != 0; - if function.is_thumb() != thumb { + if symbol_is_thumb != thumb { return None; } } - Some(SymbolCandidate { module_index: index, section_index }) + if let Some((_, symbol)) = symbol_map.by_address(pointer).unwrap() + && symbol.local + { + // Existing symbol is local, so it can't be referred to by a relocation + None + } else { + Some(SymbolCandidate { module_index: index, section_index }) + } }) .collect::>() } diff --git a/cli/src/analysis/functions.rs b/cli/src/analysis/functions.rs index 0dd7236..7abdbff 100644 --- a/cli/src/analysis/functions.rs +++ b/cli/src/analysis/functions.rs @@ -1,7 +1,13 @@ use std::io; use anyhow::{Result, bail}; -use ds_decomp::analysis::functions::Function; +use ds_decomp::{ + analysis::{ + functions::Function, + jump_table::{JumpTableKind, ThumbJumpTableKind}, + }, + config::symbol::SymJumpTable, +}; use unarm::{ArmVersion, DisplayOptions, Endian, ParseFlags, ParseMode, Parser, RegNames}; use crate::config::symbol::{SymDataExt, SymbolLookup}; @@ -88,21 +94,38 @@ impl FunctionExt for Function { // write instruction match jump_table { - Some((table, sym)) if !table.code => { - let (directive, value) = if self.is_thumb() { - (".short", i32::from(ins.code() as i16)) - } else { - (".word", ins.code().cast_signed()) - }; - let label_address = (sym.addr.cast_signed() + value + 2).cast_unsigned(); - let Some(label) = symbols.symbol_map.get_label(label_address)? else { - log::error!( - "Expected label for jump table destination {label_address:#010x}" - ); - bail!("Expected label for jump table destination {label_address:#010x}"); - }; - write!(w, " {directive} {} - {} - 2", label.name, sym.name)?; - } + Some((SymJumpTable { kind: JumpTableKind::Thumb(kind), .. 
}, sym)) => match kind { + ThumbJumpTableKind::Halfword => { + let value = i32::from(ins.code() as i16); + write_numerical_jump_table_entry( + w, symbols, sym, value, ".short", address, + )?; + } + ThumbJumpTableKind::Byte => { + let code = ins.code() as i16; + let [first_value, second_value] = code.to_le_bytes(); + let first_value = first_value as i8 as i32; + let second_value = second_value as i8 as i32; + write_numerical_jump_table_entry( + w, + symbols, + sym, + first_value, + ".byte", + address, + )?; + write_jump_table_case(w, jump_table, 1, address)?; + write_numerical_jump_table_entry( + w, + symbols, + sym, + second_value, + ".byte", + address + 1, + )?; + write_jump_table_case(w, jump_table, 1, address + 1)?; + } + }, _ => { if parser.mode != ParseMode::Data { write!(w, " ")?; @@ -127,22 +150,15 @@ impl FunctionExt for Function { { symbols.write_ambiguous_symbols_comment(w, address, reference)?; } + write_jump_table_case(w, jump_table, ins_size, address)?; } } - // write jump table case - if let Some((_table, sym)) = jump_table { - let case = (address - sym.addr) / ins_size; - writeln!(w, " ; case {case}")?; - } else { - writeln!(w)?; - } - // write pool constants let next_address = address + ins_size; for i in 0.. 
{ let pool_address = next_address + i * 4; - if self.pool_constants().contains(&pool_address) { + if self.pool_constants().contains_key(&pool_address) { let start = pool_address - base_address; let bytes = &module_code[start as usize..]; let const_value = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); @@ -189,3 +205,38 @@ impl FunctionExt for Function { Ok(()) } } + +fn write_jump_table_case( + w: &mut W, + jump_table: Option<(SymJumpTable, &ds_decomp::config::symbol::Symbol)>, + ins_size: u32, + address: u32, +) -> std::result::Result<(), io::Error> { + if let Some((_table, sym)) = jump_table { + let case = (address - sym.addr) / ins_size; + writeln!(w, " ; case {case}") + } else { + writeln!(w) + } +} + +fn write_numerical_jump_table_entry( + w: &mut W, + symbols: &SymbolLookup<'_>, + sym: &ds_decomp::config::symbol::Symbol, + value: i32, + directive: &str, + address: u32, +) -> Result<(), anyhow::Error> { + let label_address = (sym.addr.cast_signed() + value + 2).cast_unsigned(); + let Some(label) = symbols.symbol_map.get_label(label_address)? 
else { + log::error!( + "Expected label for jump table destination from {address:#010x} to {label_address:#010x}" + ); + bail!( + "Expected label for jump table destination from {address:#010x} to {label_address:#010x}" + ); + }; + write!(w, " {} {} - {} - 2", directive, label.name, sym.name)?; + Ok(()) +} diff --git a/cli/src/analysis/overlay_groups.rs b/cli/src/analysis/overlay_groups.rs index 3cdff2e..dd3eec5 100644 --- a/cli/src/analysis/overlay_groups.rs +++ b/cli/src/analysis/overlay_groups.rs @@ -1,4 +1,6 @@ -use anyhow::{Result, bail}; +use std::collections::{BTreeMap, btree_map}; + +use anyhow::Result; use ds_rom::rom::Overlay; pub struct OverlayGroups { @@ -8,87 +10,108 @@ pub struct OverlayGroups { pub type OverlayIndex = u16; pub struct OverlayGroup { - pub index: u16, pub start_address: u32, pub end_address: u32, pub overlays: Vec, - pub after: Vec, + pub location: OverlayGroupLocation, } -impl OverlayGroups { - pub fn analyze(static_end_address: u32, overlays: &[Overlay]) -> Result { - // Find all overlays immediately after the static modules (main program and autoloads except ITCM/DTCM) - let (first_group, first_group_end, mut ungrouped_overlays) = overlays.iter().fold( - (vec![], 0, vec![]), - |(mut first_group, mut first_group_end, mut rest), overlay| { - if overlay.base_address() == static_end_address { - first_group.push(overlay.id()); - first_group_end = first_group_end.max(overlay.end_address()); - } else { - rest.push(overlay.id()); - } - (first_group, first_group_end, rest) - }, - ); - log::debug!( - "Found {} overlays after static modules, first group end address: {:#010x}", - first_group.len(), - first_group_end - ); +#[derive(Clone)] +pub enum OverlayGroupLocation { + AfterStatic, // after ARM9 and custom autoloads + After(Vec), // after other overlays + Static, // static address +} - // Create groups of overlays, starting with the first group found earlier, ordered by base address - let mut groups = vec![OverlayGroup { - index: 0, - 
start_address: static_end_address, - end_address: first_group_end, - overlays: first_group, - after: vec![], - }]; +struct OverlaySuccessors { + overlays: Vec, + precedes: OverlayGroupLocation, +} - let mut new_group = vec![]; - let mut groups_to_connect = vec![0u16]; // list of groups (indices) which may be preceded by ungrouped overlays - while !ungrouped_overlays.is_empty() { - let Some(connect_index) = groups_to_connect.pop() else { - bail!("No more overlay groups to connect to, are there gaps between overlays?"); +impl OverlayGroups { + pub fn analyze(static_end_address: u32, overlays: &[Overlay]) -> Result { + // Map end addresses to modules + let mut precedents: BTreeMap = BTreeMap::new(); + precedents.insert(static_end_address, OverlayGroupLocation::AfterStatic); + for overlay in overlays { + match precedents.entry(overlay.end_address()) { + btree_map::Entry::Vacant(entry) => { + entry.insert(OverlayGroupLocation::After(vec![overlay.id()])); + } + btree_map::Entry::Occupied(mut entry) => { + let OverlayGroupLocation::After(overlays) = entry.get_mut() else { + unreachable!(); + }; + overlays.push(overlay.id()); + } }; - let connect_index = connect_index as usize; - - for i in 0..groups[connect_index].overlays.len() { - let grouped_overlay = &overlays[groups[connect_index].overlays[i] as usize]; - let overlay_end = grouped_overlay.end_address(); + } + let precedents = precedents; - let mut group_end = 0; - for j in (0..ungrouped_overlays.len()).rev() { - let overlay = &overlays[ungrouped_overlays[j] as usize]; - if overlay.base_address() == grouped_overlay.end_address() { - new_group.push(ungrouped_overlays.remove(j)); - group_end = group_end.max(overlay.end_address()); - } + // Map base addresses to overlays and precedents + let mut successors_map: BTreeMap = BTreeMap::new(); + for overlay in overlays { + match successors_map.entry(overlay.base_address()) { + btree_map::Entry::Vacant(entry) => { + let precedes = if let Some(precedes) = 
precedents.get(&overlay.base_address()) { + precedes.clone() + } else { + OverlayGroupLocation::Static + }; + entry.insert(OverlaySuccessors { overlays: vec![overlay.id()], precedes }); } + btree_map::Entry::Occupied(mut entry) => { + entry.get_mut().overlays.push(overlay.id()); + } + }; + } + let successors_map = successors_map; - if !new_group.is_empty() { - let after = groups[connect_index] - .overlays + // Create overlay groups + let mut groups = Vec::new(); + let mut group_index_by_overlay = vec![None; overlays.len()]; + for (base_address, successors) in successors_map { + let end_address = successors + .overlays + .iter() + .map(|&id| overlays[id as usize].end_address()) + .max() + .unwrap(); + + let location = match successors.precedes { + OverlayGroupLocation::AfterStatic => OverlayGroupLocation::AfterStatic, + OverlayGroupLocation::Static => OverlayGroupLocation::Static, + OverlayGroupLocation::After(items) => { + let mut group_indices = items + .iter() + .map(|&id| group_index_by_overlay[id as usize].unwrap()) + .collect::>(); + group_indices.sort_unstable(); + group_indices.dedup(); + let preceding_overlays = group_indices .iter() - .copied() - .filter(|&id| overlays[id as usize].end_address() <= overlay_end) + .flat_map(|&group_index| { + let group: &OverlayGroup = &groups[group_index]; + group + .overlays + .iter() + .filter(|&&id| overlays[id as usize].end_address() <= base_address) + .copied() + }) .collect(); - - new_group.reverse(); - - let index = groups.len() as u16; - groups.push(OverlayGroup { - index, - start_address: overlay_end, - end_address: group_end, - overlays: new_group, - after, - }); - groups_to_connect.push(index); - - new_group = vec![]; + OverlayGroupLocation::After(preceding_overlays) } + }; + + for &overlay in &successors.overlays { + group_index_by_overlay[overlay as usize] = Some(groups.len()); } + groups.push(OverlayGroup { + start_address: base_address, + end_address, + overlays: successors.overlays, + location, + }); } 
Ok(Self { groups }) diff --git a/cli/src/analysis/signature.rs b/cli/src/analysis/signature.rs index 8a51298..fa5321d 100644 --- a/cli/src/analysis/signature.rs +++ b/cli/src/analysis/signature.rs @@ -92,7 +92,7 @@ impl Signatures { for (address, ins, parsed_ins) in parser { let mut ins_bitmask: u32 = 0xffffffff; - if function.pool_constants().contains(&address) { + if function.pool_constants().contains_key(&address) { // TODO: Only mask out pool constants which are pointers? parser.seek_forward(address + 4); // Skip pool constants bitmask.extend_from_slice(&[0x00, 0x00, 0x00, 0x00]); @@ -181,7 +181,7 @@ impl Signatures { relocations.push(SignatureRelocation { offset, name, kind, addend }); } - for &address in function.pool_constants() { + for &address in function.pool_constants().keys() { let offset = (address - function.start_address()) as usize; bitmask[offset..offset + 4].fill(0); pattern[offset..offset + 4].fill(0); diff --git a/cli/src/cmd/check/symbols.rs b/cli/src/cmd/check/symbols.rs index f7beea7..2636ccf 100644 --- a/cli/src/cmd/check/symbols.rs +++ b/cli/src/cmd/check/symbols.rs @@ -88,10 +88,15 @@ impl CheckSymbols { break; } + if matches!(target_symbol.kind, SymbolKind::Label(_)) { + // Label symbols are not imported by SymbolMapsExt::from_object + continue; + } + let Some(symbol_iter) = object.for_address(target_symbol.addr) else { num_mismatches += 1; log::error!( - "Symbol '{}' in {} at {:#010x} not found in linked binary", + "Symbol '{}' in {} at {:#010x} not found by address in linked binary", target_symbol.name, module_kind, target_symbol.addr @@ -100,6 +105,8 @@ impl CheckSymbols { for (_, candidate) in candidates { log::error!(" Matching name found at {:#010x}", candidate.addr); } + } else { + log::error!(" No other symbols found with the same name"); } continue; }; @@ -111,7 +118,7 @@ impl CheckSymbols { else { num_mismatches += 1; log::error!( - "Symbol '{}' in {} at {:#010x} not found in linked binary", + "Symbol '{}' in {} at {:#010x} not 
found by name in linked binary", target_symbol.name, module_kind, target_symbol.addr @@ -120,6 +127,8 @@ impl CheckSymbols { for (_, candidate) in candidates { log::error!(" Matching name found at {:#010x}", candidate.addr); } + } else { + log::error!(" No other symbols found with the same name"); } if let Some(candidates) = object.for_address(target_symbol.addr) { for (_, candidate) in candidates { diff --git a/cli/src/cmd/dis.rs b/cli/src/cmd/dis.rs index 62836d1..a25cb3e 100644 --- a/cli/src/cmd/dis.rs +++ b/cli/src/cmd/dis.rs @@ -10,7 +10,7 @@ use ds_decomp::config::{ config::Config, delinks::{DelinkFile, Delinks}, module::Module, - section::Section, + section::{Section, SectionKind}, symbol::{InstructionMode, Symbol, SymbolKind, SymbolMaps}, }; use ds_rom::rom::{Rom, RomLoadOptions}; @@ -140,7 +140,7 @@ impl Disassemble { module_kind: module.kind(), symbol_map, symbol_maps, - relocations: module.relocations(), + relocations: Some(module.relocations()), }; let mut symbol_iter = symbol_map.iter_by_address(section.address_range()).peekable(); @@ -150,10 +150,27 @@ impl Disassemble { ); match symbol.kind { SymbolKind::Function(sym_function) => { + if section.kind() == SectionKind::Bss { + log::error!( + "Can't disassemble function at {:#010x} in {} because it's in uninitialized section {}", + symbol.addr, + module.kind(), + section.name() + ); + continue; + } + + let code = code.with_context(|| { + format!( + "No code to dump for function at {:#010x} in {}", + symbol.addr, + module.kind() + ) + })?; if sym_function.unknown { let function_offset = symbol.addr - section.start_address(); if offset < function_offset { - Self::dump_bytes(code.unwrap(), offset, function_offset, writer)?; + Self::dump_bytes(code, offset, function_offset, writer)?; writeln!(writer)?; offset = function_offset; } @@ -178,7 +195,7 @@ impl Disassemble { let function_offset = function.start_address() - section.start_address(); if offset < function_offset { - Self::dump_bytes(code.unwrap(), 
offset, function_offset, writer)?; + Self::dump_bytes(code, offset, function_offset, writer)?; writeln!(writer)?; } diff --git a/cli/src/cmd/dump/elf_function.rs b/cli/src/cmd/dump/elf_function.rs new file mode 100644 index 0000000..16ff48f --- /dev/null +++ b/cli/src/cmd/dump/elf_function.rs @@ -0,0 +1,194 @@ +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; +use clap::Args; +use ds_decomp::{ + analysis::functions::{Function, FunctionParseOptions, ParseFunctionOptions}, + config::{config::Config, module::ModuleKind, symbol::SymbolMaps}, +}; +use object::{Object, ObjectSection, ObjectSymbol}; + +use crate::{ + analysis::functions::FunctionExt, + config::{ + module::ModuleKindExt, + symbol::{SymbolLookup, SymbolMapsExt}, + }, + util::io::read_file, +}; + +/// Dumps info about a function from the linked ELF file. +#[derive(Args, Clone)] +pub struct DumpElfFunction { + /// Path to config.yaml. + #[arg(long, short = 'c')] + config_path: PathBuf, + + // Name of the ELF file, defaults to arm9.o. + #[arg(long, short = 'e', default_value = "arm9.o")] + elf_name: String, + + /// Name of the function. 
+ #[arg(long, short = 'n')] + name: String, +} + +impl DumpElfFunction { + pub fn run(&self) -> anyhow::Result<()> { + let config = Config::from_file(&self.config_path)?; + let config_path = self.config_path.parent().unwrap(); + + let elf_data = self.read_elf(&config, config_path)?; + let object = self.parse_elf(&elf_data)?; + + let symbol = self.find_symbol(&object)?; + self.print_symbol(&symbol); + + let section = self.get_section_for_symbol(&object, &symbol)?; + let section_name = self.get_section_name(&section)?; + self.print_section(section_name); + + let module_kind = self.infer_module_kind(section_name)?; + self.print_module(module_kind); + + for section in object.sections() { + for (src_addr, relocation) in section.relocations() { + let object::RelocationTarget::Symbol(symbol_index) = relocation.target() else { + continue; + }; + if symbol_index != symbol.index() { + continue; + } + let section_name = self.get_section_name(&section)?; + let module_kind = self.infer_module_kind(section_name)?; + println!(" Relocation from: {:#010x} in {}", src_addr, module_kind); + } + } + + let data = self.read_data(&config, module_kind)?; + self.print_data(&symbol, &section, &data); + self.print_disassembly(&object, &symbol, &section, module_kind, &data)?; + + Ok(()) + } + + fn read_elf(&self, config: &Config, config_path: &Path) -> Result, anyhow::Error> { + let build_path = config_path.join(&config.build_path); + let elf_path = build_path.join(&self.elf_name); + Ok(read_file(&elf_path)?) + } + + fn parse_elf<'a>(&self, data: &'a [u8]) -> Result, anyhow::Error> { + Ok(object::File::parse(data)?) 
+ } + + fn find_symbol<'a>(&self, object: &'a object::File<'_>) -> Result> { + object.symbol_by_name(&self.name).context("No function with that name was found") + } + + fn print_symbol(&self, symbol: &object::Symbol<'_, '_>) { + println!("{}:", self.name); + println!(" Address: {:#010x}", symbol.address()); + println!(" Size: {:#x}", symbol.size()); + } + + fn get_section_for_symbol<'a>( + &self, + object: &'a object::File<'_>, + symbol: &object::Symbol<'_, '_>, + ) -> Result> { + let section_index = symbol.section_index().context("Function symbol has no section")?; + object.section_by_index(section_index).context("Function's section not found") + } + + fn get_section_name<'a>(&self, section: &'a object::Section<'_, '_>) -> Result<&'a str> { + section.name().context("Failed to get section name") + } + + fn print_section(&self, section_name: &str) { + println!(" Section: {}", section_name); + } + + fn infer_module_kind(&self, section_name: &str) -> Result { + ModuleKind::from_linked_section_name(section_name) + .context("Failed to get module kind")? 
+ .context("Section name does not match any known module") + } + + fn print_module(&self, module_kind: ModuleKind) { + println!(" Module: {}", module_kind); + } + + fn read_data(&self, config: &Config, module_kind: ModuleKind) -> Result> { + let config_path = self.config_path.parent().unwrap(); + let config_module = config + .get_module_config_by_kind(module_kind) + .with_context(|| format!("{} not found in config.yaml", module_kind))?; + let bin_file_path = config_path.join(&config_module.object); + read_file(&bin_file_path).with_context(|| { + format!("Failed to read section data from {}", bin_file_path.display()) + }) + } + + fn print_data( + &self, + symbol: &object::Symbol<'_, '_>, + section: &object::Section<'_, '_>, + data: &[u8], + ) { + println!(" Data:"); + let start = (symbol.address() + 1).next_multiple_of(16) - 16; + let end = (symbol.address() + symbol.size()).next_multiple_of(16); + for row_address in (start..end).step_by(16) { + print!(" {:08x} ", row_address); + for i in 0..16 { + let address = row_address + i; + if (symbol.address()..symbol.address() + symbol.size()).contains(&address) { + print!(" {:02x}", data[(address - section.address()) as usize]); + } else { + print!(" .."); + } + } + println!(); + } + } + + fn print_disassembly( + &self, + object: &object::File<'_>, + symbol: &object::Symbol<'_, '_>, + section: &object::Section<'_, '_>, + module_kind: ModuleKind, + data: &[u8], + ) -> Result<(), anyhow::Error> { + let function = Function::parse_function(FunctionParseOptions { + name: self.name.clone(), + start_address: symbol.address() as u32, + base_address: section.address() as u32, + module_code: data, + known_end_address: None, + module_start_address: section.address() as u32, + module_end_address: section.address() as u32 + data.len() as u32, + existing_functions: None, + check_defs_uses: false, + parse_options: ParseFunctionOptions { thumb: None }, + }) + .context("Failed to parse function")?; + + let symbol_maps = + 
SymbolMaps::from_object(object).context("Failed to construct symbol maps from ELF")?; + let symbol_map = symbol_maps.get(module_kind).unwrap(); + let symbol_lookup = + SymbolLookup { module_kind, symbol_map, symbol_maps: &symbol_maps, relocations: None }; + + println!("Disassembly:"); + function.write_assembly( + &mut std::io::stdout().lock(), + &symbol_lookup, + data, + section.address() as u32, + true, + )?; + Ok(()) + } +} diff --git a/cli/src/cmd/dump/mod.rs b/cli/src/cmd/dump/mod.rs index ef4bee0..ba45128 100644 --- a/cli/src/cmd/dump/mod.rs +++ b/cli/src/cmd/dump/mod.rs @@ -1,8 +1,10 @@ mod ambig_relocs; +mod elf_function; mod elf_symbols; use ambig_relocs::*; use clap::{Args, Subcommand}; +use elf_function::*; use elf_symbols::*; /// Subcommands for dumping information from a dsd project. @@ -17,6 +19,7 @@ impl DumpArgs { match &self.command { DumpCommands::ElfSymbols(dump_elf_symbols) => dump_elf_symbols.run(), DumpCommands::AmbigRelocs(dump_ambig_relocs) => dump_ambig_relocs.run(), + DumpCommands::ElfFunction(dump_elf_function) => dump_elf_function.run(), } } } @@ -25,4 +28,5 @@ impl DumpArgs { enum DumpCommands { ElfSymbols(DumpElfSymbols), AmbigRelocs(DumpAmbigRelocs), + ElfFunction(DumpElfFunction), } diff --git a/cli/src/cmd/fix/thumb_nop.rs b/cli/src/cmd/fix/thumb_nop.rs index af0a507..17f6c02 100644 --- a/cli/src/cmd/fix/thumb_nop.rs +++ b/cli/src/cmd/fix/thumb_nop.rs @@ -89,7 +89,7 @@ impl FixThumbNop { } let last_instruction_address = function.end_address() - 2; - if function.pool_constants().contains(&(last_instruction_address & !3)) { + if function.pool_constants().contains_key(&(last_instruction_address & !3)) { continue; } // Function is Thumb and does not end with a pool constant diff --git a/cli/src/cmd/lcf.rs b/cli/src/cmd/lcf.rs index fb85b6c..456f82b 100644 --- a/cli/src/cmd/lcf.rs +++ b/cli/src/cmd/lcf.rs @@ -16,7 +16,7 @@ use strum::IntoEnumIterator as _; use tinytemplate::TinyTemplate; use crate::{ - 
analysis::overlay_groups::OverlayGroups, + analysis::overlay_groups::{OverlayGroupLocation, OverlayGroups}, config::{ delinks::{DelinksMap, DelinksMapOptions}, section::SectionExt, @@ -190,7 +190,7 @@ impl Lcf { &config_dir.join(&config.delinks_path) }; let file = base_path.join(file_path).with_extension("o").clean(); - writeln!(writer, "{}", file.display())?; + writeln!(writer, "\"{}\"", file.display())?; } Ok(()) } @@ -358,20 +358,22 @@ impl LinkModules { log::debug!("Static end address: {static_end_address:#010x}"); let overlay_groups = OverlayGroups::analyze(static_end_address, rom.arm9_overlays())?; for group in overlay_groups.iter() { - let origin = if group.after.is_empty() { - let last_static_module = link_modules.last_static_module(); - format!("AFTER({})", last_static_module.name) - } else { - format!( - "AFTER({})", - group - .after - .iter() - .map(|id| format!("OV{id:03}")) - .collect::>() - .join(", ") - ) + let origin = match &group.location { + OverlayGroupLocation::AfterStatic => { + let last_static_module = link_modules.last_static_module(); + format!("AFTER({})", last_static_module.name) + } + OverlayGroupLocation::After(ids) => { + format!( + "AFTER({})", + ids.iter().map(|id| format!("OV{id:03}")).collect::>().join(", ") + ) + } + OverlayGroupLocation::Static => { + format!("{:#010x}", group.start_address) + } }; + for &overlay_id in &group.overlays { let kind = ModuleKind::Overlay(overlay_id); link_modules.modules.push(LcfModule::new( diff --git a/cli/src/cmd/rom/config.rs b/cli/src/cmd/rom/config.rs index 9cd4d72..8914cf6 100644 --- a/cli/src/cmd/rom/config.rs +++ b/cli/src/cmd/rom/config.rs @@ -14,13 +14,16 @@ use ds_decomp::config::{ use ds_rom::rom::{ OverlayConfig, OverlayTableConfig, Rom, RomConfig, RomLoadOptions, raw::AutoloadKind, }; -use object::{Object, ObjectSection, ObjectSymbol}; +use object::{ObjectSection, ObjectSymbol}; use path_slash::PathExt; use pathdiff::diff_paths; use crate::{ config::section::SectionExt, - 
util::io::{create_file, open_file, read_file}, + util::{ + io::{create_file, open_file, read_file}, + object::ObjectCache, + }, }; /// Creates a configuration to build a ROM from linked binaries. @@ -61,11 +64,12 @@ impl ConfigRom { let file = read_file(&self.elf)?; let object = object::File::parse(&*file)?; + let object_cache = ObjectCache::new(&object); - self.config_arm9(&object, &config, &rom, &mut rom_paths, new_rom_paths_dir)?; - self.config_autoloads(&object, &config, &rom, &mut rom_paths, new_rom_paths_dir)?; + self.config_arm9(&object_cache, &config, &rom, &mut rom_paths, new_rom_paths_dir)?; + self.config_autoloads(&object_cache, &config, &rom, &mut rom_paths, new_rom_paths_dir)?; self.config_overlays( - &object, + &object_cache, &config, &rom, &mut rom_paths, @@ -130,7 +134,7 @@ impl ConfigRom { fn config_overlays( &self, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, config: &Config, rom: &Rom<'_>, rom_paths: &mut RomConfig, @@ -152,28 +156,33 @@ impl ConfigRom { .with_context(|| format!("Failed to find overlay {} in ROM", overlay.id))?; let module_name = format!("OV{:03}", overlay.id); - let file_name = format!("arm9_ov{:03}.bin", overlay.id); + let file_name = + overlay.module.object.file_name().unwrap().to_string_lossy().to_string(); - let ctor_start = object - .symbol_by_name(&format!("{module_name}_CTOR_START")) + let ctor_start = object_cache + .symbols_by_name + .get(&format!("{module_name}_CTOR_START")) .with_context(|| format!("No CTOR_START in overlay {}", overlay.id))?; - let ctor_end = object - .symbol_by_name(&format!("{module_name}_CTOR_END")) + let ctor_end = object_cache + .symbols_by_name + .get(&format!("{module_name}_CTOR_END")) .with_context(|| format!("No CTOR_END in overlay {}", overlay.id))?; let base_address = - Self::section_ranges(&delinks.sections, &module_name, object, |_| true)? + Self::section_ranges(&delinks.sections, &module_name, object_cache, |_| true)? 
.unwrap() .start; let mut info = rom_overlay.info().clone(); info.base_address = base_address; - let code_range = Self::section_ranges(&delinks.sections, &module_name, object, |s| { - s.kind().is_initialized() - })?; - let bss_range = Self::section_ranges(&delinks.sections, &module_name, object, |s| { - !s.kind().is_initialized() - })?; + let code_range = + Self::section_ranges(&delinks.sections, &module_name, object_cache, |s| { + s.kind().is_initialized() + })?; + let bss_range = + Self::section_ranges(&delinks.sections, &module_name, object_cache, |s| { + !s.kind().is_initialized() + })?; let bss_range = bss_range .or(code_range.map(|r| r.end..r.end)) @@ -216,7 +225,7 @@ impl ConfigRom { fn config_autoloads( &self, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, config: &Config, rom: &Rom<'_>, rom_paths: &mut RomConfig, @@ -236,22 +245,20 @@ impl ConfigRom { .find(|a| a.base_address() == base_address) .with_context(|| format!("Failed to find autoload {} in ROM", autoload.kind))?; - let (module_name, file_name) = match autoload.kind { - AutoloadKind::Itcm => ("ITCM".into(), "itcm.yaml".into()), - AutoloadKind::Dtcm => ("DTCM".into(), "dtcm.yaml".into()), - AutoloadKind::Unknown(index) => { - (format!("AUTOLOAD_{index}"), format!("autoload_{index}.yaml")) - } + let module_name = match autoload.kind { + AutoloadKind::Itcm => "ITCM".into(), + AutoloadKind::Dtcm => "DTCM".into(), + AutoloadKind::Unknown(index) => format!("AUTOLOAD_{index}"), }; let mut autoload_info = *rom_autoload.info(); autoload_info.list_entry.code_size = - Self::section_ranges(&delinks.sections, &module_name, object, |s| { + Self::section_ranges(&delinks.sections, &module_name, object_cache, |s| { s.kind().is_initialized() })? .map_or(0, |range| range.len() as u32); autoload_info.list_entry.bss_size = - Self::section_ranges(&delinks.sections, &module_name, object, |s| { + Self::section_ranges(&delinks.sections, &module_name, object_cache, |s| { !s.kind().is_initialized() })? 
.map_or(0, |range| range.len() as u32); @@ -261,10 +268,24 @@ impl ConfigRom { autoload_info.list_entry.code_size.next_multiple_of(text_section.alignment()); } + let yaml_file_name = autoload + .module + .object + .with_extension("yaml") + .file_name() + .unwrap() + .to_string_lossy() + .to_string(); + let binary_path = config_path.join(&autoload.module.object); - let yaml_path = binary_path.parent().unwrap().join(file_name); + let yaml_path = binary_path.parent().unwrap().join(yaml_file_name); serde_saphyr::to_io_writer(&mut create_file(&yaml_path)?, &autoload_info)?; + if !binary_path.exists() { + // Linker does not create binary file if module is completely empty + create_file(&binary_path)?; + } + match autoload.kind { AutoloadKind::Itcm => { rom_paths.itcm.bin = Self::make_path(binary_path, rom_paths_dir); @@ -274,7 +295,17 @@ impl ConfigRom { rom_paths.dtcm.bin = Self::make_path(binary_path, rom_paths_dir); rom_paths.dtcm.config = Self::make_path(yaml_path, rom_paths_dir); } - AutoloadKind::Unknown(_) => {} + AutoloadKind::Unknown(index) => { + let autoload = rom_paths + .unknown_autoloads + .iter_mut() + .find(|a| a.index == index) + .with_context(|| { + format!("Failed to find autoload {} in ROM config", index) + })?; + autoload.files.bin = Self::make_path(binary_path, rom_paths_dir); + autoload.files.config = Self::make_path(yaml_path, rom_paths_dir); + } } } @@ -283,7 +314,7 @@ impl ConfigRom { fn config_arm9( &self, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, config: &Config, rom: &Rom<'_>, rom_paths: &mut RomConfig, @@ -291,21 +322,24 @@ impl ConfigRom { ) -> Result<()> { let config_path = self.config.parent().unwrap(); - let arm9_section = object.section_by_name("ARM9").context("ARM9 section not found")?; + let arm9_section = + object_cache.sections_by_name.get("ARM9").context("ARM9 section not found")?; let build_info_symbol = - object.symbol_by_name("BuildInfo").context("BuildInfo symbol not found")?; - let 
autoload_callback_symbol = - object.symbol_by_name("AutoloadCallback").context("BuildInfo symbol not found")?; + object_cache.symbols_by_name.get("BuildInfo").context("BuildInfo symbol not found")?; + let autoload_callback_symbol = object_cache + .symbols_by_name + .get("AutoloadCallback") + .context("BuildInfo symbol not found")?; let delinks = Delinks::from_file(config_path.join(&config.main_module.delinks), ModuleKind::Arm9)?; - let bss_range = Self::section_ranges(&delinks.sections, "ARM9", object, |s| { + let bss_range = Self::section_ranges(&delinks.sections, "ARM9", object_cache, |s| { !s.kind().is_initialized() })? .unwrap(); let mut arm9_build_config = rom.arm9_build_config()?; arm9_build_config.offsets.base_address = arm9_section.address() as u32; - arm9_build_config.offsets.entry_function = object.entry() as u32; + arm9_build_config.offsets.entry_function = object_cache.entry; arm9_build_config.offsets.build_info = (build_info_symbol.address() - arm9_section.address()) as u32; arm9_build_config.offsets.autoload_callback = autoload_callback_symbol.address() as u32; @@ -327,7 +361,7 @@ impl ConfigRom { fn section_ranges( sections: &Sections, module_name: &str, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, predicate: F, ) -> Result>> where @@ -336,7 +370,7 @@ impl ConfigRom { Ok(sections .iter() .filter(predicate) - .map(|s| s.range_from_object(module_name, object)) + .map(|s| s.range_from_object(module_name, object_cache)) .collect::>>()? 
.into_iter() .reduce(|a, b| a.start.min(b.start)..a.end.max(b.end))) diff --git a/cli/src/config/mod.rs b/cli/src/config/mod.rs index 07b76e1..9c401e4 100644 --- a/cli/src/config/mod.rs +++ b/cli/src/config/mod.rs @@ -1,4 +1,5 @@ pub mod delinks; +pub mod module; pub mod program; pub mod relocation; pub mod section; diff --git a/cli/src/config/module.rs b/cli/src/config/module.rs new file mode 100644 index 0000000..bef8973 --- /dev/null +++ b/cli/src/config/module.rs @@ -0,0 +1,33 @@ +use anyhow::{Result, anyhow}; +use ds_decomp::config::module::ModuleKind; +use ds_rom::rom::raw::AutoloadKind; + +pub trait ModuleKindExt +where + Self: Sized, +{ + fn from_linked_section_name(section_name: &str) -> Result>; +} + +impl ModuleKindExt for ModuleKind { + fn from_linked_section_name(section_name: &str) -> Result> { + match section_name { + "ARM9" => Ok(Some(ModuleKind::Arm9)), + "ITCM" => Ok(Some(ModuleKind::Autoload(AutoloadKind::Itcm))), + "DTCM" => Ok(Some(ModuleKind::Autoload(AutoloadKind::Dtcm))), + name if name.starts_with("OV") => { + let id = name[2..].parse::().map_err(|_| { + anyhow!("Invalid overlay ID in linked object section name '{section_name}'") + })?; + Ok(Some(ModuleKind::Overlay(id))) + } + name if name.starts_with("AUTOLOAD_") => { + let index = name[9..].parse::().map_err(|_| { + anyhow!("Invalid autoload index in linked object section name '{section_name}'") + })?; + Ok(Some(ModuleKind::Autoload(AutoloadKind::Unknown(index)))) + } + _ => Ok(None), + } + } +} diff --git a/cli/src/config/program.rs b/cli/src/config/program.rs index d6a539d..1b1f486 100644 --- a/cli/src/config/program.rs +++ b/cli/src/config/program.rs @@ -79,14 +79,11 @@ impl Program { pub fn analyze_cross_references(&mut self, options: &AnalysisOptions) -> Result<()> { for module_index in 0..self.modules.len() { let RelocationResult { relocations, external_symbols } = - data::analyze_external_references( - AnalyzeExternalReferencesOptions { - modules: &self.modules, - module_index, - 
symbol_maps: &mut self.symbol_maps, - }, - options, - )?; + data::analyze_external_references(&mut AnalyzeExternalReferencesOptions { + modules: &self.modules, + module_index, + symbol_maps: &mut self.symbol_maps, + })?; let module_relocations = self.modules[module_index].relocations_mut(); for reloc in relocations { diff --git a/cli/src/config/section.rs b/cli/src/config/section.rs index 1fbc6c8..fc97134 100644 --- a/cli/src/config/section.rs +++ b/cli/src/config/section.rs @@ -6,7 +6,9 @@ use ds_decomp::config::{ relocations::{Relocation, RelocationKind, RelocationModule}, section::Section, }; -use object::{Object, ObjectSymbol}; +use object::ObjectSymbol; + +use crate::util::object::ObjectCache; pub trait SectionExt { fn relocatable_code(&self, module: &Module) -> Result>>; @@ -14,8 +16,11 @@ pub trait SectionExt { /// Name of this section for creating section boundary symbols, e.g. `ARM9_BSS_START` fn boundary_name(&self) -> String; - fn range_from_object(&self, module_name: &str, object: &object::File<'_>) - -> Result>; + fn range_from_object( + &self, + module_name: &str, + object_cache: &ObjectCache<'_, '_>, + ) -> Result>; } impl SectionExt for Section { @@ -78,17 +83,19 @@ impl SectionExt for Section { fn range_from_object( &self, module_name: &str, - object: &object::File<'_>, + object_cache: &ObjectCache<'_, '_>, ) -> Result> { let boundary_name = self.boundary_name(); let boundary_start = format!("{module_name}_{boundary_name}_START"); let boundary_end = format!("{module_name}_{boundary_name}_END"); - let start = object - .symbol_by_name(&boundary_start) + let start = object_cache + .symbols_by_name + .get(&boundary_start) .with_context(|| format!("Failed to find symbol {boundary_start}"))? .address() as u32; - let end = object - .symbol_by_name(&boundary_end) + let end = object_cache + .symbols_by_name + .get(&boundary_end) .with_context(|| format!("Failed to find symbol {boundary_end}"))? 
.address() as u32; Ok(start..end) diff --git a/cli/src/config/symbol.rs b/cli/src/config/symbol.rs index 711694a..d316c32 100644 --- a/cli/src/config/symbol.rs +++ b/cli/src/config/symbol.rs @@ -1,20 +1,23 @@ use std::{collections::BTreeMap, io}; -use anyhow::{Result, anyhow, bail}; -use ds_decomp::config::{ - Comments, - module::ModuleKind, - relocations::Relocations, - symbol::{ - InstructionMode, SymData, SymFunction, SymLabel, Symbol, SymbolKind, SymbolMap, SymbolMaps, +use anyhow::{Result, bail}; +use ds_decomp::{ + analysis::jump_table::JumpTableKind, + config::{ + Comments, + module::ModuleKind, + relocations::Relocations, + symbol::{ + InstructionMode, SymData, SymFunction, SymLabel, Symbol, SymbolKind, SymbolMap, + SymbolMaps, + }, }, }; -use ds_rom::rom::raw::AutoloadKind; use object::{Object, ObjectSection, ObjectSymbol}; use unarm::LookupSymbol; use super::relocation::RelocationModuleExt; -use crate::util::bytes::FromSlice; +use crate::{config::module::ModuleKindExt, util::bytes::FromSlice}; pub struct LookupSymbolMap(SymbolMap); @@ -65,25 +68,8 @@ impl SymbolMapsExt for SymbolMaps { } else { continue; }; - let module_kind = match section_name { - "ARM9" => ModuleKind::Arm9, - "ITCM" => ModuleKind::Autoload(AutoloadKind::Itcm), - "DTCM" => ModuleKind::Autoload(AutoloadKind::Dtcm), - name if name.starts_with("OV") => { - let id = name[2..].parse::().map_err(|_| { - anyhow!("Invalid overlay ID in linked object section name '{section_name}'") - })?; - ModuleKind::Overlay(id) - } - name if name.starts_with("AUTOLOAD_") => { - let index = name[9..].parse::().map_err(|_| { - anyhow!( - "Invalid autoload index in linked object section name '{section_name}'" - ) - })?; - ModuleKind::Autoload(AutoloadKind::Unknown(index)) - } - _ => continue, + let Some(module_kind) = ModuleKind::from_linked_section_name(section_name)? 
else { + continue; }; let symbol_map = symbol_maps.get_mut(module_kind); @@ -120,13 +106,10 @@ impl SymbolExt for Symbol { InstructionMode::Thumb => Some("$t"), }, SymbolKind::PoolConstant => Some("$d"), - SymbolKind::JumpTable(jump_table) => { - if jump_table.code { - Some("$a") - } else { - Some("$d") - } - } + SymbolKind::JumpTable(jump_table) => match jump_table.kind { + JumpTableKind::Arm => Some("$a"), + JumpTableKind::Thumb(_) => Some("$d"), + }, SymbolKind::Data(_) => Some("$d"), SymbolKind::Bss(_) => None, } @@ -246,7 +229,7 @@ pub struct SymbolLookup<'a> { pub symbol_map: &'a SymbolMap, /// All symbol maps, including external modules pub symbol_maps: &'a SymbolMaps, - pub relocations: &'a Relocations, + pub relocations: Option<&'a Relocations>, } impl SymbolLookup<'_> { @@ -258,7 +241,10 @@ impl SymbolLookup<'_> { new_line: &mut bool, indent: &str, ) -> Result { - if let Some(relocation) = self.relocations.get(source) { + let Some(relocations) = self.relocations else { + return Ok(false); + }; + if let Some(relocation) = relocations.get(source) { let relocation_to = relocation.module(); if let Some(module_kind) = relocation_to.first_module() { let symbol_address = (destination as i64 - relocation.addend()) as u32; @@ -324,7 +310,8 @@ impl SymbolLookup<'_> { source: u32, destination: u32, ) -> Result<()> { - let Some(relocation) = self.relocations.get(source) else { return Ok(()) }; + let Some(relocations) = self.relocations else { return Ok(()) }; + let Some(relocation) = relocations.get(source) else { return Ok(()) }; if let Some(overlays) = relocation.module().other_modules() { write!(w, " ; ")?; @@ -364,7 +351,8 @@ impl LookupSymbol for SymbolLookup<'_> { if let Some((_, symbol)) = self.symbol_map.first_at_address(destination) { return Some(&symbol.name); } - if let Some(relocation) = self.relocations.get(source) { + let relocations = self.relocations?; + if let Some(relocation) = relocations.get(source) { let module_kind = 
relocation.module().first_module()?; let external_symbol_map = self.symbol_maps.get(module_kind).unwrap(); diff --git a/cli/src/util/mod.rs b/cli/src/util/mod.rs index 77eaaa4..6efddd4 100644 --- a/cli/src/util/mod.rs +++ b/cli/src/util/mod.rs @@ -1,5 +1,6 @@ pub mod bytes; pub mod debug; pub mod io; +pub mod object; pub mod parse; pub mod path; diff --git a/cli/src/util/object.rs b/cli/src/util/object.rs new file mode 100644 index 0000000..8a84ef5 --- /dev/null +++ b/cli/src/util/object.rs @@ -0,0 +1,24 @@ +use std::collections::HashMap; + +use object::{Object as _, ObjectSection as _, ObjectSymbol as _}; + +pub struct ObjectCache<'data, 'file> { + pub symbols_by_name: HashMap>, + pub sections_by_name: HashMap>, + pub entry: u32, +} + +impl<'data, 'file> ObjectCache<'data, 'file> { + pub fn new(object: &'data object::File<'data>) -> Self { + let symbols_by_name = object + .symbols() + .filter_map(|symbol| symbol.name().ok().map(|name| (name.to_string(), symbol))) + .collect::>(); + let sections_by_name = object + .sections() + .filter_map(|section| section.name().ok().map(|name| (name.to_string(), section))) + .collect::>(); + let entry = object.entry() as u32; + Self { symbols_by_name, sections_by_name, entry } + } +} diff --git a/cli/tests/test_roundtrip.rs b/cli/tests/test_roundtrip.rs index 0d62f9e..454a895 100644 --- a/cli/tests/test_roundtrip.rs +++ b/cli/tests/test_roundtrip.rs @@ -8,16 +8,18 @@ use std::{ process::Command, }; -use anyhow::Result; -use ds_decomp::config::config::Config; +use anyhow::{Result, bail}; +use ds_decomp::{ + analysis::FindLocalDataError, + config::{config::Config, module::ModuleError}, +}; use ds_decomp_cli::{ - analysis::data::AnalyzeExternalReferencesError, cmd::{CheckModules, CheckSymbols, ConfigRom, Delink, Disassemble, Init, JsonDelinks, Lcf}, util::io::{create_dir_all, read_to_string}, }; use ds_rom::{ crypto::blowfish::BlowfishKey, - rom::{Rom, raw}, + rom::{Rom, RomLoadOptions, raw}, }; use log::LevelFilter; use 
zip::ZipArchive; @@ -53,13 +55,20 @@ fn test_roundtrip() -> Result<()> { // Extract ROM let base_name = path.with_extension("").file_name().unwrap().to_str().unwrap().to_string(); let project_path = roms_dir.join(&base_name); - let extract_path = extract_rom(&path, &project_path, &key)?; + let extract_path = project_path.join("extract"); + let raw_rom = raw::Rom::from_file(path)?; + let rom = Rom::extract(&raw_rom)?; + rom.save(&extract_path, Some(&key))?; let rom_config = extract_path.join("config.yaml"); // Init dsd project + let mut allowed_unknown_function_calls = false; let dsd_config_dir = dsd_init(&project_path, &rom_config, false).or_else(|e| { - match e.downcast_ref::() { - Some(AnalyzeExternalReferencesError::LocalFunctionNotFound { .. }) => { + match e.downcast_ref::() { + Some(ModuleError::FindLocalData { + source: FindLocalDataError::LocalFunctionNotFound { .. }, + }) => { + allowed_unknown_function_calls = true; log::info!("dsd init failed, trying again with unknown function calls"); dsd_init(&project_path, &rom_config, true) } @@ -68,11 +77,18 @@ fn test_roundtrip() -> Result<()> { })?; let dsd_config_yaml = dsd_config_dir.join("arm9/config.yaml"); let dsd_config = Config::from_file(&dsd_config_yaml)?; - let target_config_dir = configs_dir.join(base_name); - assert!( - target_config_dir.exists(), - "Init succeeded, copy the config directory to tests/configs/ to compare future runs" - ); + let target_config_dir = configs_dir.join(&base_name); + if allowed_unknown_function_calls { + assert!( + target_config_dir.exists(), + "Init succeeded with unknown function calls, copy the config directory to tests/configs/ to compare future runs" + ); + } else { + assert!( + target_config_dir.exists(), + "Init succeeded, copy the config directory to tests/configs/ to compare future runs" + ); + } assert!(directory_equals(&target_config_dir, &dsd_config_dir)?); @@ -134,6 +150,22 @@ fn test_roundtrip() -> Result<()> { ConfigRom { elf: linker_out_file.clone(), config: 
dsd_config_yaml.clone() }; config_rom.run()?; + // Build ROM + let rom_config_path = dsd_config_dir + .join("arm9") + .join(&dsd_config.main_module.object) + .with_file_name("rom_config.yaml"); + let rom_load_options = RomLoadOptions { key: Some(&key), ..Default::default() }; + let rom = Rom::load(&rom_config_path, rom_load_options)?; + let built_rom = rom.build(Some(&key))?; + let rom_path = project_path.join(format!("build_{base_name}.nds")); + built_rom.save(rom_path)?; + + // Compare ROMs + if built_rom.data() != raw_rom.data() { + bail!("Built ROM does not match base ROM"); + } + fs::remove_dir_all(project_path)?; } @@ -180,14 +212,6 @@ fn dsd_init( Ok(dsd_config_dir) } -fn extract_rom(path: &Path, project_path: &Path, key: &BlowfishKey) -> Result { - let extract_path = project_path.join("extract"); - let raw_rom = raw::Rom::from_file(path)?; - let rom = Rom::extract(&raw_rom)?; - rom.save(&extract_path, Some(key))?; - Ok(extract_path) -} - fn directory_equals(target: &Path, base: &Path) -> Result { log::debug!("Comparing target directory '{}' with base '{}'", target.display(), base.display()); diff --git a/lib/Cargo.toml b/lib/Cargo.toml index a4137da..fb1df80 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -10,7 +10,7 @@ description = "Library for ds-decomp, a DS decompilation toolkit." 
[dependencies] bytemuck = { version = "1.25", features = ["derive"] } -ds-rom = "0.7" +ds-rom = { git = "https://github.com/AetiasHax/ds-rom", branch = "0.7.1" } log = "0.4" serde = "1.0" serde-saphyr = "0.0" diff --git a/lib/src/analysis/ctor.rs b/lib/src/analysis/ctor.rs index e9f2969..d218cc2 100644 --- a/lib/src/analysis/ctor.rs +++ b/lib/src/analysis/ctor.rs @@ -135,9 +135,9 @@ impl CtorRange { Err(e) => return Err(e.into()), }; - let p_ctor_start = run_inits_func + let (p_ctor_start, _) = run_inits_func .pool_constants() - .first() + .first_key_value() .ok_or_else(|| NoInitPoolConstantsSnafu.build())?; let ctor_start_data = &run_inits_code[(p_ctor_start - run_inits_addr) as usize..]; let ctor_start = u32::from_le_bytes([ diff --git a/lib/src/analysis/data.rs b/lib/src/analysis/data.rs index e6d8892..d91d59f 100644 --- a/lib/src/analysis/data.rs +++ b/lib/src/analysis/data.rs @@ -3,16 +3,16 @@ use std::{collections::BTreeMap, ops::Range}; use snafu::Snafu; use crate::{ - analysis::functions::Function, + analysis::functions::{CalledFunction, Function}, config::{ Comments, - module::{AnalysisOptions, ModuleKind}, + module::{AnalysisOptions, Module, ModuleKind}, relocations::{ Relocation, RelocationKind, RelocationModule, RelocationOptions, Relocations, RelocationsError, }, section::{Section, SectionKind, Sections}, - symbol::{SymBss, SymData, SymbolMap, SymbolMapError}, + symbol::{SymBss, SymData, SymbolKind, SymbolMap, SymbolMapError}, }, function, }; @@ -31,6 +31,10 @@ pub struct FindLocalDataOptions<'a> { #[derive(Debug, Snafu)] pub enum FindLocalDataError { + #[snafu(display( + "Local function call from {from:#010x} in {module_kind} to {to:#010x} leads to no function" + ))] + LocalFunctionNotFound { from: u32, to: u32, module_kind: ModuleKind }, #[snafu(transparent)] SymbolMap { source: SymbolMapError }, #[snafu(transparent)] @@ -56,7 +60,7 @@ pub fn find_local_data_from_pools( } = options; let address_range = None; - for pool_constant in 
function.iter_pool_constants(code, base_address) { + for pool_constant in function.iter_pool_constants() { let pointer = pool_constant.value; if let Some(reloc_kind) = relocation_overrides.get(&pointer) { relocations.add(Relocation::new(RelocationOptions { @@ -73,21 +77,38 @@ pub fn find_local_data_from_pools( // Not a pointer, or points to a different module continue; }; - let function = symbol_map.get_function(pointer & !1)?; + let symbol = symbol_map.by_address(pointer & !1)?; if section.kind() == SectionKind::Code - && let Some((function, _)) = function + && let Some((_, symbol)) = symbol { let thumb = (pointer & 1) != 0; - if function.mode.into_thumb() != Some(thumb) { - // Instruction mode must match - continue; - } + let symbol_thumb = match &symbol.kind { + SymbolKind::Function(function) => function.mode.into_thumb(), + SymbolKind::Label(label) => { + if label.external { + label.mode.into_thumb() + } else { + None + } + } + SymbolKind::Undefined + | SymbolKind::PoolConstant + | SymbolKind::JumpTable(_) + | SymbolKind::Data(_) + | SymbolKind::Bss(_) => None, + }; + if let Some(symbol_thumb) = symbol_thumb { + if symbol_thumb != thumb { + // Instruction mode must match + continue; + } - // Relocate function pointer - let reloc = - relocations.add_load(pool_constant.address, pointer, 0, module_kind.into())?; - if analysis_options.provide_reloc_source { - reloc.comments.post_comment = Some(function!().to_string()); + // Relocate function pointer + let reloc = + relocations.add_load(pool_constant.address, pointer, 0, module_kind.into())?; + if analysis_options.provide_reloc_source { + reloc.comments.post_comment = Some(function!().to_string()); + } } } else { add_symbol_from_pointer( @@ -209,3 +230,103 @@ fn add_symbol_from_pointer( Ok(()) } + +pub fn find_function_labels( + module: &Module, + symbol_map: &mut SymbolMap, + options: &AnalysisOptions, +) -> Result<(), FindLocalDataError> { + for section in module.sections().iter() { + for function in 
section.functions().values() { + if options.allow_unknown_function_calls { + insert_unknown_function_symbols(function, module, symbol_map)?; + } + add_external_labels(function, module, symbol_map)?; + } + } + Ok(()) +} + +fn iter_function_calls(function: &Function) -> impl Iterator { + function + .function_calls() + .iter() + // TODO: Condition code resets to AL for relocated call instructions + .filter(|(_, called_function)| !called_function.ins.is_conditional()) +} + +fn insert_unknown_function_symbols( + function: &Function, + module: &Module, + symbol_map: &mut SymbolMap, +) -> Result<(), FindLocalDataError> { + for (&address, &called_function) in iter_function_calls(function) { + let local_module = module; + let is_local = + local_module.sections().get_by_contained_address(called_function.address).is_some(); + if !is_local { + continue; + } + + let module_kind = local_module.kind(); + if symbol_map.get_function_containing(called_function.address).is_none() { + log::warn!( + "Local function call from {:#010x} in {} to {:#010x} leads to no function, inserting an unknown function symbol", + address, + module_kind, + called_function.address + ); + + let thumb_bit = if called_function.thumb { 1 } else { 0 }; + let function_address = called_function.address | thumb_bit; + + if symbol_map.get_function(function_address)?.is_none() { + let name = + format!("{}{:08x}_unk", local_module.default_func_prefix, function_address); + symbol_map.add_unknown_function(name, function_address, called_function.thumb); + } + } + } + Ok(()) +} + +fn add_external_labels( + function: &Function, + module: &Module, + symbol_map: &mut SymbolMap, +) -> Result<(), FindLocalDataError> { + for (&address, &called_function) in iter_function_calls(function) { + let is_local = + module.sections().get_by_contained_address(called_function.address).is_some(); + if !is_local { + continue; + } + + let module_kind = module.kind(); + let symbol = match 
symbol_map.get_function_containing(called_function.address) { + Some((_, symbol)) => symbol, + None => { + let error = LocalFunctionNotFoundSnafu { + from: address, + to: called_function.address, + module_kind, + } + .build(); + log::error!("{error}"); + return Err(error); + } + }; + if called_function.address != symbol.addr { + log::warn!( + "Local function call from {:#010x} in {} to {:#010x} goes to middle of function '{}' at {:#010x}, adding an external label symbol", + address, + module_kind, + called_function.address, + symbol.name, + symbol.addr + ); + symbol_map.add_external_label(called_function.address, called_function.thumb)?; + } + } + Ok(()) +} diff --git a/lib/src/analysis/exception.rs b/lib/src/analysis/exception.rs index 479732f..49b8241 100644 --- a/lib/src/analysis/exception.rs +++ b/lib/src/analysis/exception.rs @@ -19,18 +19,32 @@ struct GetExceptixFunction { end_offset: u32, } -const GET_EXCEPTIX_FUNCTIONS: [GetExceptixFunction; 1] = [GetExceptixFunction { - code: &[ - 0x10, 0x20, 0x9f, 0xe5, // ldr r2, [pc, #0x10] - 0x10, 0x10, 0x9f, 0xe5, // ldr r1, [pc, #0x10] - 0x0c, 0x20, 0x80, 0xe5, // str r2, [r0, #0xc] - 0x10, 0x10, 0x80, 0xe5, // str r1, [r0, #0x10] - 0x01, 0x00, 0xa0, 0xe3, // mov r0, #1 - 0x1e, 0xff, 0x2f, 0xe1, // bx lr - ], - start_offset: 0x18, - end_offset: 0x1c, -}]; +const GET_EXCEPTIX_FUNCTIONS: &[GetExceptixFunction] = &[ + GetExceptixFunction { + code: &[ + 0x10, 0x20, 0x9f, 0xe5, // ldr r2, [pc, #0x10] + 0x10, 0x10, 0x9f, 0xe5, // ldr r1, [pc, #0x10] + 0x0c, 0x20, 0x80, 0xe5, // str r2, [r0, #0xc] + 0x10, 0x10, 0x80, 0xe5, // str r1, [r0, #0x10] + 0x01, 0x00, 0xa0, 0xe3, // mov r0, #1 + 0x1e, 0xff, 0x2f, 0xe1, // bx lr + ], + start_offset: 0x18, + end_offset: 0x1c, + }, + GetExceptixFunction { + code: &[ + 0x02, 0x49, // ldr r1, [pc, #0x8] + 0xc1, 0x60, // str r1, [r0, #0xc] + 0x02, 0x49, // ldr r1, [pc, #0x8] + 0x01, 0x61, // str r1, [r0, #0x10] + 0x01, 0x20, // movs r0, #1 + 0x70, 0x47, // bx lr + ], + start_offset: 0xc, 
+ end_offset: 0x10, + }, +]; #[repr(C)] #[derive(Zeroable, Pod, Clone, Copy)] diff --git a/lib/src/analysis/functions.rs b/lib/src/analysis/functions.rs index b695655..09919b3 100644 --- a/lib/src/analysis/functions.rs +++ b/lib/src/analysis/functions.rs @@ -7,7 +7,7 @@ use std::{ use snafu::Snafu; use unarm::{ ArmVersion, Endian, Ins, ParseFlags, ParseMode, ParsedIns, Parser, - args::{Argument, Reg, Register}, + args::{Argument, Reg, Register, Shift, ShiftImm, ShiftReg}, arm, thumb, }; @@ -20,13 +20,14 @@ use super::{ secure_area::SecureAreaState, }; use crate::{ + analysis::illegal_code::ILLEGAL_CODE_PATTERNS, config::symbol::{SymbolMap, SymbolMapError}, util::bytes::FromSlice, }; // All keys in the types below are instruction addresses pub type Labels = BTreeSet; -pub type PoolConstants = BTreeSet; +pub type PoolConstants = BTreeMap; pub type JumpTables = BTreeMap; pub type InlineTables = BTreeMap; pub type FunctionCalls = BTreeMap; @@ -108,7 +109,12 @@ impl Function { Some((address as i32 + dest).try_into().unwrap()) } - fn is_pool_load(ins: Ins, parsed_ins: &ParsedIns, address: u32, thumb: bool) -> Option { + fn is_pool_load( + ins: Ins, + parsed_ins: &ParsedIns, + address: u32, + thumb: bool, + ) -> Option<(u32, Register)> { if ins.mnemonic() != "ldr" { return None; } @@ -124,7 +130,7 @@ impl Function { // ldr *, [pc + *] let load_address = (address as i32 + offset.value) as u32 & !3; let load_address = load_address + if thumb { 4 } else { 8 }; - Some(load_address) + Some((load_address, dest.reg)) } } _ => None, @@ -155,9 +161,10 @@ impl Function { fn function_parser_loop( mut parser: Parser<'_>, options: FunctionParseOptions, + found_functions: &BTreeMap, ) -> Result { let thumb = parser.mode == ParseMode::Thumb; - let mut context = ParseFunctionContext::new(thumb, options); + let mut context = ParseFunctionContext::new(thumb, options, found_functions); let Some((address, ins, parsed_ins)) = parser.next() else { return Err(FunctionAnalysisError::IntoFunction 
{ @@ -185,7 +192,7 @@ impl Function { } }; - if let Some(first_pool_address) = function.pool_constants.first() + if let Some((first_pool_address, _)) = function.pool_constants.first_key_value() && *first_pool_address < function.start_address { log::info!( @@ -216,7 +223,7 @@ impl Function { let parser = Parser::new(parse_mode, *start_address, Endian::Little, PARSE_FLAGS, function_code); - Self::function_parser_loop(parser, options) + Self::function_parser_loop(parser, options, &BTreeMap::new()) } pub fn find_functions( @@ -260,6 +267,33 @@ impl Function { while !function_code.is_empty() && address <= *upper_bounds.first().unwrap_or(&last_function_address) { + for illegal_pattern in ILLEGAL_CODE_PATTERNS { + if function_code.starts_with(illegal_pattern) { + address += illegal_pattern.len() as u32; + function_code = &module_code[(address - base_address) as usize..]; + continue; + } + } + + // Skip if more than 10 consecutive valid pointer values, as that is most certainly not + // valid code at that point + let mut function_code_iter = function_code; + let mut pointer_count = 0; + while function_code_iter.len() > 4 { + let word: u32 = u32::from_le_slice(function_code_iter); + function_code_iter = &function_code_iter[4..]; + if (0x01ff8000..0x02400000).contains(&word) { + pointer_count += 1; + } else { + break; + } + } + if pointer_count >= 10 { + address += pointer_count * 4; + function_code = &module_code[(address - base_address) as usize..]; + continue; + } + let thumb = Function::is_thumb_function(address, function_code); let parse_mode = if thumb { ParseMode::Thumb } else { ParseMode::Arm }; @@ -272,18 +306,22 @@ impl Function { (format!("{default_name_prefix}{address:08x}"), true) }; - let function_result = Function::function_parser_loop(parser, FunctionParseOptions { - name, - start_address: address, - base_address, - module_code, - known_end_address: None, - module_start_address, - module_end_address, - existing_functions: search_options.existing_functions, - 
check_defs_uses: search_options.check_defs_uses, - parse_options: Default::default(), - }); + let function_result = Function::function_parser_loop( + parser, + FunctionParseOptions { + name, + start_address: address, + base_address, + module_code, + known_end_address: None, + module_start_address, + module_end_address, + existing_functions: search_options.existing_functions, + check_defs_uses: search_options.check_defs_uses, + parse_options: Default::default(), + }, + &functions, + ); let function = match function_result { Ok(function) => function, Err(FunctionAnalysisError::IntoFunction { @@ -383,7 +421,7 @@ impl Function { // Look for pointers to data in this module, to use as an upper bound for finding functions if search_options.use_data_as_upper_bound { - for pool_constant in function.iter_pool_constants(module_code, base_address) { + for pool_constant in function.iter_pool_constants() { let pointer_value = pool_constant.value & !1; if upper_bounds.contains(&pointer_value) { continue; @@ -392,22 +430,30 @@ impl Function { continue; } - let offset = (pointer_value - base_address) as usize; - if offset >= module_code.len() { - continue; - } + match &pool_constant.usage { + // Not data, skip + PoolConstantUsage::Call => continue, + // Maybe data, run basic check for whether it is code + PoolConstantUsage::Other => { + let offset = (pointer_value - base_address) as usize; + if offset >= module_code.len() { + continue; + } - let thumb = Function::is_thumb_function(pointer_value, &module_code[offset..]); - let mut parser = Parser::new( - if thumb { ParseMode::Thumb } else { ParseMode::Arm }, - pointer_value, - Endian::Little, - PARSE_FLAGS, - &module_code[offset..], - ); - let (address, ins, parsed_ins) = parser.next().unwrap(); - if is_valid_function_start(address, ins, &parsed_ins) { - continue; + let thumb = + Function::is_thumb_function(pointer_value, &module_code[offset..]); + let mut parser = Parser::new( + if thumb { ParseMode::Thumb } else { ParseMode::Arm 
}, + pointer_value, + Endian::Little, + PARSE_FLAGS, + &module_code[offset..], + ); + let (address, ins, parsed_ins) = parser.next().unwrap(); + if is_valid_function_start(address, ins, &parsed_ins) { + continue; + } + } } // The pool constant points to data, limit the upper bound @@ -433,7 +479,7 @@ impl Function { for address in self.labels.iter() { symbol_map.add_label(*address, self.thumb)?; } - for address in self.pool_constants.iter() { + for (address, _) in self.pool_constants.iter() { symbol_map.add_pool_constant(*address)?; } for jump_table in self.jump_tables() { @@ -545,16 +591,8 @@ impl Function { &self.pool_constants } - pub fn iter_pool_constants<'a>( - &'a self, - module_code: &'a [u8], - base_address: u32, - ) -> impl Iterator + 'a { - self.pool_constants.iter().map(move |&address| { - let start = (address - base_address) as usize; - let bytes = &module_code[start..]; - PoolConstant { address, value: u32::from_le_slice(bytes) } - }) + pub fn iter_pool_constants(&self) -> impl Iterator { + self.pool_constants.values() } pub fn function_calls(&self) -> &FunctionCalls { @@ -605,6 +643,10 @@ struct ParseFunctionContext<'a> { module_start_address: u32, module_end_address: u32, existing_functions: Option<&'a BTreeMap>, + found_functions: &'a BTreeMap, + base_address: u32, + /// The code for this module, starting at `base_address` + code: &'a [u8], /// Address of last conditional instruction, so we can detect the final return instruction last_conditional_destination: Option, @@ -612,6 +654,7 @@ struct ParseFunctionContext<'a> { last_pool_address: Option, /// State machine for detecting jump tables and adding them as symbols jump_table_state: JumpTableState, + jump_table_end_address: Option, /// State machine for detecting branches (B, not BL) to other functions function_branch_state: FunctionBranchState, /// State machine for detecting inline data tables within the function @@ -622,12 +665,18 @@ struct ParseFunctionContext<'a> { /// Whether to check that 
all registers used in the instruction are defined check_defs_uses: bool, defined_registers: BTreeSet, + register_values: [Option<(u32, RegValueSrc)>; 16], prev_ins: Option, prev_parsed_ins: Option, prev_address: Option, } +#[derive(Clone, Copy)] +enum RegValueSrc { + PoolConstant(u32), +} + #[derive(Debug, Snafu)] pub enum IntoFunctionError { #[snafu(display("Cannot turn parse context into function before parsing is done"))] @@ -637,15 +686,21 @@ pub enum IntoFunctionError { } impl<'a> ParseFunctionContext<'a> { - pub fn new(thumb: bool, options: FunctionParseOptions<'a>) -> Self { + pub fn new( + thumb: bool, + options: FunctionParseOptions<'a>, + found_functions: &'a BTreeMap, + ) -> Self { let FunctionParseOptions { name, start_address, + base_address, known_end_address, module_start_address, module_end_address, existing_functions, check_defs_uses, + module_code, .. } = options; @@ -661,6 +716,9 @@ impl<'a> ParseFunctionContext<'a> { defined_registers.insert(Register::Pc); // Could be used as a scratch register defined_registers.insert(Register::R12); + // Sometimes not callee-saved + defined_registers.insert(Register::R10); + defined_registers.insert(Register::R11); Self { name, @@ -677,6 +735,9 @@ impl<'a> ParseFunctionContext<'a> { module_start_address, module_end_address, existing_functions, + found_functions, + base_address, + code: module_code, last_conditional_destination: None, last_pool_address: None, @@ -685,12 +746,14 @@ impl<'a> ParseFunctionContext<'a> { } else { JumpTableState::Arm(Default::default()) }, + jump_table_end_address: None, function_branch_state: Default::default(), inline_table_state: Default::default(), illegal_code_state: Default::default(), check_defs_uses, defined_registers, + register_values: [None; 16], prev_ins: None, prev_parsed_ins: None, @@ -705,7 +768,7 @@ impl<'a> ParseFunctionContext<'a> { ins: Ins, parsed_ins: &ParsedIns, ) -> ParseFunctionState { - if self.pool_constants.contains(&address) { + if 
self.pool_constants.contains_key(&address) { parser.seek_forward(address + 4); return ParseFunctionState::Continue; } @@ -716,11 +779,19 @@ impl<'a> ParseFunctionContext<'a> { self.jump_table_state = self.jump_table_state.handle(address, ins, parsed_ins, &mut self.jump_tables); - self.last_conditional_destination = - self.last_conditional_destination.max(self.jump_table_state.table_end_address()); - if let Some(label) = self.jump_table_state.get_label(address, ins) { + if let Some(table_end_address) = self.jump_table_state.table_end_address() { + self.last_conditional_destination = + self.last_conditional_destination.max(Some(table_end_address)); + self.jump_table_end_address = Some(table_end_address); + } + if let Some((label, second_label)) = self.jump_table_state.get_labels(address, ins) { self.labels.insert(label); self.last_conditional_destination = self.last_conditional_destination.max(Some(label)); + if let Some(second_label) = second_label { + self.labels.insert(second_label); + self.last_conditional_destination = + self.last_conditional_destination.max(Some(second_label)); + } } if self.jump_table_state.is_numerical_jump_offset() { @@ -749,6 +820,61 @@ impl<'a> ParseFunctionContext<'a> { return ParseFunctionState::IllegalIns { address, ins }; } + if let Some(destination) = Function::is_branch(ins, parsed_ins, address) { + if destination < self.start_address + && let Some((_, function)) = self.found_functions.range(..=destination).last() + && function.start_address < destination + { + let thumb = matches!(ins, Ins::Thumb(_)); + if thumb != function.is_thumb() { + // Instruction mode must match + return ParseFunctionState::IllegalIns { address, ins }; + } + } + + if !(0x01ff8000..0x03000000).contains(&destination) { + // Branch goes outside of program + return ParseFunctionState::IllegalIns { address, ins }; + } + } + + // Check register usage + #[allow(clippy::single_match)] // Remove this line if more cases are added + match (parsed_ins.mnemonic, 
&parsed_ins.args[0]) { + ("bx", Argument::Reg(Reg { reg, .. })) => { + if let Some((_, src)) = &self.register_values[*reg as usize] { + match src { + RegValueSrc::PoolConstant(pool_address) => { + self.pool_constants.get_mut(pool_address).unwrap().usage = + PoolConstantUsage::Call; + } + } + } + } + _ => {} + } + + // Clear tracked register values + if let Some(defs) = match ins { + Ins::Arm(ins) => Some(ins.defs(&PARSE_FLAGS)), + Ins::Thumb(ins) => Some(ins.defs(&PARSE_FLAGS)), + Ins::Data => None, + } { + for def in defs { + match def { + Argument::Reg(reg) => { + self.register_values[reg.reg as usize] = None; + } + Argument::RegList(reg_list) => { + for reg in reg_list.iter() { + self.register_values[reg as usize] = None; + } + } + _ => {} + } + } + } + let in_conditional_block = Some(address) < self.last_conditional_destination; let is_return = self.is_return( ins, @@ -825,14 +951,39 @@ impl<'a> ParseFunctionContext<'a> { } } else { // Normal branch instruction, insert a label - if let Some(state) = self.handle_label(destination, address, parser, ins_size) { + if let Some(state) = self.handle_label( + destination, + address, + parser, + ins_size, + ins, + in_conditional_block, + ) { return state; } } } - if let Some(pool_address) = Function::is_pool_load(ins, parsed_ins, address, self.thumb) { - self.pool_constants.insert(pool_address); + if let Some((pool_address, register)) = + Function::is_pool_load(ins, parsed_ins, address, self.thumb) + { + let start = (pool_address - self.base_address) as usize; + let Some(bytes) = self.code.get(start..) 
else { + log::debug!( + "Illegal instruction at {:#010x}: Pool load goes outside module", + address + ); + return ParseFunctionState::IllegalIns { address, ins }; + }; + let const_value = u32::from_le_slice(bytes); + self.register_values[register as usize] = + Some((const_value, RegValueSrc::PoolConstant(pool_address))); + + self.pool_constants.insert(pool_address, PoolConstant { + address: pool_address, + value: const_value, + usage: PoolConstantUsage::Other, + }); self.last_pool_address = self.last_pool_address.max(Some(pool_address)); } @@ -944,13 +1095,18 @@ impl<'a> ParseFunctionContext<'a> { address: u32, parser: &mut Parser, ins_size: u32, + ins: Ins, + in_conditional_block: bool, ) -> Option { self.labels.insert(destination); - self.last_conditional_destination = - self.last_conditional_destination.max(Some(destination)); + let is_table_jump = self.jump_table_end_address.map(|end| address < end).unwrap_or(false); + if in_conditional_block || ins.is_conditional() || is_table_jump { + self.last_conditional_destination = + self.last_conditional_destination.max(Some(destination)); + } let next_address = address + ins_size; - if self.pool_constants.contains(&next_address) { + if self.pool_constants.contains_key(&next_address) { let branch_backwards = destination <= address; // Load instructions in ARM mode can have an offset of up to ±4kB. Therefore, some functions must @@ -977,7 +1133,7 @@ impl<'a> ParseFunctionContext<'a> { } else { let after_pools = (next_address..) 
.step_by(4) - .find(|addr| !self.pool_constants.contains(addr)) + .find(|addr| !self.pool_constants.contains_key(addr)) .unwrap(); log::warn!( "No label past constant pool at {:#x}, jumping to first address not occupied by a pool constant ({:#x})", @@ -1040,34 +1196,51 @@ impl<'a> ParseFunctionContext<'a> { } let args = &parsed_ins.args; - match (parsed_ins.mnemonic, args[0], args[1]) { + match (parsed_ins.mnemonic, args[0], args[1], args[2], args[3]) { // bx * - ("bx", _, _) => true, + ("bx", _, _, _, _) => true, // mov pc, * - ("mov", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true, + ("mov", Argument::Reg(Reg { reg: Register::Pc, .. }), _, _, _) => true, // ldmia *, {..., pc} - ("ldmia", _, Argument::RegList(reg_list)) if reg_list.contains(Register::Pc) => true, + ("ldmia", _, Argument::RegList(reg_list), _, _) if reg_list.contains(Register::Pc) => { + true + } // pop {..., pc} - ("pop", Argument::RegList(reg_list), _) if reg_list.contains(Register::Pc) => true, - // backwards branch - ("b", Argument::BranchDest(offset), _) if offset < 0 => { - // Branch must be within current function (infinite loop) or outside current module (tail call) - Function::is_branch(ins, parsed_ins, address) - .map(|destination| { - destination >= function_start - || destination < module_start_address - || destination >= module_end_address - }) - .unwrap_or(false) + ("pop", Argument::RegList(reg_list), _, _, _) if reg_list.contains(Register::Pc) => { + true } + // backwards branch + ("b", Argument::BranchDest(offset), _, _, _) if offset < 0 => true, // subs pc, lr, * ( "subs", Argument::Reg(Reg { reg: Register::Pc, .. }), Argument::Reg(Reg { reg: Register::Lr, .. }), + _, + _, ) => true, // ldr pc, * - ("ldr", Argument::Reg(Reg { reg: Register::Pc, .. }), _) => true, + ("ldr", Argument::Reg(Reg { reg: Register::Pc, .. 
}), _, _, _) => true, + // eor pc, r*, r*, ror r* + // Yeah this makes no sense but it's real and exists at 0x020d2888 of ov022 in the + // European version of Mario & Luigi: Bowser's Inside Story + ( + "eor", + Argument::Reg(Reg { reg: Register::Pc, .. }), + Argument::Reg(_), + Argument::Reg(_), + Argument::ShiftReg(ShiftReg { op: Shift::Ror, reg: _ }), + ) => true, + // add pc, r*, r*, lsl #* + // Another weird one from Bowser's Inside Story's ITCM module (0x01ff84f8 in EU version) + // An exception is `add pc, pc, r*, lsl #0x2` which is for jump tables and not a return + ( + "add", + Argument::Reg(Reg { reg: Register::Pc, .. }), + Argument::Reg(Reg { reg, .. }), + Argument::Reg(_), + Argument::ShiftImm(ShiftImm { op: Shift::Lsl, imm: _ }), + ) if reg != Register::Pc => true, _ => false, } } @@ -1184,7 +1357,15 @@ pub struct CalledFunction { pub thumb: bool, } +#[derive(Debug, Clone)] pub struct PoolConstant { pub address: u32, pub value: u32, + pub usage: PoolConstantUsage, +} + +#[derive(Debug, Clone)] +pub enum PoolConstantUsage { + Call, + Other, } diff --git a/lib/src/analysis/illegal_code.rs b/lib/src/analysis/illegal_code.rs index b01dcdf..bdce543 100644 --- a/lib/src/analysis/illegal_code.rs +++ b/lib/src/analysis/illegal_code.rs @@ -20,6 +20,16 @@ impl IllegalCodeState { return Self::Illegal; } + if matches!(ins, Ins::Thumb(_)) + && parsed_ins.mnemonic == "lsl" + && let Arg::Reg(Reg { reg: Register::R0, .. }) = parsed_ins.args[0] + && let Arg::Reg(Reg { reg: Register::R0, .. 
}) = parsed_ins.args[1] + && let Arg::UImm(0) = parsed_ins.args[2] + { + // In Thumb with divided syntax, 0000 disassembles into lsl r0, r0, #0x0 and is a no-op + return Self::Illegal; + } + let args = &parsed_ins.args; match (self, ins.mnemonic(), args[0], args[1], args[2]) { // Find registers with shifted value @@ -59,3 +69,5 @@ impl IllegalCodeState { self == Self::Illegal } } + +pub const ILLEGAL_CODE_PATTERNS: &[&[u8]] = &[&[0x00, 0x02, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00]]; diff --git a/lib/src/analysis/jump_table.rs b/lib/src/analysis/jump_table.rs index d4cd9d7..68357a7 100644 --- a/lib/src/analysis/jump_table.rs +++ b/lib/src/analysis/jump_table.rs @@ -9,8 +9,13 @@ use super::functions::JumpTables; pub struct JumpTable { pub address: u32, pub size: u32, - /// If true, the jump table entries are instructions. Otherwise, they are data. - pub code: bool, + pub kind: JumpTableKind, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum JumpTableKind { + Arm, + Thumb(ThumbJumpTableKind), } #[derive(Clone, Copy, Debug)] @@ -40,10 +45,10 @@ impl JumpTableState { } } - pub fn get_label(&self, address: u32, ins: Ins) -> Option { + pub fn get_labels(&self, address: u32, ins: Ins) -> Option<(u32, Option)> { match self { Self::Arm(_) => None, - Self::Thumb(state) => state.get_label(address, ins), + Self::Thumb(state) => state.get_labels(address, ins), } } @@ -64,16 +69,20 @@ pub enum JumpTableStateArm { /// `...` other non-comparing instructions /// `addls pc, pc, index, lsl #0x2` jump to nearby branch instruction, OR /// `bgt @skip` skip jump table if SIGNED index is out of bounds + /// `ldmiahi sp!, {...}` return if index is out of bounds JumpOrBranchSigned { index: Register, limit: u32 }, /// if index is signed: - /// `cmp index, #0x0` check that the index is non-negative + /// `cmp index, #0x0` check that the index is non-negative SignedBaseline { index: Register, limit: u32 }, /// if index is signed: /// `addge pc, pc, index, lsl #0x2` jump to nearby branch 
instruction JumpSigned { index: Register, limit: u32 }, + /// `add pc, pc, index, lsl #0x2` jump to nearby branch instruction + JumpAfterReturn { index: Register, limit: u32 }, + /// valid table detected, starts from `table_address` with a size of `limit` ValidJumpTable { table_address: u32, limit: u32 }, } @@ -125,7 +134,7 @@ impl JumpTableStateArm { jump_tables.insert(table_address, JumpTable { address: table_address, size, - code: true, + kind: JumpTableKind::Arm, }); Self::ValidJumpTable { table_address: address + 8, limit } } @@ -137,6 +146,14 @@ impl JumpTableStateArm { Argument::None, Argument::None, ) => Self::SignedBaseline { index, limit }, + ( + "ldmhiia", + Argument::Reg(Reg { reg: Register::Sp, writeback: true, .. }), + Argument::RegList(_), + Argument::None, + Argument::None, + Argument::None, + ) => Self::JumpAfterReturn { index, limit }, _ if ins.updates_condition_flags() => Self::default(), _ => self, } @@ -166,7 +183,30 @@ impl JumpTableStateArm { jump_tables.insert(table_address, JumpTable { address: table_address, size, - code: true, + kind: JumpTableKind::Arm, + }); + Self::ValidJumpTable { table_address: address + 8, limit } + } + _ if ins.updates_condition_flags() => Self::default(), + _ => self, + } + } + Self::JumpAfterReturn { index, limit } => { + match (parsed_ins.mnemonic, args[0], args[1], args[2], args[3], args[4]) { + ( + "add", + Argument::Reg(Reg { reg: Register::Pc, .. }), + Argument::Reg(Reg { reg: Register::Pc, .. }), + Argument::Reg(Reg { reg, .. 
}), + Argument::ShiftImm(ShiftImm { imm: 2, op: Shift::Lsl }), + Argument::None, + ) if reg == index => { + let table_address = address + 8; + let size = (limit + 1) * 4; + jump_tables.insert(table_address, JumpTable { + address: table_address, + size, + kind: JumpTableKind::Arm, }); Self::ValidJumpTable { table_address: address + 8, limit } } @@ -218,25 +258,37 @@ pub enum JumpTableStateThumb { BranchNegative { index: Register, limit: u32 }, /// `add offset, index, index` multiply index by 2 to calculate jump table offset + /// `mov offset, index` multiply index by 1 (8-bit table items) AddRegReg { index: Register, limit: u32 }, /// `add offset, pc` turn jump table offset into a PC-relative address AddRegPc { offset: Register, limit: u32 }, /// `ldrh jump, [offset, #imm]` load 16-bit jump value from table + /// `ldrb jump, [offset, #imm]` load 8-bit jump value from table LoadOffset { offset: Register, limit: u32, pc_base: u32 }, /// `lsl jump, jump, #0x10` sign extend - SignExtendLsl { jump: Register, table_address: u32, limit: u32 }, + SignExtendLsl { jump: Register, table_address: u32, limit: u32, kind: ThumbJumpTableKind }, /// `asr jump, jump, #0x10` sign extend - SignExtendAsr { jump: Register, table_address: u32, limit: u32 }, + SignExtendAsr { jump: Register, table_address: u32, limit: u32, kind: ThumbJumpTableKind }, /// `add pc, jump` do the jump - AddPcReg { jump: Register, table_address: u32, limit: u32 }, + /// `add jump, pc` calculate the jump destination + AddPcReg { jump: Register, table_address: u32, limit: u32, kind: ThumbJumpTableKind }, + + /// `bx jump` jump to the destination + BxJump { jump: Register, table_address: u32, limit: u32, kind: ThumbJumpTableKind }, /// valid table detected, starts from `table_address` with a size of `limit` - ValidJumpTable { table_address: u32, limit: u32 }, + ValidJumpTable { table_address: u32, limit: u32, kind: ThumbJumpTableKind }, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum 
ThumbJumpTableKind { + Halfword, + Byte, } impl JumpTableStateThumb { @@ -260,9 +312,12 @@ impl JumpTableStateThumb { parsed_ins: &ParsedIns, jump_tables: &mut JumpTables, ) -> Self { - if let Some(start) = self.check_start(parsed_ins) { + if let Some(end_address) = self.table_end_address() + && address < end_address + { + } else if let Some(start) = self.check_start(parsed_ins) { return start; - }; + } let args = &parsed_ins.args; match self { @@ -333,6 +388,19 @@ impl JumpTableStateThumb { Self::default() } } + ( + "mov", + Argument::Reg(Reg { reg: table_offset, .. }), + Argument::Reg(Reg { reg, .. }), + Argument::None, + Argument::None, + ) => { + if reg == index { + Self::AddRegPc { offset: table_offset, limit } + } else { + Self::default() + } + } _ => Self::default(), } } @@ -363,12 +431,32 @@ impl JumpTableStateThumb { Argument::None, ) if reg == base_reg => { let table_start = (pc_base as i32 - 2 + value * 2) as u32; - Self::SignExtendLsl { jump: offset, table_address: table_start, limit } + Self::SignExtendLsl { + jump: offset, + table_address: table_start, + limit, + kind: ThumbJumpTableKind::Halfword, + } + } + ( + "ldrb", + Argument::Reg(Reg { reg, .. }), + Argument::Reg(Reg { reg: base_reg, deref: true, .. 
}), + Argument::OffsetImm(OffsetImm { post_indexed: false, value }), + Argument::None, + ) if reg == base_reg => { + let table_start = (pc_base as i32 - 2 + value * 2) as u32; + Self::SignExtendLsl { + jump: offset, + table_address: table_start, + limit, + kind: ThumbJumpTableKind::Byte, + } } _ => Self::default(), } } - Self::SignExtendLsl { jump, table_address, limit } => { + Self::SignExtendLsl { jump, table_address, limit, kind } => { match (parsed_ins.mnemonic, args[0], args[1], args[2], args[3]) { ( "lsl", @@ -377,12 +465,12 @@ impl JumpTableStateThumb { Argument::UImm(value), Argument::None, ) if dest_reg == src_reg && dest_reg == jump && value == 0x10 => { - Self::SignExtendAsr { jump, table_address, limit } + Self::SignExtendAsr { jump, table_address, limit, kind } } _ => Self::default(), } } - Self::SignExtendAsr { jump, table_address, limit } => { + Self::SignExtendAsr { jump, table_address, limit, kind } => { match (parsed_ins.mnemonic, args[0], args[1], args[2], args[3]) { ( "asr", @@ -391,12 +479,12 @@ impl JumpTableStateThumb { Argument::UImm(value), Argument::None, ) if dest_reg == src_reg && dest_reg == jump && value == 0x10 => { - Self::AddPcReg { jump, table_address, limit } + Self::AddPcReg { jump, table_address, limit, kind } } _ => Self::default(), } } - Self::AddPcReg { jump, table_address, limit } => { + Self::AddPcReg { jump, table_address, limit, kind } => { match (parsed_ins.mnemonic, args[0], args[1], args[2]) { ( "add", @@ -404,39 +492,76 @@ impl JumpTableStateThumb { Argument::Reg(Reg { reg, .. }), Argument::None, ) if reg == jump => { - let size = (limit + 1) * 2; + let size = (limit + 1) * kind.item_size(); jump_tables.insert(table_address, JumpTable { address: table_address, size, - code: false, + kind: JumpTableKind::Thumb(kind), }); - Self::ValidJumpTable { table_address, limit } + Self::ValidJumpTable { table_address, limit, kind } } + ( + "add", + Argument::Reg(Reg { reg, .. }), + Argument::Reg(Reg { reg: Register::Pc, .. 
}), + Argument::None, + ) if reg == jump => Self::BxJump { jump, table_address, limit, kind }, _ => Self::default(), } } - Self::ValidJumpTable { table_address, limit } => { - let end = table_address + limit * 2; - if address > end { Self::default() } else { self } + Self::BxJump { jump, table_address, limit, kind } => { + match (parsed_ins.mnemonic, args[0], args[1]) { + ("bx", Argument::Reg(Reg { reg, .. }), Argument::None) if reg == jump => { + let table_address = table_address - 2; + let size = (limit + 1) * kind.item_size(); + jump_tables.insert(table_address, JumpTable { + address: table_address, + size, + kind: JumpTableKind::Thumb(kind), + }); + Self::ValidJumpTable { table_address, limit, kind } + } + _ => Self::default(), + } + } + Self::ValidJumpTable { table_address, limit, kind } => { + let end = table_address + (limit + 1) * kind.item_size(); + if address >= end { Self::default() } else { self } } } } pub fn table_end_address(&self) -> Option { match self { - Self::ValidJumpTable { table_address, limit } => Some(table_address + (limit + 1) * 2), + Self::ValidJumpTable { table_address, limit, kind } => { + Some(table_address + (limit + 1) * kind.item_size()) + } _ => None, } } - pub fn get_label(&self, address: u32, ins: Ins) -> Option { + pub fn get_labels(&self, address: u32, ins: Ins) -> Option<(u32, Option)> { match self { - Self::ValidJumpTable { table_address, limit } => { - let end = table_address + limit * 2; + Self::ValidJumpTable { table_address, limit, kind } => { + let end = table_address + limit * kind.item_size(); if address < *table_address || address > end { None } else { - Some((*table_address as i32 + ins.code() as i16 as i32 + 2) as u32) + let code = ins.code() as i16; + match kind { + ThumbJumpTableKind::Halfword => { + Some(((*table_address as i32 + code as i32 + 2) as u32, None)) + } + ThumbJumpTableKind::Byte => { + let [first_value, second_value] = code.to_le_bytes(); + let first_value = first_value as i8 as i32; + let 
second_value = second_value as i8 as i32; + Some(( + (*table_address as i32 + first_value + 2) as u32, + Some((*table_address as i32 + second_value + 2) as u32), + )) + } + } } } _ => None, @@ -447,3 +572,12 @@ impl JumpTableStateThumb { matches!(self, JumpTableStateThumb::ValidJumpTable { .. }) } } + +impl ThumbJumpTableKind { + fn item_size(self) -> u32 { + match self { + ThumbJumpTableKind::Halfword => 2, + ThumbJumpTableKind::Byte => 1, + } + } +} diff --git a/lib/src/analysis/main.rs b/lib/src/analysis/main.rs index 3c0c7fa..442b86b 100644 --- a/lib/src/analysis/main.rs +++ b/lib/src/analysis/main.rs @@ -37,9 +37,12 @@ impl MainFunction { let mut parser = function.parser(module_code, base_address); let ins_size = parser.mode.instruction_size(0) as u32; - let last_ins_addr = - function.pool_constants().first().ok_or_else(|| NoPoolConstantsSnafu.build())? - - ins_size; + let last_ins_addr = function + .pool_constants() + .first_key_value() + .map(|(pool_addr, _)| pool_addr) + .ok_or_else(|| NoPoolConstantsSnafu.build())? 
+ - ins_size; parser.seek_forward(last_ins_addr); let (_, _, last_ins) = parser.next().unwrap(); @@ -51,7 +54,7 @@ impl MainFunction { let mut p_tail_call = None; for (address, _ins, parsed_ins) in function.parser(module_code, base_address) { - if function.pool_constants().contains(&address) { + if function.pool_constants().contains_key(&address) { break; } let args = &parsed_ins.args; diff --git a/lib/src/analysis/mod.rs b/lib/src/analysis/mod.rs index dbc161a..0e00830 100644 --- a/lib/src/analysis/mod.rs +++ b/lib/src/analysis/mod.rs @@ -6,6 +6,8 @@ mod function_start; pub mod functions; mod illegal_code; mod inline_table; -pub(crate) mod jump_table; +pub mod jump_table; pub(crate) mod main; pub mod secure_area; + +pub use data::FindLocalDataError; diff --git a/lib/src/config/module.rs b/lib/src/config/module.rs index dd24350..4f54c84 100644 --- a/lib/src/config/module.rs +++ b/lib/src/config/module.rs @@ -22,7 +22,7 @@ use super::{ use crate::{ analysis::{ ctor::{CtorRange, CtorRangeError}, - data::{self, FindLocalDataOptions}, + data::{self, FindLocalDataOptions, find_function_labels}, exception::{ExceptionData, ExceptionDataError}, functions::{ FindFunctionsOptions, Function, FunctionAnalysisError, FunctionParseOptions, @@ -198,6 +198,7 @@ impl Module { let symbol_map = symbol_maps.get_mut(module.kind); module.find_sections_arm9(symbol_map, &ctor_range, exception_data, arm9)?; + find_function_labels(&module, symbol_map, options)?; module.find_data_from_pools( symbol_map, options, @@ -263,7 +264,7 @@ impl Module { default_data_prefix: format!("data_ov{:03}_", overlay.id()), default_sinit_prefix: format!("__sinit_ov{:03}_", overlay.id()), sections: Sections::new(), - signed: overlay.is_signed(), + signed: overlay.originally_signed(), }; let symbol_map = symbol_maps.get_mut(module.kind); @@ -272,6 +273,7 @@ impl Module { start: overlay.ctor_start(), end: overlay.ctor_end(), })?; + find_function_labels(&module, symbol_map, options)?; 
module.find_data_from_pools(symbol_map, options, None)?; module.find_data_from_sections(symbol_map, options)?; @@ -330,6 +332,7 @@ impl Module { let symbol_map = symbol_maps.get_mut(module.kind); module.find_sections_itcm(symbol_map)?; + find_function_labels(&module, symbol_map, options)?; module.find_data_from_pools(symbol_map, options, None)?; Ok(module) @@ -385,8 +388,10 @@ impl Module { let symbol_map = symbol_maps.get_mut(module.kind); module.find_sections_unknown_autoload(symbol_map, autoload)?; - module.find_data_from_pools(symbol_maps.get_mut(module.kind), options, None)?; - module.find_data_from_sections(symbol_maps.get_mut(module.kind), options)?; + + find_function_labels(&module, symbol_map, options)?; + module.find_data_from_pools(symbol_map, options, None)?; + module.find_data_from_sections(symbol_map, options)?; Ok(module) } @@ -402,13 +407,12 @@ impl Module { if sym_function.unknown { continue; } - let offset = symbol.addr - base_address; let size = sym_function.size; let parse_result = Function::parse_function(FunctionParseOptions { name: symbol.name.clone(), start_address: symbol.addr, - base_address: symbol.addr, - module_code: &code[offset as usize..], + base_address, + module_code: code, known_end_address: Some(symbol.addr + size), module_start_address: base_address, module_end_address: end_address, @@ -756,14 +760,14 @@ impl Module { let exception_start = exception_data.as_ref().and_then(ExceptionData::exception_start); let text_max = exception_start.unwrap_or(read_only_end); let main_start = self.find_build_info_end_address(arm9); - let FoundFunctions { functions: text_functions, end: mut text_end, .. } = self + let FoundFunctions { functions: text_functions, end: text_end, .. 
} = self .find_functions( symbol_map, FunctionSearchOptions { start_address: Some(main_start), end_address: Some(text_max), // Skips over segments of strange EOR instructions which are never executed - max_function_start_search_distance: u32::MAX, + max_function_start_search_distance: 0x2000, use_data_as_upper_bound: true, // There are some handwritten assembly functions in ARM9 main that don't follow the procedure call standard check_defs_uses: false, @@ -777,7 +781,7 @@ impl Module { self.add_text_section(FoundFunctions { functions, start: text_start, end: text_end })?; // Add .exception and .exceptix sections if they exist - if let Some(exception_data) = exception_data { + let text_exceptix_end = if let Some(exception_data) = exception_data { if let Some(exception_start) = exception_data.exception_start() { self.sections.add(Section::new(SectionOptions { name: ".exception".to_string(), @@ -800,11 +804,13 @@ impl Module { comments: Comments::new(), })?)?; - text_end = exception_data.exceptix_end(); - } + exception_data.exceptix_end() + } else { + text_end + }; // .rodata - let rodata_start = rodata_start.unwrap_or(text_end); + let rodata_start = rodata_start.unwrap_or(text_exceptix_end); self.add_rodata_section(rodata_start, ctor.start)?; // .data and .bss @@ -818,12 +824,16 @@ impl Module { if let Some(section_after_text) = section_after_text && text_end != section_after_text.start_address() { + let next_start = section_after_text.start_address(); log::warn!( - "Expected .text to end ({:#010x}) where {} starts ({:#010x})", + "Expected .text to end ({:#010x}) where {} starts ({:#010x}), extending .text to remove the gap", text_end, section_after_text.name(), section_after_text.start_address() ); + + let (_, text_section) = self.sections.by_name_mut(".text").unwrap(); + text_section.set_end_address(next_start); } Ok(()) @@ -855,7 +865,7 @@ impl Module { } fn find_sections_itcm(&mut self, symbol_map: &mut SymbolMap) -> Result<(), ModuleError> { - let text_functions 
= self + let mut text_functions = self .find_functions( symbol_map, FunctionSearchOptions { @@ -868,6 +878,8 @@ impl Module { &self.default_func_prefix.clone(), )? .ok_or_else(|| NoItcmFunctionsSnafu.build())?; + // Force .text start to base address for cases where first function is not at the base address + text_functions.start = self.base_address; let text_end = text_functions.end; self.add_text_section(text_functions)?; diff --git a/lib/src/config/symbol.rs b/lib/src/config/symbol.rs index 8ad2b60..a582f3d 100644 --- a/lib/src/config/symbol.rs +++ b/lib/src/config/symbol.rs @@ -13,7 +13,10 @@ use snafu::{Snafu, ensure}; use super::{ParseContext, config::Config, iter_attributes, module::ModuleKind}; use crate::{ - analysis::{functions::Function, jump_table::JumpTable}, + analysis::{ + functions::Function, + jump_table::{JumpTable, JumpTableKind}, + }, config::{CommentedLine, Comments}, util::{ io::{FileError, create_file}, @@ -489,7 +492,22 @@ impl SymbolMap { thumb: bool, ) -> Result<(SymbolId, &Symbol), SymbolMapError> { let name = Self::label_name(addr); - self.add_if_new_address(Symbol::new_external_label(name, addr, thumb)) + if let Some((existing_id, _)) = self.by_address(addr)? 
{ + let existing_symbol = self.get_mut(existing_id).unwrap(); + if let SymbolKind::Label(existing_label) = &mut existing_symbol.kind { + existing_label.external = true; + Ok((existing_id, existing_symbol)) + } else { + MultipleSymbolsSnafu { + address: addr, + name, + other_name: existing_symbol.name.clone(), + } + .fail() + } + } else { + Ok(self.add(Symbol::new_external_label(name, addr, thumb))) + } } pub fn get_label(&self, addr: u32) -> Result, SymbolMapError> { @@ -548,7 +566,7 @@ impl SymbolMap { table: &JumpTable, ) -> Result<(SymbolId, &Symbol), SymbolMapError> { let name = Self::label_name(table.address); - self.add_if_new_address(Symbol::new_jump_table(name, table.address, table.size, table.code)) + self.add_if_new_address(Symbol::new_jump_table(name, table.address, table.size, table.kind)) } pub fn add_data( @@ -970,10 +988,10 @@ impl Symbol { } } - pub fn new_jump_table(name: String, addr: u32, size: u32, code: bool) -> Self { + pub fn new_jump_table(name: String, addr: u32, size: u32, kind: JumpTableKind) -> Self { Self { name, - kind: SymbolKind::JumpTable(SymJumpTable { size, code }), + kind: SymbolKind::JumpTable(SymJumpTable { size, kind }), addr, ambiguous: false, local: true, @@ -1286,7 +1304,7 @@ impl Display for InstructionMode { #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub struct SymJumpTable { pub size: u32, - pub code: bool, + pub kind: JumpTableKind, } #[derive(Clone, Copy, PartialEq, Eq, Debug)]