Skip to content

Commit

Permalink
Modified MachORepresentable.Strings to support encodings other than UTF8
Browse files Browse the repository at this point in the history
  • Loading branch information
p-x9 committed Feb 3, 2025
1 parent 1152114 commit a0aaa3c
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 26 deletions.
20 changes: 20 additions & 0 deletions Sources/MachOKit/Extension/UnsafePointer+.swift
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,23 @@ extension UnsafePointer<CChar> {
return (string, offset)
}
}

extension UnsafePointer where Pointee: FixedWidthInteger {
    /// Scans forward from this pointer until an element equal to `0` is found.
    ///
    /// No bounds are checked here: the scan only stops at a zero element, so
    /// the caller must guarantee that one is reachable from `self`.
    ///
    /// - Returns: A pointer to the first zero-valued element at or after `self`.
    func findNullTerminator() -> UnsafePointer<Pointee> {
        var index = 0
        while self[index] != 0 {
            index += 1
        }
        return advanced(by: index)
    }

    /// Decodes the null-terminated string starting at this pointer.
    ///
    /// - Parameter encoding: The Unicode encoding whose code units the
    ///   pointee memory holds.
    /// - Returns: The decoded string, together with the number of **bytes**
    ///   (not code units) the string occupies including its terminating
    ///   zero code unit.
    func readString<Encoding: _UnicodeEncoding>(
        as encoding: Encoding.Type
    ) -> (String, Int) where Pointee == Encoding.CodeUnit {
        let decoded = String(decodingCString: self, as: Encoding.self)

        // Byte distance from the first code unit up to (excluding) the terminator.
        let terminator = findNullTerminator()
        let payloadByteCount = UnsafeRawPointer(self)
            .distance(to: UnsafeRawPointer(terminator))

        // The terminator itself is counted as consumed.
        let consumedByteCount = payloadByteCount + MemoryLayout<Pointee>.size

        return (decoded, consumedByteCount)
    }
}
2 changes: 1 addition & 1 deletion Sources/MachOKit/LoadCommand/LinkerOptionCommand.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ extension LinkerOptionCommand {
let ptr = cmdsStart
.advanced(by: offset)
.advanced(by: layoutSize)
.assumingMemoryBound(to: CChar.self)
.assumingMemoryBound(to: UInt8.self)
let strings = MachOImage.Strings(
basePointer: ptr,
tableSize: Int(layout.cmdsize) - layoutSize
Expand Down
2 changes: 1 addition & 1 deletion Sources/MachOKit/LoadCommand/Model/Section.swift
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ extension SectionProtocol {
}
let tableSize = size
return MachOImage.Strings(
basePointer: basePointer.assumingMemoryBound(to: CChar.self),
basePointer: basePointer.assumingMemoryBound(to: UInt8.self),
tableSize: tableSize
)
}
Expand Down
43 changes: 34 additions & 9 deletions Sources/MachOKit/MachOFile+Strings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
import Foundation

extension MachOFile {
public struct Strings: Sequence {
public typealias Strings = UnicodeStrings<UTF8>
public typealias UTF16Strings = UnicodeStrings<UTF16>

public struct UnicodeStrings<Encoding: _UnicodeEncoding>: StringTable {
public let data: Data

/// file offset of string table start
Expand All @@ -18,38 +21,48 @@ extension MachOFile {
/// size of string table
public let size: Int

public let isLittleEndian: Bool

public func makeIterator() -> Iterator {
.init(data: data)
.init(data: data, isLittleEndian: isLittleEndian)
}
}
}

extension MachOFile.Strings {
init(machO: MachOFile, offset: Int, size: Int) {
extension MachOFile.UnicodeStrings {
init(
machO: MachOFile,
offset: Int,
size: Int,
isLittleEndian: Bool = false
) {
let data = machO.fileHandle.readData(
offset: numericCast(offset),
size: size
)
self.init(
data: data,
offset: offset,
size: size
size: size,
isLittleEndian: isLittleEndian
)
}
}

extension MachOFile.Strings {
extension MachOFile.UnicodeStrings {
public struct Iterator: IteratorProtocol {
public typealias Element = StringTableEntry

private let data: Data
private let tableSize: Int
private let isLittleEndian: Bool

private var nextOffset: Int

init(data: Data) {
init(data: Data, isLittleEndian: Bool) {
self.data = data
self.tableSize = data.count
self.isLittleEndian = isLittleEndian
self.nextOffset = 0
}

Expand All @@ -60,8 +73,20 @@ extension MachOFile.Strings {

let ptr = baseAddress
.advanced(by: nextOffset)
.assumingMemoryBound(to: UInt8.self)
let (string, offset) = ptr.readString()
.assumingMemoryBound(to: Encoding.CodeUnit.self)
var (string, offset) = ptr.readString(as: Encoding.self)

if isLittleEndian {
let data = Data(bytes: ptr, count: offset)
string = data.withUnsafeBytes {
let baseAddress = $0.baseAddress!
.assumingMemoryBound(to: Encoding.CodeUnit.self)
return .init(
decodingCString: baseAddress,
as: Encoding.self
)
}
}

let result = Element(string: string, offset: nextOffset)

Expand Down
71 changes: 57 additions & 14 deletions Sources/MachOKit/MachOImage+Strings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,61 +9,87 @@
import Foundation

extension MachOImage {
public struct Strings: Sequence {
public let basePointer: UnsafePointer<CChar>
public typealias Strings = UnicodeStrings<UTF8>
public typealias UTF16Strings = UnicodeStrings<UTF16>

public struct UnicodeStrings<Encoding: _UnicodeEncoding>: StringTable {
public let basePointer: UnsafePointer<Encoding.CodeUnit>
public let tableSize: Int

public let isLittleEndian: Bool

init(
basePointer: UnsafePointer<Encoding.CodeUnit>,
tableSize: Int,
isLittleEndian: Bool = false
) {
self.basePointer = basePointer
self.tableSize = tableSize
self.isLittleEndian = isLittleEndian
}

public func makeIterator() -> Iterator {
Iterator(
basePointer: basePointer,
tableSize: tableSize
tableSize: tableSize,
isLittleEndian: isLittleEndian
)
}
}
}

extension MachOImage.Strings {
extension MachOImage.UnicodeStrings {
init(
ptr: UnsafeRawPointer,
text: SegmentCommand64,
linkedit: SegmentCommand64,
symtab: LoadCommandInfo<symtab_command>
symtab: LoadCommandInfo<symtab_command>,
isLittleEndian: Bool = false
) {
let fileSlide = Int(linkedit.vmaddr) - Int(text.vmaddr) - Int(linkedit.fileoff)
self.basePointer = ptr
.advanced(by: numericCast(symtab.stroff))
.advanced(by: numericCast(fileSlide))
.assumingMemoryBound(to: CChar.self)
.assumingMemoryBound(to: Encoding.CodeUnit.self)
self.tableSize = Int(symtab.strsize)
self.isLittleEndian = isLittleEndian
}

init(
ptr: UnsafeRawPointer,
text: SegmentCommand,
linkedit: SegmentCommand,
symtab: LoadCommandInfo<symtab_command>
symtab: LoadCommandInfo<symtab_command>,
isLittleEndian: Bool = false
) {
let fileSlide = Int(linkedit.vmaddr) - Int(text.vmaddr) - Int(linkedit.fileoff)
self.basePointer = ptr
.advanced(by: numericCast(symtab.stroff))
.advanced(by: numericCast(fileSlide))
.assumingMemoryBound(to: CChar.self)
.assumingMemoryBound(to: Encoding.CodeUnit.self)
self.tableSize = Int(symtab.strsize)
self.isLittleEndian = isLittleEndian
}
}

extension MachOImage.Strings {
extension MachOImage.UnicodeStrings {
public struct Iterator: IteratorProtocol {
public typealias Element = StringTableEntry

private let basePointer: UnsafePointer<CChar>
private let basePointer: UnsafePointer<Encoding.CodeUnit>
private let tableSize: Int
private let isLittleEndian: Bool

private var nextPointer: UnsafePointer<CChar>
private var nextPointer: UnsafePointer<Encoding.CodeUnit>

init(basePointer: UnsafePointer<CChar>, tableSize: Int) {
init(
basePointer: UnsafePointer<Encoding.CodeUnit>,
tableSize: Int,
isLittleEndian: Bool
) {
self.basePointer = basePointer
self.tableSize = tableSize
self.isLittleEndian = isLittleEndian
self.nextPointer = basePointer
}

Expand All @@ -72,8 +98,25 @@ extension MachOImage.Strings {
if offset >= tableSize {
return nil
}
let (string, nextOffset) = nextPointer.readString()
nextPointer = nextPointer.advanced(by: nextOffset)
var (string, nextOffset) = nextPointer.readString(
as: Encoding.self
)

if isLittleEndian {
let data = Data(bytes: nextPointer, count: offset)
string = data.withUnsafeBytes {
let baseAddress = $0.baseAddress!
.assumingMemoryBound(to: Encoding.CodeUnit.self)
return .init(
decodingCString: baseAddress,
as: Encoding.self
)
}
}

nextPointer = nextPointer.advanced(
by: nextOffset / MemoryLayout<Encoding.CodeUnit>.size
)

return .init(string: string, offset: offset)
}
Expand Down
3 changes: 2 additions & 1 deletion Sources/MachOKit/Protocol/MachORepresentable.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ public protocol MachORepresentable {
associatedtype RebaseOperations: Sequence<RebaseOperation>
associatedtype BindOperations: Sequence<BindOperation>
associatedtype ExportTrie: Sequence<ExportTrieEntry>
associatedtype Strings: Sequence<StringTableEntry>
associatedtype Strings: StringTable<UTF8>
associatedtype UTF16Strings: StringTable<UTF16>
associatedtype FunctionStarts: Sequence<FunctionStart>
associatedtype DataInCode: RandomAccessCollection<DataInCodeEntry>
associatedtype DyldChainedFixups: DyldChainedFixupsProtocol
Expand Down
11 changes: 11 additions & 0 deletions Sources/MachOKit/Protocol/StringTable.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//
// StringTable.swift
// MachOKit
//
// Created by p-x9 on 2025/02/02
//
//

/// A sequence of `StringTableEntry` values that is tagged with a Unicode encoding.
///
/// `Encoding` is the protocol's primary associated type, so a requirement can
/// pin it with the constrained form, e.g. `StringTable<UTF8>`.
public protocol StringTable<Encoding>: Sequence<StringTableEntry> {
/// The Unicode encoding associated with this table.
/// NOTE(review): presumably the encoding used to decode the table's strings — confirm against conforming types.
associatedtype Encoding: _UnicodeEncoding
}

0 comments on commit a0aaa3c

Please sign in to comment.