Skip to content

Commit

Permalink
Merge pull request #179 from p-x9/feature/ustring
Browse files Browse the repository at this point in the history
Support table of UTF16 string
  • Loading branch information
p-x9 authored Feb 4, 2025
2 parents 6debf60 + a61dc6d commit 518e8e1
Show file tree
Hide file tree
Showing 12 changed files with 209 additions and 26 deletions.
31 changes: 31 additions & 0 deletions Sources/MachOKit/Extension/Data+.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//
// Data+.swift
// MachOKit
//
// Created by p-x9 on 2025/02/02
//
//
import Foundation

extension Data {
    /// Returns a copy of the data in which every complete `T`-sized element
    /// has its byte order reversed.
    ///
    /// The bytes are interpreted as consecutive values of `T`, starting at
    /// the first byte. Trailing bytes that do not fill a whole element are
    /// appended to the result unchanged.
    ///
    /// - Parameter type: Integer type whose width defines the swap unit.
    /// - Returns: The byte-swapped data, or `self` unchanged when it holds
    ///   fewer bytes than a single `T`.
    func byteSwapped<T: FixedWidthInteger>(_ type: T.Type) -> Data {
        let width = MemoryLayout<T>.size
        guard count >= width else { return self }

        // Copy the bytes into typed storage instead of rebinding the raw
        // buffer: the start of a `Data` (a slice in particular) is not
        // guaranteed to be aligned for `T`, which `bindMemory(to:)` requires.
        var values = [T](repeating: 0, count: count / width)
        values.withUnsafeMutableBufferPointer { destination in
            // Copies at most `destination.count` elements' worth of bytes,
            // i.e. exactly the whole elements.
            _ = self.copyBytes(to: destination)
        }

        for index in values.indices {
            values[index] = values[index].byteSwapped
        }

        var swappedData = values.withUnsafeBufferPointer {
            Data(buffer: $0)
        }

        // Preserve the bytes that did not form a complete element.
        let remainingBytes = count % width
        if remainingBytes > 0 {
            swappedData.append(suffix(remainingBytes))
        }

        return swappedData
    }
}
20 changes: 20 additions & 0 deletions Sources/MachOKit/Extension/UnsafePointer+.swift
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,23 @@ extension UnsafePointer<CChar> {
return (string, offset)
}
}

extension UnsafePointer where Pointee: FixedWidthInteger {
    /// Scans forward from `self`, one element at a time, and returns a
    /// pointer to the first element whose value is zero.
    ///
    /// The scan has no upper bound; the caller must guarantee that a zero
    /// element exists in readable memory.
    func findNullTerminator() -> UnsafePointer<Pointee> {
        var cursor = self
        while cursor.pointee != 0 {
            cursor = cursor.successor()
        }
        return cursor
    }

    /// Decodes the null-terminated sequence of code units starting at `self`.
    ///
    /// - Parameter encoding: Unicode encoding whose code unit type matches
    ///   `Pointee`.
    /// - Returns: The decoded string and the number of bytes it occupies,
    ///   including the terminating zero code unit.
    func readString<Encoding: _UnicodeEncoding>(
        as encoding: Encoding.Type
    ) -> (String, Int) where Pointee == Encoding.CodeUnit {
        let decoded = String(decodingCString: self, as: Encoding.self)

        // Bytes between `self` and the terminator, plus the terminator itself.
        let terminator = findNullTerminator()
        let payloadBytes = distance(to: terminator) * MemoryLayout<Pointee>.stride
        let byteLength = payloadBytes + MemoryLayout<Pointee>.size

        return (decoded, byteLength)
    }
}
2 changes: 1 addition & 1 deletion Sources/MachOKit/LoadCommand/LinkerOptionCommand.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ extension LinkerOptionCommand {
let ptr = cmdsStart
.advanced(by: offset)
.advanced(by: layoutSize)
.assumingMemoryBound(to: CChar.self)
.assumingMemoryBound(to: UInt8.self)
let strings = MachOImage.Strings(
basePointer: ptr,
tableSize: Int(layout.cmdsize) - layoutSize
Expand Down
2 changes: 1 addition & 1 deletion Sources/MachOKit/LoadCommand/Model/Section.swift
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ extension SectionProtocol {
}
let tableSize = size
return MachOImage.Strings(
basePointer: basePointer.assumingMemoryBound(to: CChar.self),
basePointer: basePointer.assumingMemoryBound(to: UInt8.self),
tableSize: tableSize
)
}
Expand Down
43 changes: 34 additions & 9 deletions Sources/MachOKit/MachOFile+Strings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
import Foundation

extension MachOFile {
public struct Strings: Sequence {
public typealias Strings = UnicodeStrings<UTF8>
public typealias UTF16Strings = UnicodeStrings<UTF16>

public struct UnicodeStrings<Encoding: _UnicodeEncoding>: StringTable {
public let data: Data

/// file offset of string table start
Expand All @@ -18,38 +21,48 @@ extension MachOFile {
/// size of string table
public let size: Int

public let isLittleEndian: Bool

public func makeIterator() -> Iterator {
.init(data: data)
.init(data: data, isLittleEndian: isLittleEndian)
}
}
}

extension MachOFile.Strings {
init(machO: MachOFile, offset: Int, size: Int) {
extension MachOFile.UnicodeStrings {
init(
machO: MachOFile,
offset: Int,
size: Int,
isLittleEndian: Bool = false
) {
let data = machO.fileHandle.readData(
offset: numericCast(offset),
size: size
)
self.init(
data: data,
offset: offset,
size: size
size: size,
isLittleEndian: isLittleEndian
)
}
}

extension MachOFile.Strings {
extension MachOFile.UnicodeStrings {
public struct Iterator: IteratorProtocol {
public typealias Element = StringTableEntry

private let data: Data
private let tableSize: Int
private let isLittleEndian: Bool

private var nextOffset: Int

init(data: Data) {
init(data: Data, isLittleEndian: Bool) {
self.data = data
self.tableSize = data.count
self.isLittleEndian = isLittleEndian
self.nextOffset = 0
}

Expand All @@ -60,8 +73,20 @@ extension MachOFile.Strings {

let ptr = baseAddress
.advanced(by: nextOffset)
.assumingMemoryBound(to: UInt8.self)
let (string, offset) = ptr.readString()
.assumingMemoryBound(to: Encoding.CodeUnit.self)
var (string, offset) = ptr.readString(as: Encoding.self)

if isLittleEndian {
let data = Data(bytes: ptr, count: offset)
string = data.withUnsafeBytes {
let baseAddress = $0.baseAddress!
.assumingMemoryBound(to: Encoding.CodeUnit.self)
return .init(
decodingCString: baseAddress,
as: Encoding.self
)
}
}

let result = Element(string: string, offset: nextOffset)

Expand Down
15 changes: 15 additions & 0 deletions Sources/MachOKit/MachOFile.swift
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,21 @@ extension MachOFile {
public var allCStrings: [String] {
allCStringTables.flatMap { $0.map(\.string) }
}

/// UTF-16 string table read from the first section named `__ustring`,
/// or `nil` when no such section exists.
///
/// The table is created with `isLittleEndian: true`.
public var uStrings: UTF16Strings? {
    let candidate = sections.first { $0.sectionName == "__ustring" }
    guard let ustringSection = candidate else { return nil }

    // The section offset is combined with the start offset of this Mach-O header.
    let tableStart = headerStartOffset + ustringSection.offset

    return UTF16Strings(
        machO: self,
        offset: tableStart,
        size: ustringSection.size,
        isLittleEndian: true
    )
}
}

extension MachOFile {
Expand Down
71 changes: 57 additions & 14 deletions Sources/MachOKit/MachOImage+Strings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,61 +9,87 @@
import Foundation

extension MachOImage {
public struct Strings: Sequence {
public let basePointer: UnsafePointer<CChar>
public typealias Strings = UnicodeStrings<UTF8>
public typealias UTF16Strings = UnicodeStrings<UTF16>

public struct UnicodeStrings<Encoding: _UnicodeEncoding>: StringTable {
public let basePointer: UnsafePointer<Encoding.CodeUnit>
public let tableSize: Int

public let isLittleEndian: Bool

/// Creates a string table over memory starting at `basePointer`.
///
/// - Parameters:
///   - basePointer: Pointer to the first code unit of the table.
///   - tableSize: Size of the table. NOTE(review): presumably a byte count,
///     since visible callers pass section/command sizes — confirm.
///   - isLittleEndian: Flag stored on the table; defaults to `false`.
init(
basePointer: UnsafePointer<Encoding.CodeUnit>,
tableSize: Int,
isLittleEndian: Bool = false
) {
self.basePointer = basePointer
self.tableSize = tableSize
self.isLittleEndian = isLittleEndian
}

public func makeIterator() -> Iterator {
Iterator(
basePointer: basePointer,
tableSize: tableSize
tableSize: tableSize,
isLittleEndian: isLittleEndian
)
}
}
}

extension MachOImage.Strings {
extension MachOImage.UnicodeStrings {
init(
ptr: UnsafeRawPointer,
text: SegmentCommand64,
linkedit: SegmentCommand64,
symtab: LoadCommandInfo<symtab_command>
symtab: LoadCommandInfo<symtab_command>,
isLittleEndian: Bool = false
) {
let fileSlide = Int(linkedit.vmaddr) - Int(text.vmaddr) - Int(linkedit.fileoff)
self.basePointer = ptr
.advanced(by: numericCast(symtab.stroff))
.advanced(by: numericCast(fileSlide))
.assumingMemoryBound(to: CChar.self)
.assumingMemoryBound(to: Encoding.CodeUnit.self)
self.tableSize = Int(symtab.strsize)
self.isLittleEndian = isLittleEndian
}

init(
ptr: UnsafeRawPointer,
text: SegmentCommand,
linkedit: SegmentCommand,
symtab: LoadCommandInfo<symtab_command>
symtab: LoadCommandInfo<symtab_command>,
isLittleEndian: Bool = false
) {
let fileSlide = Int(linkedit.vmaddr) - Int(text.vmaddr) - Int(linkedit.fileoff)
self.basePointer = ptr
.advanced(by: numericCast(symtab.stroff))
.advanced(by: numericCast(fileSlide))
.assumingMemoryBound(to: CChar.self)
.assumingMemoryBound(to: Encoding.CodeUnit.self)
self.tableSize = Int(symtab.strsize)
self.isLittleEndian = isLittleEndian
}
}

extension MachOImage.Strings {
extension MachOImage.UnicodeStrings {
public struct Iterator: IteratorProtocol {
public typealias Element = StringTableEntry

private let basePointer: UnsafePointer<CChar>
private let basePointer: UnsafePointer<Encoding.CodeUnit>
private let tableSize: Int
private let isLittleEndian: Bool

private var nextPointer: UnsafePointer<CChar>
private var nextPointer: UnsafePointer<Encoding.CodeUnit>

init(basePointer: UnsafePointer<CChar>, tableSize: Int) {
init(
basePointer: UnsafePointer<Encoding.CodeUnit>,
tableSize: Int,
isLittleEndian: Bool
) {
self.basePointer = basePointer
self.tableSize = tableSize
self.isLittleEndian = isLittleEndian
self.nextPointer = basePointer
}

Expand All @@ -72,8 +98,25 @@ extension MachOImage.Strings {
if offset >= tableSize {
return nil
}
let (string, nextOffset) = nextPointer.readString()
nextPointer = nextPointer.advanced(by: nextOffset)
var (string, nextOffset) = nextPointer.readString(
as: Encoding.self
)

if isLittleEndian {
let data = Data(bytes: nextPointer, count: offset)
string = data.withUnsafeBytes {
let baseAddress = $0.baseAddress!
.assumingMemoryBound(to: Encoding.CodeUnit.self)
return .init(
decodingCString: baseAddress,
as: Encoding.self
)
}
}

nextPointer = nextPointer.advanced(
by: nextOffset / MemoryLayout<Encoding.CodeUnit>.size
)

return .init(string: string, offset: offset)
}
Expand Down
18 changes: 18 additions & 0 deletions Sources/MachOKit/MachOImage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,24 @@ extension MachOImage {
public var allCStrings: [String] {
allCStringTables.flatMap { $0.map(\.string) }
}

/// UTF-16 string table backed by the in-memory `__ustring` section.
///
/// Returns `nil` when the slide is unavailable, when no section named
/// `__ustring` is found in `sections64`, or when the section's start
/// pointer cannot be resolved. Only `sections64` is searched.
/// The table is created with `isLittleEndian: true`.
public var uStrings: UTF16Strings? {
    guard let slide = vmaddrSlide else { return nil }

    let candidate = sections64.first { $0.sectionName == "__ustring" }
    guard let ustringSection = candidate,
          let tableStart = ustringSection.startPtr(vmaddrSlide: slide) else {
        return nil
    }

    let firstCodeUnit = tableStart.assumingMemoryBound(to: UInt16.self)
    return UTF16Strings(
        basePointer: firstCodeUnit,
        tableSize: numericCast(ustringSection.size),
        isLittleEndian: true
    )
}
}

extension MachOImage {
Expand Down
6 changes: 5 additions & 1 deletion Sources/MachOKit/Protocol/MachORepresentable.swift
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ public protocol MachORepresentable {
associatedtype RebaseOperations: Sequence<RebaseOperation>
associatedtype BindOperations: Sequence<BindOperation>
associatedtype ExportTrie: Sequence<ExportTrieEntry>
associatedtype Strings: Sequence<StringTableEntry>
associatedtype Strings: StringTable<UTF8>
associatedtype UTF16Strings: StringTable<UTF16>
associatedtype FunctionStarts: Sequence<FunctionStart>
associatedtype DataInCode: RandomAccessCollection<DataInCodeEntry>
associatedtype DyldChainedFixups: DyldChainedFixupsProtocol
Expand Down Expand Up @@ -84,6 +85,9 @@ public protocol MachORepresentable {
/// Symbol strings is not included.
var allCStrings: [String] { get }

/// Sequence of UTF-16 strings in the `__TEXT,__ustring` section
var uStrings: UTF16Strings? { get }

/// List of CFStrings in all segments
var cfStrings: [any CFStringProtocol]? { get }
/// List of CFStrings in 64-bit architecture segments
Expand Down
11 changes: 11 additions & 0 deletions Sources/MachOKit/Protocol/StringTable.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//
// StringTable.swift
// MachOKit
//
// Created by p-x9 on 2025/02/02
//
//

/// A sequence of `StringTableEntry` values tagged with a Unicode encoding.
///
/// `Encoding` is the protocol's primary associated type, so a requirement
/// can be written as `StringTable<UTF8>` or `StringTable<UTF16>`.
public protocol StringTable<Encoding>: Sequence<StringTableEntry> {
/// Unicode encoding associated with the table.
/// NOTE(review): presumably the encoding of the table's stored code units — confirm against conforming types.
associatedtype Encoding: _UnicodeEncoding
}
8 changes: 8 additions & 0 deletions Tests/MachOKitTests/MachOFilePrintTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,14 @@ final class MachOFilePrintTests: XCTestCase {
}
}

/// Prints every entry of the UTF-16 string table: its index, its offset
/// relative to the Mach-O header start (hexadecimal), and its text.
/// Does nothing when `machO.uStrings` is `nil`.
func testUStrings() throws {
    guard let ustrings = machO.uStrings else { return }

    for (index, entry) in ustrings.enumerated() {
        // Table offset plus entry offset, rebased to the Mach-O header start.
        let absoluteOffset = ustrings.offset + entry.offset
        let headerRelativeOffset = absoluteOffset - machO.headerStartOffset
        let hexOffset = "0x" + String(headerRelativeOffset, radix: 16)
        print(index, hexOffset, entry.string)
    }
}

func testCFStrings() {
guard let cfStrings = machO.cfStrings else { return }
for (i, cfString) in cfStrings.enumerated() {
Expand Down
Loading

0 comments on commit 518e8e1

Please sign in to comment.