Skip to content

Commit 30bc560

Browse files
committed
Add type representation to traverser
1 parent d91679f commit 30bc560

File tree

6 files changed: 120 additions (+120) and 49 deletions (-49)

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.8.4
2+
current_version = 0.8.5
33
commit = True
44
tag = False
55

objutils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
The first parameter is always the codec name.
1414
"""
1515

16-
__version__ = "0.8.4"
16+
__version__ = "0.8.5"
1717

1818
__all__ = [
1919
"Image",

objutils/dwarf/constants.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -856,6 +856,12 @@ class DiscriminantDescriptor(EnumBase):
856856
range = 0x01
857857

858858

859+
class Defaulted(EnumBase):
860+
no = 0x00
861+
in_class = 0x01
862+
out_of_class = 0x02
863+
864+
859865
class LineNumberStandard(EnumBase):
860866
DW_LNS_copy = 0x01
861867
DW_LNS_advance_pc = 0x02
@@ -999,8 +1005,3 @@ class Languages(EnumBase):
9991005
GOOGLE_RenderScript = 0x8E57
10001006
BORLAND_Delphi = 0xB000
10011007

1002-
1003-
class Defaulted(EnumBase):
1004-
DW_DEFAULTED_no = 0x00
1005-
DW_DEFAULTED_in_class = 0x01
1006-
DW_DEFAULTED_out_of_class = 0x02

objutils/dwarf/traverser.py

Lines changed: 110 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,32 @@
11
from collections import defaultdict
2-
from dataclasses import dataclass
2+
from dataclasses import dataclass, field
33
from functools import lru_cache
44
from itertools import groupby
55
from typing import Any, Optional, Union
66

77
from objutils.dwarf.constants import (
8+
Accessibility,
89
AttributeEncoding,
910
AttributeForm,
1011
BaseTypeEncoding,
12+
CallingConvention,
13+
Defaulted,
14+
DecimalSign,
15+
DiscriminantDescriptor,
16+
Endianity,
17+
IdentifierCase,
18+
Inline,
19+
Languages,
20+
Ordering,
1121
Tag,
22+
Virtuality,
23+
Visibility,
1224
)
1325
from objutils.dwarf.encoding import Endianess
1426
from objutils.dwarf.readers import DwarfReaders
1527
from objutils.elf import defs, model
1628
from objutils.elf.model import DIEAttribute
1729

18-
1930
DWARF_TYPE_ENCODINGS = frozenset(
2031
{
2132
Tag.base_type,
@@ -53,6 +64,31 @@ class CompiledUnit:
5364
language: str
5465

5566

67+
@dataclass
68+
class DIE:
69+
tag: str
70+
children: list[Any] = field(default_factory=list)
71+
attributes: dict[str, Any] = field(default_factory=dict)
72+
73+
74+
DATA_REPRESENTATION = {
75+
"encoding": BaseTypeEncoding,
76+
"decimal_sign": DecimalSign,
77+
"endianity": Endianity,
78+
"accessibility": Accessibility,
79+
"visibility": Visibility,
80+
"virtuality": Virtuality,
81+
"language": Languages,
82+
"identifier_case": IdentifierCase,
83+
"calling_convention": CallingConvention,
84+
"inline": Inline,
85+
"ordering": Ordering,
86+
"discr_list": DiscriminantDescriptor,
87+
"defaulted": Defaulted,
88+
89+
}
90+
91+
5692
def get_attribute(attrs: dict[str, DIEAttribute], key: str, default: Union[int, str]) -> Union[int, str]:
5793
attr: Optional[DIEAttribute] = attrs.get(key)
5894
if attr is None:
@@ -64,7 +100,8 @@ def get_attribute(attrs: dict[str, DIEAttribute], key: str, default: Union[int,
64100
class CompiledUnitsSummary:
65101

66102
def __init__(self, session) -> None:
67-
cus = session.query(model.DebugInformationEntry).filter(model.DebugInformationEntry.tag == Tag.compile_unit).all()
103+
cus = session.query(model.DebugInformationEntry).filter(
104+
model.DebugInformationEntry.tag == Tag.compile_unit).all()
68105
units = []
69106
tps = set()
70107
for cu in cus:
@@ -80,7 +117,8 @@ def __init__(self, session) -> None:
80117
print(f"\t\tVariable without type: {ch.attributes_map}")
81118
else:
82119
tpx = int(ch.attributes_map["type"].raw_value)
83-
tp = session.query(model.DebugInformationEntry).filter(model.DebugInformationEntry.offset == tpx).first()
120+
tp = session.query(model.DebugInformationEntry).filter(
121+
model.DebugInformationEntry.offset == tpx).first()
84122
print(tp.attributes_map)
85123

86124
groups = groupby(sorted(units, key=lambda x: x.comp_dir), key=lambda x: x.comp_dir)
@@ -132,7 +170,8 @@ class AttributeParser:
132170
# Little = 0
133171
# Big = 1
134172

135-
def __init__(self, session_or_path, *, import_if_needed: bool = True, force_import: bool = False, quiet: bool = True):
173+
def __init__(self, session_or_path, *, import_if_needed: bool = True, force_import: bool = False,
174+
quiet: bool = True):
136175
"""
137176
Create an AttributeParser.
138177
@@ -194,7 +233,8 @@ def __init__(self, session_or_path, *, import_if_needed: bool = True, force_impo
194233
self.dwarf_expression = factory.dwarf_expression
195234

196235
@lru_cache(maxsize=64 * 1024)
197-
def type_tree(self, obj: Union[int, model.DebugInformationEntry, DIEAttribute]) -> dict[str, Any] | CircularReference:
236+
def type_tree(self, obj: Union[int, model.DebugInformationEntry, DIEAttribute]) -> dict[
237+
str, Any] | CircularReference:
198238
"""Return a fully traversed type tree as a dict.
199239
200240
Accepts one of:
@@ -238,9 +278,9 @@ def type_tree(self, obj: Union[int, model.DebugInformationEntry, DIEAttribute])
238278
return {"tag": "<unsupported>", "attrs": {}}
239279

240280
def _resolve_type_offset(
241-
self,
242-
type_attr: DIEAttribute,
243-
context_die: Optional[model.DebugInformationEntry],
281+
self,
282+
type_attr: DIEAttribute,
283+
context_die: Optional[model.DebugInformationEntry],
244284
) -> Optional[int]:
245285
"""Resolve a DW_AT_type attribute's value to an absolute DIE offset.
246286
@@ -257,11 +297,11 @@ def _resolve_type_offset(
257297
try:
258298
frm = getattr(type_attr, "form", None)
259299
if frm in (
260-
getattr(AttributeForm, "DW_FORM_ref1", None),
261-
getattr(AttributeForm, "DW_FORM_ref2", None),
262-
getattr(AttributeForm, "DW_FORM_ref4", None),
263-
getattr(AttributeForm, "DW_FORM_ref8", None),
264-
getattr(AttributeForm, "DW_FORM_ref_udata", None),
300+
getattr(AttributeForm, "DW_FORM_ref1", None),
301+
getattr(AttributeForm, "DW_FORM_ref2", None),
302+
getattr(AttributeForm, "DW_FORM_ref4", None),
303+
getattr(AttributeForm, "DW_FORM_ref8", None),
304+
getattr(AttributeForm, "DW_FORM_ref_udata", None),
265305
):
266306
base = getattr(context_die, "cu_start", 0) if context_die is not None else 0
267307
off += int(base or 0)
@@ -290,29 +330,42 @@ def traverse_tree(self, entry: model.DebugInformationEntry, level: int = 0) -> N
290330
try:
291331
frm = getattr(attr, "form", None)
292332
if frm in (
293-
getattr(AttributeForm, "DW_FORM_ref1", None),
294-
getattr(AttributeForm, "DW_FORM_ref2", None),
295-
getattr(AttributeForm, "DW_FORM_ref4", None),
296-
getattr(AttributeForm, "DW_FORM_ref8", None),
297-
getattr(AttributeForm, "DW_FORM_ref_udata", None),
333+
getattr(AttributeForm, "DW_FORM_ref1", None),
334+
getattr(AttributeForm, "DW_FORM_ref2", None),
335+
getattr(AttributeForm, "DW_FORM_ref4", None),
336+
getattr(AttributeForm, "DW_FORM_ref8", None),
337+
getattr(AttributeForm, "DW_FORM_ref_udata", None),
298338
):
299339
base = getattr(entry, "cu_start", 0) or 0
300340
off += int(base)
301341
except Exception:
302342
pass
303343
type_info = f" -> {self._type_summary(int(off))}"
304344
if "location" in entry.attributes_map:
305-
location = self.dwarf_expression(entry.attributes_map["location"].form, entry.attributes_map["location"].raw_value)
345+
location = self.dwarf_expression(entry.attributes_map["location"].form,
346+
entry.attributes_map["location"].raw_value)
306347
print(f"{' ' * level}{tag} '{name}'{type_info} [location={location}] [off=0x{entry.offset:08x}]")
307348
else:
308349
if tag == "enumerator" and "const_value" in entry.attributes_map:
309350
enumerator_value = int(entry.attributes_map["const_value"].raw_value)
310-
print(f"{' ' * level}{tag} '{name}'{type_info} [value=0x{enumerator_value:04x}] [off=0x{entry.offset:08x}]")
351+
print(
352+
f"{' ' * level}{tag} '{name}'{type_info} [value=0x{enumerator_value:04x}] [off=0x{entry.offset:08x}]")
353+
elif tag == 'subrange_type':
354+
lower_bound = 0
355+
upper_bound = 0
356+
if "lower_bound" in entry.attributes_map:
357+
lower_bound = int(entry.attributes_map["lower_bound"].raw_value)
358+
if "upper_bound" in entry.attributes_map:
359+
upper_bound = int(entry.attributes_map["upper_bound"].raw_value)
360+
print(
361+
f"{' ' * level}{tag} '{name}'{type_info} [lower_bound={lower_bound}: upper_bound={upper_bound}] [off=0x{entry.offset:08x}]")
311362
elif tag == "member" and "data_member_location" in entry.attributes_map:
312363
data_member_location = self.dwarf_expression(
313-
entry.attributes_map["data_member_location"].form, entry.attributes_map["data_member_location"].raw_value
364+
entry.attributes_map["data_member_location"].form,
365+
entry.attributes_map["data_member_location"].raw_value
314366
)
315-
print(f"{' ' * level}{tag} '{name}'{type_info} [location={data_member_location}] [off=0x{entry.offset:08x}]")
367+
print(
368+
f"{' ' * level}{tag} '{name}'{type_info} [location={data_member_location}] [off=0x{entry.offset:08x}]")
316369
elif tag == "base_type":
317370
descr = ""
318371
if "byte_size" in entry.attributes_map:
@@ -330,6 +383,7 @@ def traverse_tree(self, entry: model.DebugInformationEntry, level: int = 0) -> N
330383
for child in getattr(entry, "children", []) or []:
331384
self.traverse_tree(child, level + 1)
332385

386+
@lru_cache(maxsize=8192)
333387
def parse_attributes(self, die: model.DebugInformationEntry, level: int) -> dict[str, Any]:
334388
result: dict[str, Any] = defaultdict(dict)
335389
# Prefer attributes_map to avoid repeated scans
@@ -357,23 +411,36 @@ def parse_attributes(self, die: model.DebugInformationEntry, level: int) -> dict
357411
try:
358412
frm = getattr(attr, "form", None)
359413
if frm in (
360-
getattr(AttributeForm, "DW_FORM_ref1", None),
361-
getattr(AttributeForm, "DW_FORM_ref2", None),
362-
getattr(AttributeForm, "DW_FORM_ref4", None),
363-
getattr(AttributeForm, "DW_FORM_ref8", None),
364-
getattr(AttributeForm, "DW_FORM_ref_udata", None),
414+
getattr(AttributeForm, "DW_FORM_ref1", None),
415+
getattr(AttributeForm, "DW_FORM_ref2", None),
416+
getattr(AttributeForm, "DW_FORM_ref4", None),
417+
getattr(AttributeForm, "DW_FORM_ref8", None),
418+
getattr(AttributeForm, "DW_FORM_ref_udata", None),
365419
):
366420
base = getattr(die, "cu_start", 0) or 0
367421
referenced_offset += int(base)
368422
except Exception:
369423
pass
370424
if referenced_offset and referenced_offset != die.offset:
371-
result.setdefault("attrs", {})[attr_name] = self.parse_type(referenced_offset, level + 1)
425+
# result.setdefault("attrs", {})[attr_name] = self.parse_type(referenced_offset, level + 1)
426+
result[attr_name] = self.parse_type(referenced_offset, level + 1)
372427
continue
373-
374428
# Default: keep raw_value to stay close to DB content
375-
result.setdefault("attrs", {})[attr_name] = attr.raw_value
376-
429+
# result.setdefault("attrs", {})[attr_name] = attr.raw_value
430+
elif attr_name in DATA_REPRESENTATION:
431+
converter = DATA_REPRESENTATION[attr_name]
432+
try:
433+
attr_value = int(attr.raw_value)
434+
except Exception:
435+
result[attr_name] = attr.raw_value
436+
continue
437+
try:
438+
converted_value = converter(attr_value)
439+
except Exception:
440+
converted_value = attr_value
441+
result[attr_name] = converted_value
442+
else:
443+
result[attr_name] = attr.raw_value
377444
return result
378445

379446
def parse_type(self, offset: int, level: int = 0) -> dict[str, Any] | CircularReference:
@@ -408,19 +475,22 @@ def parse_type(self, offset: int, level: int = 0) -> dict[str, Any] | CircularRe
408475

409476
self.type_stack.add(offset)
410477
try:
411-
result: dict[str, Any] = defaultdict(dict)
412-
result["tag"] = getattr(die.abbrev, "tag", die.tag)
413-
result["children"] = []
478+
# result: dict[str, Any] = defaultdict(dict)
479+
# result["tag"] = getattr(die.abbrev, "tag", die.tag)
480+
# result["children"] = []
481+
482+
result: DIE = DIE(getattr(die.abbrev, "tag", die.tag))
414483

415484
# Parse this DIE's attributes
416-
result.update(self.parse_attributes(die, level))
485+
result.attributes.update(self.parse_attributes(die, level))
417486

418487
# Parse interesting children (e.g., members of a struct, enumerators, subrange bounds)
419488
for child in getattr(die, "children", []) or []:
420-
sub: dict[str, Any] = defaultdict(dict)
421-
sub["tag"] = getattr(child.abbrev, "tag", child.tag)
422-
sub.update(self.parse_attributes(child, level + 1))
423-
result["children"].append(sub)
489+
# sub: dict[str, Any] = defaultdict(dict)
490+
# sub["tag"] = getattr(child.abbrev, "tag", child.tag)
491+
sub: DIE = DIE(getattr(child.abbrev, "tag", child.tag))
492+
sub.attributes.update(self.parse_attributes(child, level + 1))
493+
result.children.append(sub)
424494

425495
# cache result
426496
self.parsed_types[offset] = result

objutils/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
"""objutils version module"""
22

3-
__version__ = "0.8.4"
3+
__version__ = "0.8.5"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "objutils"
3-
version = "0.8.4"
3+
version = "0.8.5"
44
description = "Objectfile library for Python"
55
authors = ["Christoph Schueler <[email protected]>"]
66
license = "GPLv2"

0 commit comments

Comments (0)