Skip to content

Commit 0d7033f

Browse files
committed
IDEV-2204: Add DocstringPatcher to patch the specs from DT official techdocs.
1 parent 2c1586e commit 0d7033f

File tree

5 files changed

+3006
-35
lines changed

5 files changed

+3006
-35
lines changed

domaintools/api.py

Lines changed: 53 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55

66
import re
77
import ssl
8+
import yaml
89

910
from domaintools.constants import (
1011
Endpoint,
1112
OutputFormat,
1213
ENDPOINT_TO_SOURCE_MAP,
1314
RTTF_PRODUCTS_LIST,
1415
RTTF_PRODUCTS_CMD_MAPPING,
16+
SPECS_MAPPING,
1517
)
1618
from domaintools._version import current as version
1719
from domaintools.results import (
@@ -29,7 +31,11 @@
2931
filter_by_field,
3032
DTResultFilter,
3133
)
32-
from domaintools.utils import validate_feeds_parameters
34+
from domaintools.utils import (
35+
api_endpoint,
36+
auto_patch_docstrings,
37+
validate_feeds_parameters,
38+
)
3339

3440

3541
AVAILABLE_KEY_SIGN_HASHES = ["sha1", "sha256"]
@@ -40,6 +46,7 @@ def delimited(items, character="|"):
4046
return character.join(items) if type(items) in (list, tuple, set) else items
4147

4248

49+
@auto_patch_docstrings
4350
class API(object):
4451
"""Enables interacting with the DomainTools API via Python:
4552
@@ -94,8 +101,10 @@ def __init__(
94101
self.key_sign_hash = key_sign_hash
95102
self.default_parameters["app_name"] = app_name
96103
self.default_parameters["app_version"] = app_version
104+
self.specs = {}
97105

98106
self._build_api_url(api_url, api_port)
107+
self._initialize_specs()
99108

100109
if not https:
101110
raise Exception(
@@ -104,8 +113,25 @@ def __init__(
104113
if proxy_url and not isinstance(proxy_url, str):
105114
raise Exception("Proxy URL must be a string. For example: '127.0.0.1:8888'")
106115

116+
def _initialize_specs(self):
    """Load every OpenAPI spec listed in ``SPECS_MAPPING`` into ``self.specs``.

    Each entry maps a spec name to a YAML file path. A relative path is first
    tried as given (relative to the current working directory, as before);
    if no such file exists it is retried relative to the directory that
    contains this package, so loading does not depend on where the process
    was started.

    Loading is best-effort: a spec that is missing, unreadable, malformed or
    not a YAML mapping is reported through ``logging`` and skipped, leaving
    the remaining specs available.
    """
    import logging
    import os

    logger = logging.getLogger(__name__)
    # Parent of the package directory; SPECS_MAPPING paths start with the
    # package name, so they resolve against this directory.
    package_parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    for spec_name, file_path in SPECS_MAPPING.items():
        resolved_path = file_path
        if not os.path.isabs(file_path) and not os.path.exists(file_path):
            resolved_path = os.path.join(package_parent, file_path)

        try:
            with open(resolved_path, "r", encoding="utf-8") as f:
                spec_content = yaml.safe_load(f)
            # A usable OpenAPI document is always a non-empty mapping; a bare
            # scalar or list would break later ``.get`` lookups.
            if not spec_content or not isinstance(spec_content, dict):
                raise ValueError("Spec file is empty or invalid.")
        except (OSError, ValueError, yaml.YAMLError) as e:
            logger.warning("Error loading %s: %s", file_path, e)
            continue

        self.specs[spec_name] = spec_content
128+
107129
def _get_ssl_default_context(self, verify_ssl: Union[str, bool]):
108-
return ssl.create_default_context(cafile=verify_ssl) if isinstance(verify_ssl, str) else verify_ssl
130+
return (
131+
ssl.create_default_context(cafile=verify_ssl)
132+
if isinstance(verify_ssl, str)
133+
else verify_ssl
134+
)
109135

110136
def _build_api_url(self, api_url=None, api_port=None):
111137
"""Build the API url based on the given url and port. Defaults to `https://api.domaintools.com`"""
@@ -133,11 +159,18 @@ def _rate_limit(self, product):
133159
hours = limit_hours and 3600 / float(limit_hours)
134160
minutes = limit_minutes and 60 / float(limit_minutes)
135161

136-
self.limits[product["id"]] = {"interval": timedelta(seconds=minutes or hours or default)}
162+
self.limits[product["id"]] = {
163+
"interval": timedelta(seconds=minutes or hours or default)
164+
}
137165

138166
def _results(self, product, path, cls=Results, **kwargs):
139167
"""Returns _results for the specified API path with the specified **kwargs parameters"""
140-
if product != "account-information" and self.rate_limit and not self.limits_set and not self.limits:
168+
if (
169+
product != "account-information"
170+
and self.rate_limit
171+
and not self.limits_set
172+
and not self.limits
173+
):
141174
always_sign_api_key_previous_value = self.always_sign_api_key
142175
header_authentication_previous_value = self.header_authentication
143176
self._rate_limit(product)
@@ -181,7 +214,9 @@ def handle_api_key(self, is_rttf_product, path, parameters):
181214
else:
182215
raise ValueError(
183216
"Invalid value '{0}' for 'key_sign_hash'. "
184-
"Values available are {1}".format(self.key_sign_hash, ",".join(AVAILABLE_KEY_SIGN_HASHES))
217+
"Values available are {1}".format(
218+
self.key_sign_hash, ",".join(AVAILABLE_KEY_SIGN_HASHES)
219+
)
185220
)
186221

187222
parameters["timestamp"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -193,7 +228,9 @@ def handle_api_key(self, is_rttf_product, path, parameters):
193228

194229
def account_information(self, **kwargs):
    """Provides a snapshot of your accounts current API usage.

    Extra keyword arguments are forwarded unchanged to ``self._results``.
    """
    product = "account-information"
    endpoint = "/v1/account"
    return self._results(product, endpoint, items_path=("products",), **kwargs)
197234

198235
def available_api_calls(self):
199236
"""Provides a list of api calls that you can use based on your account information."""
@@ -396,7 +433,9 @@ def reputation(self, query, include_reasons=False, **kwargs):
396433

397434
def reverse_ip(self, domain=None, limit=None, **kwargs):
    """Pass in a domain name.

    The domain is interpolated into the request path and ``limit`` plus any
    extra keyword arguments are forwarded to ``self._results``.
    """
    endpoint = "/v1/{0}/reverse-ip".format(domain)
    return self._results("reverse-ip", endpoint, limit=limit, **kwargs)
400439

401440
def host_domains(self, ip=None, limit=None, **kwargs):
402441
"""Pass in an IP address."""
@@ -570,8 +609,12 @@ def iris_enrich(self, *domains, **kwargs):
570609
younger_than_date = kwargs.pop("younger_than_date", {}) or None
571610
older_than_date = kwargs.pop("older_than_date", {}) or None
572611
updated_after = kwargs.pop("updated_after", {}) or None
573-
include_domains_with_missing_field = kwargs.pop("include_domains_with_missing_field", {}) or None
574-
exclude_domains_with_missing_field = kwargs.pop("exclude_domains_with_missing_field", {}) or None
612+
include_domains_with_missing_field = (
613+
kwargs.pop("include_domains_with_missing_field", {}) or None
614+
)
615+
exclude_domains_with_missing_field = (
616+
kwargs.pop("exclude_domains_with_missing_field", {}) or None
617+
)
575618

576619
filtered_results = DTResultFilter(result_set=results).by(
577620
[
@@ -624,6 +667,7 @@ def iris_enrich_cli(self, domains=None, **kwargs):
624667
**kwargs,
625668
)
626669

670+
@api_endpoint(spec_name="iris", path="/v1/iris-investigate/")
627671
def iris_investigate(
628672
self,
629673
domains=None,
@@ -641,29 +685,6 @@ def iris_investigate(
641685
**kwargs,
642686
):
643687
"""Returns back a list of domains based on the provided filters.
644-
The following filters are available beyond what is parameterized as kwargs:
645-
646-
- ip: Search for domains having this IP.
647-
- email: Search for domains with this email in their data.
648-
- email_domain: Search for domains where the email address uses this domain.
649-
- nameserver_host: Search for domains with this nameserver.
650-
- nameserver_domain: Search for domains with a nameserver that has this domain.
651-
- nameserver_ip: Search for domains with a nameserver on this IP.
652-
- registrar: Search for domains with this registrar.
653-
- registrant: Search for domains with this registrant name.
654-
- registrant_org: Search for domains with this registrant organization.
655-
- mailserver_host: Search for domains with this mailserver.
656-
- mailserver_domain: Search for domains with a mailserver that has this domain.
657-
- mailserver_ip: Search for domains with a mailserver on this IP.
658-
- redirect_domain: Search for domains which redirect to this domain.
659-
- ssl_hash: Search for domains which have an SSL certificate with this hash.
660-
- ssl_subject: Search for domains which have an SSL certificate with this subject string.
661-
- ssl_email: Search for domains which have an SSL certificate with this email in it.
662-
- ssl_org: Search for domains which have an SSL certificate with this organization in it.
663-
- google_analytics: Search for domains which have this Google Analytics code.
664-
- adsense: Search for domains which have this AdSense code.
665-
- tld: Filter by TLD. Must be combined with another parameter.
666-
- search_hash: Use search hash from Iris to bring back domains.
667688
668689
You can loop over results of your investigation as if it was a native Python list:
669690

domaintools/constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,8 @@ class OutputFormat(Enum):
5656
"real-time-domain-discovery-feed-(api)": "domaindiscovery",
5757
"real-time-domain-discovery-feed-(s3)": "domaindiscovery",
5858
}
59+
60+
# Maps a spec name to the path of its OpenAPI YAML file. The keys are the
# names referenced by `@api_endpoint(spec_name=...)`; the "rttf" entry is
# currently disabled.
# NOTE(review): paths look relative to the repository root -- confirm how they
# are resolved when the package is installed.
SPECS_MAPPING = {
61+
"iris": "domaintools/specs/iris-openapi.yaml",
62+
# "rttf": "domaintools/specs/feeds-openapi.yaml",
63+
}

domaintools/docstring_patcher.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import inspect
2+
import functools
3+
import textwrap
4+
5+
6+
class DocstringPatcher:
    """Append OpenAPI operation details to the docstrings of tagged methods.

    A method is considered tagged when it carries both an ``_api_spec_name``
    and an ``_api_path`` attribute. For every tagged method of an instance,
    :meth:`patch` looks up the named spec in ``api_instance.specs``, renders
    one documentation section per HTTP operation defined for the path, and
    installs a per-instance wrapper whose ``__doc__`` is the original
    docstring followed by those sections.
    """

    # HTTP verbs looked up under a path item, in the order they are rendered.
    _HTTP_METHODS = ("get", "post", "put", "delete", "patch")

    # Attribute set on generated wrappers so repeated ``patch`` calls on the
    # same instance do not append the API details a second time.
    _PATCHED_FLAG = "_docstring_patched"

    def patch(self, api_instance):
        """Patch the docstrings of all tagged methods on ``api_instance``.

        Args:
            api_instance: Object whose bound methods are inspected. Its
                ``specs`` attribute (a mapping of spec name to parsed spec)
                is used to look up operation details; a missing or empty
                ``specs`` yields an "API Details Error" section instead.

        Returns:
            None. The instance is modified in place: each tagged method is
            replaced by a bound wrapper with the extended docstring.
        """
        method_names = []
        for attr_name in dir(api_instance):
            attr = getattr(api_instance, attr_name, None)
            if (
                inspect.ismethod(attr)
                and hasattr(attr, "_api_spec_name")
                and hasattr(attr, "_api_path")
                and not getattr(attr, self._PATCHED_FLAG, False)
            ):
                method_names.append(attr_name)

        specs = getattr(api_instance, "specs", None) or {}

        for attr_name in method_names:
            original_function = getattr(api_instance, attr_name).__func__
            path = original_function._api_path
            spec_to_use = specs.get(original_function._api_spec_name)
            original_doc = inspect.getdoc(original_function) or ""

            all_doc_sections = []
            if spec_to_use:
                path_item = (spec_to_use.get("paths") or {}).get(path) or {}
                all_doc_sections = [
                    self._generate_api_doc_string(spec_to_use, path, http_method)
                    for http_method in self._HTTP_METHODS
                    if http_method in path_item
                ]

            if not all_doc_sections:
                all_doc_sections.append(
                    f"\n--- API Details Error ---"
                    f"\n (Could not find any operations for path '{path}')"
                )

            # Combine the original doc with all operation docs.
            new_doc = (
                textwrap.dedent(original_doc) + "\n\n" + "\n\n".join(all_doc_sections)
            )

            wrapper = self._wrap_with_doc(original_function, new_doc)
            setattr(
                api_instance,
                attr_name,
                wrapper.__get__(api_instance, api_instance.__class__),
            )

    @classmethod
    def _wrap_with_doc(cls, function, doc):
        """Return a plain function wrapping ``function`` with ``doc`` as docstring.

        The wrapper takes ``self`` explicitly and forwards it to the
        underlying *function* (not to a bound method). It is meant to be
        bound to an instance afterwards; forwarding to an already-bound
        method would pass the instance twice.
        """

        @functools.wraps(function)
        def method_wrapper(self, *args, **kwargs):
            return function(self, *args, **kwargs)

        method_wrapper.__doc__ = doc
        setattr(method_wrapper, cls._PATCHED_FLAG, True)
        return method_wrapper

    def _generate_api_doc_string(self, spec: dict, path: str, method: str) -> str:
        """Creates the formatted API docstring section for ONE operation.

        Args:
            spec: Parsed OpenAPI document.
            path: Path key under ``spec["paths"]``.
            method: HTTP verb of the operation (any case).

        Returns:
            A multi-line string with the operation title, summary,
            description, external doc URL, query parameters and request body.
        """
        details = self._get_operation_details(spec, path, method)

        # Title and general information for this specific operation.
        lines = [f"--- Operation: {method.upper()} {path} ---"]
        lines.append(f"\n Summary: {details.get('summary', 'N/A')}")
        lines.append(f" Description: {details.get('description', 'N/A')}")
        lines.append(f" External Doc: {details.get('external_doc', 'N/A')}")

        # Render query parameters.
        lines.append("\n Query Parameters:")
        if not details["query_params"]:
            lines.append(" (No query parameters)")
        else:
            for param in details["query_params"]:
                lines.append(f"\n **{param['name']}** ({param['type']})")
                lines.append(f" Required: {param['required']}")
                lines.append(f" Description: {param['description']}")

        # Render request body.
        lines.append("\n Request Body:")
        body = details["request_body"]
        if not body:
            lines.append(" (No request body)")
        else:
            lines.append(f"\n **{body['type']}**")
            lines.append(f" Required: {body['required']}")
            lines.append(f" Description: {body['description']}")

        return "\n".join(lines)

    def _get_operation_details(self, spec: dict, path: str, method: str) -> dict:
        """Collect the documentable details of one operation from ``spec``.

        Returns:
            A dict with the keys ``summary``, ``description``,
            ``external_doc`` (each defaulting to ``"N/A"``), ``query_params``
            (list of dicts with ``name``, ``required``, ``description``,
            ``type``) and ``request_body`` (dict with ``required``,
            ``description``, ``type``, or ``None``). Extraction is
            best-effort: on a malformed spec whatever was gathered so far is
            returned rather than raising.
        """
        details = {
            "summary": "N/A",
            "description": "N/A",
            "external_doc": "N/A",
            "query_params": [],
            "request_body": None,
        }
        if not spec:
            return details

        try:
            path_item = (spec.get("paths") or {}).get(path) or {}
            operation = path_item.get(method.lower()) or {}
            if not operation:
                return details

            # Read the scalar fields first so a malformed parameter list
            # cannot prevent them from being reported.
            details["summary"] = operation.get("summary") or "N/A"
            details["description"] = operation.get("description") or "N/A"
            details["external_doc"] = (operation.get("externalDocs") or {}).get(
                "url", "N/A"
            )

            # Path-level parameters apply to every operation of the path;
            # ``or []`` also covers keys that are present but null.
            all_param_defs = (path_item.get("parameters") or []) + (
                operation.get("parameters") or []
            )
            for param_def in all_param_defs:
                if not isinstance(param_def, dict):
                    continue
                if "$ref" in param_def:
                    param_def = self._resolve_ref(spec, param_def["$ref"])
                if param_def.get("in") != "query":
                    continue
                details["query_params"].append(
                    {
                        "name": param_def.get("name"),
                        "required": param_def.get("required", False),
                        "description": param_def.get("description", "N/A"),
                        "type": self._get_param_type(spec, param_def.get("schema")),
                    }
                )

            details["request_body"] = self._describe_request_body(
                spec, operation.get("requestBody")
            )
        except (AttributeError, KeyError, TypeError):
            # Deliberate best-effort: docstring enrichment must never break
            # the API object; keep what was collected so far.
            pass
        return details

    def _describe_request_body(self, spec: dict, body_def):
        """Summarise a ``requestBody`` definition, or return ``None`` if absent.

        Only the first media type listed under ``content`` is inspected.
        """
        if not isinstance(body_def, dict):
            return None
        if "$ref" in body_def:
            body_def = self._resolve_ref(spec, body_def["$ref"])

        content = body_def.get("content") or {}
        media_type = next(iter(content.values()), None)
        if not media_type or "schema" not in media_type:
            return None

        schema = media_type["schema"] or {}
        if "$ref" in schema:
            # For referenced schemas the component name is more informative
            # than the generic resolved type.
            schema_type = schema["$ref"].split("/")[-1]
        else:
            schema_type = self._get_param_type(spec, schema)

        return {
            "required": body_def.get("required", False),
            "description": body_def.get("description", "N/A"),
            "type": schema_type,
        }

    def _resolve_ref(self, spec: dict, ref: str):
        """Resolve a local ``#/...`` reference inside ``spec``.

        Reference tokens are unescaped per JSON Pointer (``~1`` -> ``/``,
        ``~0`` -> ``~``) so keys containing slashes, such as path names, can
        be addressed.

        Returns:
            The referenced mapping, or ``{}`` when the reference is not
            local, cannot be followed, or does not point at a mapping.
        """
        if not spec or not isinstance(ref, str) or not ref.startswith("#/"):
            return {}

        current_obj = spec
        for part in ref.split("/")[1:]:
            if not isinstance(current_obj, dict):
                return {}
            # Order matters: "~1" must be decoded before "~0".
            key = part.replace("~1", "/").replace("~0", "~")
            current_obj = current_obj.get(key)
            if current_obj is None:
                return {}
        return current_obj if isinstance(current_obj, dict) else {}

    def _get_param_type(self, spec: dict, schema: dict) -> str:
        """Return the ``type`` of ``schema``, following one ``$ref`` if present.

        Returns ``"N/A"`` when the schema is missing, not a mapping, or has
        no ``type``.
        """
        if not schema or not isinstance(schema, dict):
            return "N/A"
        schema_ref = schema.get("$ref")
        if schema_ref:
            return self._resolve_ref(spec, schema_ref).get("type", "N/A")
        return schema.get("type", "N/A")

0 commit comments

Comments
 (0)