feat: detect vulnerable GitHub Actions

behnazh-w · behnazh-w · commit e056b7c9bfed · 2025-03-26T10:29:07.000+10:00
Signed-off-by: behnazh-w <behnaz.hassanshahi@oracle.com>
diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini
@@ -544,6 +544,12 @@ url_netloc = api.deps.dev
 url_scheme = https
 purl_endpoint = v3alpha/purl
 
+[osv_dev]
+url_netloc = api.osv.dev
+url_scheme = https
+query_endpoint = v1/query
+querybatch_endpoint = v1/querybatch
+
 # Configuration options for selecting the checks to run.
 # Both the exclude and include are defined as list of strings:
 #   - The exclude list is used to specify the checks that will not run.
diff --git a/src/macaron/database/db_custom_types.py b/src/macaron/database/db_custom_types.py
@@ -102,6 +102,38 @@ def process_result_value(self, value: None | dict, dialect: Any) -> None | dict:
         return value
 
 
+class DBJsonList(TypeDecorator):  # pylint: disable=W0223
+    """SQLAlchemy column type to serialize lists.
+
+    Both directions only validate that the value is a list and pass it through unchanged.
+    """
+
+    # It is stored in the database as a json value.
+    impl = JSON
+
+    # To prevent Sphinx from rendering the docstrings for `cache_ok`, make this docstring private.
+    #: :meta private:
+    cache_ok = True
+
+    def process_bind_param(self, value: None | list, dialect: Any) -> None | list:
+        """Validate a list object before it is stored in the database.
+
+        Parameters
+        ----------
+        value : None | list
+            The value being stored.
+        dialect : Any
+            The database dialect in use; not used by this method.
+
+        Returns
+        -------
+        None | list
+            The value, unchanged.
+
+        Raises
+        ------
+        TypeError
+            If the value is not a list. This includes ``None``, even though the
+            annotation permits it.
+        """
+        if not isinstance(value, list):
+            raise TypeError("DBJsonList type expects a list.")
+
+        return value
+
+    def process_result_value(self, value: None | list, dialect: Any) -> None | list:
+        """Validate a list object after it is loaded from the database.
+
+        Parameters
+        ----------
+        value : None | list
+            The value being loaded.
+        dialect : Any
+            The database dialect in use; not used by this method.
+
+        Returns
+        -------
+        None | list
+            The value, unchanged.
+
+        Raises
+        ------
+        TypeError
+            If the value is not a list. This includes ``None``, even though the
+            annotation permits it.
+        """
+        if not isinstance(value, list):
+            raise TypeError("DBJsonList type expects a list.")
+        return value
+
+
 class ProvenancePayload(TypeDecorator):  # pylint: disable=W0223
     """SQLAlchemy column type to serialize InTotoProvenance."""
 
diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -5,7 +5,6 @@
 
 import logging
 
-import requests
 from problog import get_evaluatable
 from problog.logic import Term
 from problog.program import PrologString
@@ -34,16 +33,16 @@
 from macaron.slsa_analyzer.checks.base_check import BaseCheck
 from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence, JustificationType
 from macaron.slsa_analyzer.package_registry.deps_dev import APIAccessError, DepsDevService
+from macaron.slsa_analyzer.package_registry.osv_dev import OSVDevService
 from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
 from macaron.slsa_analyzer.registry import registry
 from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
-from macaron.util import send_post_http_raw
 
 logger: logging.Logger = logging.getLogger(__name__)
 
 
 class MaliciousMetadataFacts(CheckFacts):
-    """The ORM mapping for justifications in pypi heuristic check."""
+    """The ORM mapping for justifications in malicious metadata check."""
 
     __tablename__ = "_detect_malicious_metadata_check"
 
@@ -71,14 +70,10 @@ class MaliciousMetadataFacts(CheckFacts):
 class DetectMaliciousMetadataCheck(BaseCheck):
     """This check analyzes the metadata of a package for malicious behavior."""
 
-    # The OSV knowledge base query database.
-    osv_query_url = "https://api.osv.dev/v1/query"
-
     def __init__(self) -> None:
         """Initialize a check instance."""
         check_id = "mcn_detect_malicious_metadata_1"
         description = """This check analyzes the metadata of a package based on reports malicious behavior.
-        Supported ecosystem for unknown malware: PyPI.
         """
         super().__init__(check_id=check_id, description=description, eval_reqs=[])
 
@@ -222,38 +217,34 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
         package_registry_info_entries = ctx.dynamic_data["package_registries"]
 
         # First check if this package is a known malware
-        data = {"package": {"purl": ctx.component.purl}}
-
         try:
             package_exists = bool(DepsDevService.get_package_info(ctx.component.purl))
         except APIAccessError as error:
             logger.debug(error)
 
         # Known malicious packages must have been removed.
         if not package_exists:
-            response = send_post_http_raw(self.osv_query_url, json_data=data, headers=None)
-            res_obj = None
-            if response:
-                try:
-                    res_obj = response.json()
-                except requests.exceptions.JSONDecodeError as error:
-                    logger.debug("Unable to get a valid response from %s: %s", self.osv_query_url, error)
-            if res_obj:
-                for vuln in res_obj.get("vulns", {}):
-                    if v_id := json_extract(vuln, ["id"], str):
-                        result_tables.append(
-                            MaliciousMetadataFacts(
-                                known_malware=f"https://osv.dev/vulnerability/{v_id}",
-                                result={},
-                                detail_information=vuln,
-                                confidence=Confidence.HIGH,
-                            )
+            vulns: list = []
+            try:
+                vulns = OSVDevService.get_vulnerabilities_purl(ctx.component.purl)
+            except APIAccessError as error:
+                logger.debug(error)
+
+            for vuln in vulns:
+                if v_id := json_extract(vuln, ["id"], str):
+                    result_tables.append(
+                        MaliciousMetadataFacts(
+                            known_malware=f"https://osv.dev/vulnerability/{v_id}",
+                            result={},
+                            detail_information=vuln,
+                            confidence=Confidence.HIGH,
                         )
-                if result_tables:
-                    return CheckResultData(
-                        result_tables=result_tables,
-                        result_type=CheckResultType.FAILED,
                     )
+            if result_tables:
+                return CheckResultData(
+                    result_tables=result_tables,
+                    result_type=CheckResultType.FAILED,
+                )
 
         # If the package is not a known malware, run malware analysis heuristics.
         for package_registry_info_entry in package_registry_info_entries:
diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py
@@ -0,0 +1,182 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module contains the implementation of the GitHub Actions vulnerabilities check."""
+
+import logging
+import os
+
+from sqlalchemy import ForeignKey, String
+from sqlalchemy.orm import Mapped, mapped_column
+
+from macaron.database.db_custom_types import DBJsonList
+from macaron.database.table_definitions import CheckFacts
+from macaron.errors import APIAccessError
+from macaron.json_tools import json_extract
+from macaron.slsa_analyzer.analyze_context import AnalyzeContext
+from macaron.slsa_analyzer.checks.base_check import BaseCheck, CheckResultType
+from macaron.slsa_analyzer.checks.check_result import CheckResultData, Confidence, JustificationType
+from macaron.slsa_analyzer.ci_service.github_actions.analyzer import GitHubWorkflowNode, GitHubWorkflowType
+from macaron.slsa_analyzer.package_registry.osv_dev import OSVDevService
+from macaron.slsa_analyzer.registry import registry
+from macaron.slsa_analyzer.slsa_req import ReqName
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+class GitHubActionsVulnsFacts(CheckFacts):
+    """The ORM mapping for justifications in the GitHub Actions vulnerabilities check.
+
+    Each row describes one invocation of a GitHub Action (its name, its version and the
+    workflow that calls it) together with the vulnerabilities found to affect that version.
+    """
+
+    __tablename__ = "_github_actions_vulnerabilities_check"
+
+    #: The primary key.
+    id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True)  # noqa: A003
+
+    #: The list of vulnerability URLs, stored as a JSON list.
+    vulnerability_urls: Mapped[list[str]] = mapped_column(
+        DBJsonList, nullable=False, info={"justification": JustificationType.TEXT}
+    )
+
+    #: The GitHub Action identifier, i.e., the part of the called workflow's name before ``@``.
+    github_actions_id: Mapped[str] = mapped_column(
+        String, nullable=False, info={"justification": JustificationType.TEXT}
+    )
+
+    #: The GitHub Action version, i.e., the part of the called workflow's name after ``@``.
+    github_actions_version: Mapped[str] = mapped_column(
+        String, nullable=False, info={"justification": JustificationType.TEXT}
+    )
+
+    #: The link to the GitHub Actions workflow file that calls the vulnerable GitHub Action.
+    caller_workflow: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.HREF})
+
+    __mapper_args__ = {
+        "polymorphic_identity": "_github_actions_vulnerabilities_check",
+    }
+
+
+class GitHubActionsVulnsCheck(BaseCheck):
+    """This check reports whether the GitHub Actions called from the corresponding repo have known vulnerabilities.
+
+    Note: This check analyzes the direct GitHub Actions dependencies only.
+    TODO: Check GitHub Actions dependencies recursively.
+    """
+
+    def __init__(self) -> None:
+        """Initialize instance."""
+        check_id = "mcn_githubactions_vulnerabilities_1"
+        description = "Check whether the GitHub Actions called from the corresponding repo have known vulnerabilities.."
+        depends_on: list[tuple[str, CheckResultType]] = [("mcn_version_control_system_1", CheckResultType.PASSED)]
+        eval_reqs = [ReqName.SECURITY]
+        super().__init__(check_id=check_id, description=description, depends_on=depends_on, eval_reqs=eval_reqs)
+
+    def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
+        """Look up the GitHub Actions called by the repo's workflows in the OSV database.
+
+        The external and reusable workflow nodes of every CI service callgraph are grouped by
+        name. The names are sent to OSV in one batch query, and the full vulnerability list is
+        fetched only for the names that the batch query returns. Each recorded version is then
+        tested against those vulnerabilities.
+
+        Errors raised by the OSV service calls are logged at debug level and do not fail the check.
+
+        Parameters
+        ----------
+        ctx : AnalyzeContext
+            The object containing processed data for the target repo.
+
+        Returns
+        -------
+        CheckResultData
+            ``FAILED`` with one ``GitHubActionsVulnsFacts`` row per vulnerable invocation,
+            or ``PASSED`` with no result tables when none is found.
+        """
+        result_tables: list[CheckFacts] = []
+
+        ci_services = ctx.dynamic_data["ci_services"]
+
+        # Maps a GitHub Action name to every invocation (version and caller link) found for it.
+        external_workflows: dict[str, list] = {}
+        for ci_info in ci_services:
+            for callee in ci_info["callgraph"].bfs():
+                if isinstance(callee, GitHubWorkflowNode) and callee.node_type in [
+                    GitHubWorkflowType.EXTERNAL,
+                    GitHubWorkflowType.REUSABLE,
+                ]:
+                    if "@" not in callee.name:
+                        # Most likely we have encountered an internal reusable workflow, which
+                        # can be skipped.
+                        logger.debug("GitHub Actions workflow %s misses a version. Skipping...", callee.name)
+                        continue
+
+                    # Split on the first "@" only, so that a version containing "@" cannot
+                    # break the two-value unpacking with a ValueError.
+                    workflow_name, workflow_version = callee.name.split("@", 1)
+
+                    if not workflow_name:
+                        logger.debug("Workflow %s is not relevant. Skipping...", callee.name)
+                        continue
+
+                    caller_path = callee.caller.source_path if callee.caller else None
+                    api_client = ci_info["service"].api_client
+
+                    external_workflows.setdefault(workflow_name, []).append(
+                        {
+                            "version": workflow_version,
+                            "caller_path": api_client.get_file_link(
+                                ctx.component.repository.full_name,
+                                ctx.component.repository.commit_sha,
+                                file_path=(
+                                    api_client.get_relative_path_of_workflow(os.path.basename(caller_path))
+                                    if caller_path
+                                    else ""
+                                ),
+                            ),
+                        }
+                    )
+
+        # Without any external GitHub Actions there is nothing to look up, so avoid the
+        # round trip to OSV altogether.
+        if not external_workflows:
+            return CheckResultData(
+                result_tables=[],
+                result_type=CheckResultType.PASSED,
+            )
+
+        # We first send a batch query to see which GitHub Actions are potentially vulnerable.
+        # OSV's querybatch returns minimal results but this allows us to only make subsequent
+        # queries to get vulnerability details when needed.
+        batch_query = [{"name": name, "ecosystem": "GitHub Actions"} for name in external_workflows]
+        batch_vulns = []
+        try:
+            batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query)
+        except APIAccessError as error:
+            logger.debug(error)
+
+        for vuln_res in batch_vulns:
+            vulns: list = []
+            # NOTE(review): assumes every batch result carries the queried "name" and that it
+            # is one of the names sent above; confirm against OSVDevService.
+            workflow_name = vuln_res["name"]
+            try:
+                vulns = OSVDevService.get_vulnerabilities_package_name(ecosystem="GitHub Actions", name=workflow_name)
+            except APIAccessError as error:
+                logger.debug(error)
+                continue
+            for workflow_inv in external_workflows[workflow_name]:
+                # The URLs of the vulnerabilities that affect this particular invocation.
+                vuln_mapping = []
+                for vuln in vulns:
+                    if v_id := json_extract(vuln, ["id"], str):
+                        try:
+                            if OSVDevService.is_version_affected(
+                                vuln, workflow_name, workflow_inv["version"], "GitHub Actions"
+                            ):
+                                vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}")
+                        except APIAccessError as error:
+                            # A failure for one vulnerability must not hide the others.
+                            logger.debug(error)
+                if vuln_mapping:
+                    result_tables.append(
+                        GitHubActionsVulnsFacts(
+                            vulnerability_urls=vuln_mapping,
+                            github_actions_id=workflow_name,
+                            github_actions_version=workflow_inv["version"],
+                            caller_workflow=workflow_inv["caller_path"],
+                            confidence=Confidence.HIGH,
+                        )
+                    )
+
+        if result_tables:
+            return CheckResultData(
+                result_tables=result_tables,
+                result_type=CheckResultType.FAILED,
+            )
+
+        return CheckResultData(
+            result_tables=[],
+            result_type=CheckResultType.PASSED,
+        )
+
+
+# Register an instance of this check with the check registry when this module is imported.
+registry.register(GitHubActionsVulnsCheck())
diff --git a/src/macaron/slsa_analyzer/package_registry/osv_dev.py b/src/macaron/slsa_analyzer/package_registry/osv_dev.py
diff --git a/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py b/tests/slsa_analyzer/checks/test_detect_malicious_metadata_check.py