Skip to content
Draft

TBD #105

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified gcp/integration_quickstart/dist/gcp_integration_quickstart.pyz
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"cloudresourcemanager.googleapis.com",
]

ROLES_TO_ADD: list[str] = [
REQUIRED_ROLES: list[str] = [
"roles/cloudasset.viewer",
"roles/browser",
"roles/compute.viewer",
Expand Down Expand Up @@ -78,13 +78,13 @@ def create_integration_with_permissions(
]
)

required_roles = ROLES_TO_ADD.copy()
required_roles = REQUIRED_ROLES.copy()
if product_requirements:
required_roles.extend(
[
role
for role in product_requirements.required_roles
if role not in ROLES_TO_ADD
if role not in REQUIRED_ROLES
]
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def main():
print(
f"Missing required environment variables: {', '.join(missing_environment_vars)}"
)
exit(1)
sys.exit(1)

workflow_id = os.environ["WORKFLOW_ID"]

Expand All @@ -58,7 +58,7 @@ def main():
print(
f"Workflow ID {workflow_id} has already been used. Please start a new workflow."
)
exit(1)
sys.exit(1)

workflow_reporter.handle_login_step()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from gcp_integration_quickstart.integration_configuration import (
REQUIRED_APIS,
ROLES_TO_ADD,
REQUIRED_ROLES,
assign_delegate_permissions,
create_integration_with_permissions,
)
Expand Down Expand Up @@ -203,7 +203,7 @@ def test_create_integration_with_permissions_success(
f"services enable {services_str} --project child-project123 --quiet"
)

for role in ROLES_TO_ADD:
for role in REQUIRED_ROLES:
expected_commands.append(
f"resource-manager folders add-iam-policy-binding folder123 --member serviceAccount:{self.service_account} --role {role} --condition None --quiet"
)
Expand All @@ -212,7 +212,7 @@ def test_create_integration_with_permissions_success(
f"services enable {services_str} --project project123 --quiet"
)

for role in ROLES_TO_ADD:
for role in REQUIRED_ROLES:
expected_commands.append(
f"projects add-iam-policy-binding project123 --member serviceAccount:{self.service_account} --role {role} --condition None --quiet"
)
Expand Down Expand Up @@ -289,7 +289,7 @@ def test_create_integration_with_permissions_with_product_requirements(
f"services enable {services_str} --project project123 --quiet"
)

all_roles = ROLES_TO_ADD + additional_required_roles
all_roles = REQUIRED_ROLES + additional_required_roles
for role in all_roles:
expected_commands.append(
f"projects add-iam-policy-binding project123 --member serviceAccount:{self.service_account} --role {role} --condition None --quiet"
Expand Down
48 changes: 48 additions & 0 deletions gcp/issue_resolver/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Overview

This project provides **Datadog's GCP issue resolver** functionality for customers. It supports:

- Automated issue detection and resolution
- GCP resource configuration checks
- Integration health monitoring
- Diagnostic reporting

The produced executable is intended to run in a [Google Cloud Shell](https://cloud.google.com/shell/docs/using-cloud-shell) environment.

For development purposes, the script can also be run locally (assuming you have the [gcloud CLI](https://cloud.google.com/sdk/docs/install) set up).

During final testing, upload the executable to Google Cloud Shell and run it there.

---

# Development

### Dev Setup

See instructions in the main `gcp/` folder.

### Testing

Run all tests from the `issue_resolver` folder:

```bash
python -m pytest tests/ --tb=short
```

### Build / Ship

From the `gcp/` folder, run:

```bash
bash issue_resolver/build.sh
```

# Execution

To run the issue resolver, first open the Quickstart onboarding UI on the [Google Cloud Platform integration page](https://app.datadoghq.com/integrations/google-cloud-platform/add), under "Issue Resolver".

At the top of this page, you will see a "setup script" snippet. Copy that snippet into Google Cloud Shell and run it.

Once this command is run, the setup script will connect to Datadog and begin reporting back to the onboarding UI, where you will continue the issue resolution process.


16 changes: 16 additions & 0 deletions gcp/issue_resolver/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Unless explicitly stated otherwise all files in this repository are licensed under the Apache-2 License.

# This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.

# Builds issue_resolver/dist/gcp_issue_resolver.pyz from the shared and
# issue_resolver sources. All paths are relative, so the script must be run
# from the directory that contains both `shared/` and `issue_resolver/`.

# Abort on the first failing command or unset variable so that a partial copy
# is never packaged and a stale archive is never marked executable.
set -eu

staging_dir=issue_resolver/dist/tmp
archive=issue_resolver/dist/gcp_issue_resolver.pyz

# Remove the staging directory on every exit path, including failures.
trap 'rm -rf "$staging_dir"' EXIT

# Start from an empty staging directory.
rm -rf "$staging_dir"
mkdir -p "$staging_dir"

# Shared sources first, then the issue resolver sources on top of them.
cp -r shared/src/. "$staging_dir"
cp -r issue_resolver/src/. "$staging_dir"

python -m zipapp "$staging_dir" \
    -o "$archive" \
    -p "/usr/bin/env python3" \
    -m "gcp_issue_resolver.main:main"
chmod +x "$archive"


4 changes: 4 additions & 0 deletions gcp/issue_resolver/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[pytest]
pythonpath=src:../shared/src


5 changes: 5 additions & 0 deletions gcp/issue_resolver/src/gcp_issue_resolver/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Unless explicitly stated otherwise all files in this repository are licensed under the Apache-2 License.

# This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.


109 changes: 109 additions & 0 deletions gcp/issue_resolver/src/gcp_issue_resolver/issue_resolution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Unless explicitly stated otherwise all files in this repository are licensed under the Apache-2 License.

# This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.

from typing import Any

from gcp_shared.gcloud import gcloud
from gcp_shared.models import ConfigurationScope
from gcp_shared.reporter import StepStatusReporter
from gcp_shared.requests import dd_request

from .models import IssueResolverConfiguration


def diagnose_issues(
    step_reporter: StepStatusReporter,
    service_account_email: str,
    issue_resolver_configuration: IssueResolverConfiguration,
    configuration_scope: ConfigurationScope,
) -> list[dict[str, Any]]:
    """Walk the configuration scope and collect issues with the integration.

    Progress is published through ``step_reporter``: one message announcing
    the diagnosis, one message per folder and per project found in
    ``configuration_scope``, and a closing summary whose metadata carries the
    collected issues.

    Detection itself is still a placeholder, so the returned list is currently
    always empty, and ``issue_resolver_configuration`` is accepted but not
    consulted.

    Returns:
        The list of issue dictionaries that were found.
    """
    found: list[dict[str, Any]] = []

    announcement = f"Diagnosing issues for service account '{service_account_email}'..."
    step_reporter.report(message=announcement)

    # Folder-level permission checks.
    for folder in configuration_scope.folders:
        folder_message = f"Checking permissions for folder '{folder.name}'"
        step_reporter.report(message=folder_message)
        # TODO: Add actual issue detection logic here
        # This is a placeholder for the actual implementation

    # Project-level permission checks.
    for project in configuration_scope.projects:
        project_message = f"Checking permissions for project '{project.name}'"
        step_reporter.report(message=project_message)
        # TODO: Add actual issue detection logic here
        # This is a placeholder for the actual implementation

    summary = f"Found {len(found)} issue(s)"
    step_reporter.report(message=summary, metadata={"issues": found})
    return found


def resolve_issues(
    step_reporter: StepStatusReporter,
    service_account_email: str,
    issues: list[dict[str, Any]],
    issue_resolver_configuration: IssueResolverConfiguration,
) -> None:
    """Attempt to resolve each diagnosed issue and report the outcome.

    With an empty ``issues`` list a single "nothing to do" message is reported
    and the function returns. Otherwise every issue is processed on its own:
    an exception raised while handling one issue is reported and counted as a
    failure, and processing continues with the next issue. When
    ``issue_resolver_configuration.dry_run`` is set, only the intended action
    is reported; such issues are still counted as resolved.

    The actual resolution logic is a placeholder, and
    ``service_account_email`` is accepted but not used yet.
    """
    if not issues:
        step_reporter.report(message="No issues to resolve")
        return

    step_reporter.report(message=f"Resolving {len(issues)} issue(s)...")

    succeeded = 0
    failed = 0

    for entry in issues:
        kind = entry.get("type", "unknown")
        identifier = entry.get("id", "unknown")

        try:
            if not issue_resolver_configuration.dry_run:
                step_reporter.report(
                    message=f"Resolving issue: {kind} ({identifier})"
                )
                # TODO: Add actual issue resolution logic here
                # This is a placeholder for the actual implementation
            else:
                step_reporter.report(
                    message=f"[DRY RUN] Would resolve issue: {kind} ({identifier})"
                )
        except Exception as e:
            # Best effort: report the failure and carry on with the next issue.
            step_reporter.report(
                message=f"Failed to resolve issue {identifier}: {str(e)}"
            )
            failed += 1
        else:
            succeeded += 1

    totals = {"resolved_count": succeeded, "failed_count": failed}
    step_reporter.report(
        message=f"Resolution complete: {succeeded} resolved, {failed} failed",
        metadata=totals,
    )

103 changes: 103 additions & 0 deletions gcp/issue_resolver/src/gcp_issue_resolver/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Unless explicitly stated otherwise all files in this repository are licensed under the Apache-2 License.

# This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.

import os
import signal
import sys

from gcp_shared.models import (
ConfigurationScope,
Project,
from_dict_recursive,
)
from gcp_shared.gcloud import gcloud
from gcp_shared.service_accounts import find_service_account

from .issue_resolution import (
diagnose_issues,
resolve_issues,
)
from .models import IssueResolverConfiguration

# Environment variables that must be set before the script does any work;
# main() exits with status 1 when any of them is missing.
REQUIRED_ENVIRONMENT_VARS: set[str] = {
    "DD_API_KEY",
    "DD_APP_KEY",
    "DD_SITE",
    "DEFAULT_PROJECT_ID",
    "SERVICE_ACCOUNT_ID",
}


def main():
    """Run the GCP issue resolver script.

    Installs the SIGINT handler and then validates the prerequisites in order:
    every name in ``REQUIRED_ENVIRONMENT_VARS`` is set, the gcloud CLI reports
    a logged-in user, and the service account named by ``SERVICE_ACCOUNT_ID``
    can be found in ``DEFAULT_PROJECT_ID``. The first unmet prerequisite
    prints an explanation and exits with status 1.
    """
    signal.signal(signal.SIGINT, sigint_handler)

    missing_environment_vars = REQUIRED_ENVIRONMENT_VARS - os.environ.keys()
    if missing_environment_vars:
        # Set iteration order is arbitrary; sort so the message is stable
        # from one run to the next.
        print(
            f"Missing required environment variables: {', '.join(sorted(missing_environment_vars))}"
        )
        sys.exit(1)

    if not gcloud.is_logged_in():
        print("You must be logged in to GCloud CLI to run this script.")
        sys.exit(1)

    service_account_id = os.environ["SERVICE_ACCOUNT_ID"]
    default_project_id = os.environ["DEFAULT_PROJECT_ID"]
    service_account_email = find_service_account(
        service_account_id, default_project_id
    )
    if not service_account_email:
        print(
            f"Service account '{service_account_id}' not found in project '{default_project_id}'"
        )
        sys.exit(1)

    # NOTE(review): the workflow-reporter driven flow below is disabled, so the
    # diagnose/resolve steps are not executed yet - confirm before enabling.
    #
    # workflow_reporter.handle_login_step()

    # with workflow_reporter.report_step(OnboardingStep.SCOPES) as step_reporter:
    #     if not workflow_reporter.is_scopes_step_already_completed():
    #         collect_configuration_scopes(step_reporter)

    # with workflow_reporter.report_step(OnboardingStep.SELECTIONS):
    #     user_selections = workflow_reporter.receive_user_selections()
    # with workflow_reporter.report_step(
    #     OnboardingStep.CREATE_SERVICE_ACCOUNT
    # ) as step_reporter:
    #     service_account_email = find_or_create_service_account(
    #         step_reporter,
    #         user_selections["service_account_id"],
    #         user_selections["default_project_id"],
    #     )
    # with workflow_reporter.report_step(
    #     OnboardingStep.DIAGNOSE_ISSUES
    # ) as step_reporter:
    #     issues = diagnose_issues(
    #         step_reporter,
    #         service_account_email,
    #         IssueResolverConfiguration(**user_selections["issue_resolver_configuration"]),
    #         ConfigurationScope(
    #             projects=[
    #                 Project(**project)
    #                 for project in user_selections.get("projects", [])
    #             ],
    #             folders=[
    #                 from_dict_recursive(folder)
    #                 for folder in user_selections.get("folders", [])
    #             ],
    #         ),
    #     )
    # with workflow_reporter.report_step(
    #     OnboardingStep.RESOLVE_ISSUES
    # ) as step_reporter:
    #     resolve_issues(
    #         step_reporter,
    #         service_account_email,
    #         issues,
    #         IssueResolverConfiguration(**user_selections["issue_resolver_configuration"]),
    #     )

    print("Script succeeded. You may exit this shell.")


def sigint_handler(_, __):
    """Announce termination and exit with status 0.

    Registered for SIGINT by ``main``; both handler arguments are ignored.
    """
    print("Script terminating.")
    raise SystemExit(0)


# Run the resolver only when this module is executed as the main program.
if __name__ == "__main__":
    main()
18 changes: 18 additions & 0 deletions gcp/issue_resolver/src/gcp_issue_resolver/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Unless explicitly stated otherwise all files in this repository are licensed under the Apache-2 License.

# This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.

from dataclasses import dataclass
from typing import Any


@dataclass
class IssueResolverConfiguration:
    """Settings bundle for one run of the GCP issue resolver.

    All four fields are required and are accepted positionally in the order
    they are declared.
    """

    # Issue type identifiers.
    # NOTE(review): presumably the issue types to look for - confirm.
    issue_types: list[str]

    # NOTE(review): presumably enables automatic fixes - confirm with callers.
    auto_fix_enabled: bool

    # When true, issue resolution only reports what it would do.
    dry_run: bool

    # Free-form notification settings; the schema is not defined in this module.
    notification_preferences: dict[str, Any]


5 changes: 5 additions & 0 deletions gcp/issue_resolver/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Unless explicitly stated otherwise all files in this repository are licensed under the Apache-2 License.

# This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.


Loading