Commit 33582e5

edmundmiller and claude authored
feat: optimize S3 lifecycle management and modernize infrastructure configuration (#170)
* feat: add S3 lifecycle management for cost optimization and automated cleanup

  This commit implements comprehensive S3 lifecycle rules for the nf-core-awsmegatests bucket to optimize storage costs and automatically clean up temporary workflow files.

  Changes:
  - Add create_s3_lifecycle_configuration() function with 4 lifecycle rules:
    - Rule 1: Preserve metadata files with cost optimization (IA after 30 days, Glacier after 90 days)
    - Rule 2: Clean up temporary files after 30 days (based on the nextflow.io/temporary tag)
    - Rule 3: Clean up the work directory after 90 days (prefix-based cleanup)
    - Rule 4: Clean up incomplete multipart uploads after 7 days

  The implementation includes proper error handling to gracefully fall back to manual management if AWS permissions are insufficient.

  🤖 Generated with [Claude Code](https://claude.ai/code)
  Co-Authored-By: Claude <[email protected]>

* refactor: remove all try/except statements to let Pulumi handle errors natively

  This commit removes all manual try/except error-handling blocks throughout the codebase, allowing Pulumi to handle errors with its built-in error management system.

  Changes:
  - Remove try/except from S3 lifecycle configuration creation
  - Remove try/except from Seqera compute environment deployment
  - Remove try/except from GitHub integration resource creation
  - Remove try/except from Seqera provider initialization
  - Remove try/except from TowerForge credential upload
  - Remove try/except from configuration file loading
  - Remove try/except from workspace ID validation
  - Remove the unused validate_environment() function
  - Simplify numeric validation logic in settings

  Pulumi's native error handling provides better diagnostics and stack traces than custom exception wrapping. This simplifies the codebase while maintaining robust error reporting.
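The four lifecycle rules described above can be sketched as a boto3-style payload. The rule IDs below are illustrative, and the day counts and tag keys are taken from the commit message and the scripts README; the actual create_s3_lifecycle_configuration() implementation is not shown in this view and may differ.

```python
def build_lifecycle_rules() -> dict:
    """Sketch of the four lifecycle rules as a boto3-style configuration.

    Rule IDs are assumptions; the tag keys, prefixes, and day counts come
    from the commit message and the scripts README.
    """
    return {
        "Rules": [
            {   # Rule 1: preserve metadata files, but move them to cheaper storage
                "ID": "preserve-metadata-files",
                "Status": "Enabled",
                "Filter": {"Tag": {"Key": "nextflow.io/metadata", "Value": "true"}},
                "Transitions": [
                    {"Days": 30, "StorageClass": "STANDARD_IA"},
                    {"Days": 90, "StorageClass": "GLACIER"},
                ],
            },
            {   # Rule 2: delete files Nextflow tagged as temporary after 30 days
                "ID": "cleanup-temporary-files",
                "Status": "Enabled",
                "Filter": {"Tag": {"Key": "nextflow.io/temporary", "Value": "true"}},
                "Expiration": {"Days": 30},
            },
            {   # Rule 3: prefix-based cleanup of the work directory after 90 days
                "ID": "cleanup-work-directory",
                "Status": "Enabled",
                "Filter": {"Prefix": "work/"},
                "Expiration": {"Days": 90},
            },
            {   # Rule 4: drop incomplete multipart uploads after 7 days
                "ID": "abort-incomplete-multipart-uploads",
                "Status": "Enabled",
                "Filter": {},
                "AbortIncompleteMultipartUpload": {"DaysAfterInitiation": 7},
            },
        ]
    }
```

A boto3 client could apply this with `s3.put_bucket_lifecycle_configuration(Bucket="nf-core-awsmegatests", LifecycleConfiguration=build_lifecycle_rules())`.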
* feat: optimize S3 bucket lifecycle and enable CORS for Seqera Data Explorer

  - Update work directory cleanup from 90 days to 14 days for aggressive cost optimization
  - Add lifecycle rules for scratch/ (7 days) and cache/ directories (30 days)
  - Enable CORS configuration for Seqera Data Explorer compatibility
  - Add a one-time batch job script for tagging existing log files
  - Preserve tagged log files while aggressively cleaning untagged work files

* fix: adjust S3 lifecycle cleanup to be less aggressive

  - Update work directory cleanup from 14 days to 30 days
  - Update scratch directory cleanup from 7 days to 30 days
  - Maintain 30-day cleanup for cache directories
  - Keep tagged log files preserved for 90 days with storage transitions

* refactor: externalize nextflow configuration to separate files

  - Move embedded nextflowConfig strings from JSON to external .config files
  - Create modular nextflow configs with base + environment-specific settings
  - Add a load_nextflow_config() function to read external config files
  - Update compute environment creation to use the external nextflow configs
  - Clean JSON files by removing embedded nextflowConfig fields
  - Improve maintainability and readability of nextflow configurations

  Config structure:
  - nextflow-base.config: common settings (AWS Batch, error handling, fusion tags)
  - nextflow-cpu.config: CPU-specific settings (x86_64, CPU tags)
  - nextflow-gpu.config: GPU-specific settings (x86_64, GPU tags)
  - nextflow-arm.config: ARM-specific settings (arm64, ARM tags)

* chore: update Seqera Terraform provider version to 0.25.2

  - Update provider version from 0.13.0 to 0.25.2
  - Attempt to resolve pulumi_seqera module import issues
  - Provider configuration is ready, but SDK generation still needs resolution

* feat: generate Seqera Terraform provider SDK and verify infrastructure changes

  - Generate the pulumi-seqera SDK from terraform-provider registry.terraform.io/seqeralabs/seqera
  - Add the pulumi-seqera dependency with a local SDK path configuration
  - Update package dependencies and the lock file with the generated SDK
  - Verify infrastructure changes work correctly with pulumi preview:
    - S3 CORS configuration for Seqera Data Explorer ✅
    - S3 lifecycle optimization with 30-day cleanup ✅
    - External nextflow config files integration ✅
    - Compute environment replacements with updated configs ✅

  All infrastructure optimizations tested and ready for deployment.

* fix: correct Seqera provider version to 0.25.2

  - Fix the provider version that was incorrectly reverted to 0.13.0
  - Ensure we are using the latest Seqera provider version 0.25.2 as intended
  - SDK generation accidentally used the older version; now corrected

* fix: correct S3 CORS configuration to comply with AWS and Seqera requirements

  - Remove the unsupported x-amz-meta-* wildcard from ExposeHeaders
  - Simplify ExposeHeaders to only include ETag, per Seqera documentation
  - Update the documentation link to the correct Seqera Data Explorer CORS guide

* fix: add missing EC2 permissions to TowerForge IAM policy

  Add ec2:DescribeAccountAttributes, ec2:DescribeLaunchTemplateVersions, and ec2:DescribeInstanceTypeOfferings permissions to align with the official Seqera forge policy requirements. This resolves 403 Forbidden errors when Seqera Platform attempts to describe AWS account attributes.

* test: add IAM policy compliance validation against Seqera reference

  Add comprehensive unit tests to validate that the TowerForge IAM policy includes all required permissions from the official Seqera forge policy. The test:
  - Fetches the reference policy from the seqeralabs/nf-tower-aws repository
  - Compares our policy permissions against the reference
  - Validates that critical EC2 permissions are present
  - Ensures proper policy structure

  Also adds missing EFS permissions (elasticfilesystem:*) and the iam:GetRole permission that were in the reference policy but missing from ours. Includes a TODO comment for implementing Pulumi CrossGuard policy validation for automated compliance checking at deployment time.

* feat: add explicit compute environment dependencies and IAM policy change detection

  Implement proper dependency management and prevent authorization errors by:
  - Adding explicit dependencies between IAM resources → Seqera credentials → compute environments
  - Generating an IAM policy hash to force compute environment recreation on policy changes
  - Embedding the policy version hash in CE descriptions to trigger replacement when policies update
  - Passing the Seqera credential resource for explicit dependency tracking

  This ensures compute environments are always created with fully propagated IAM permissions, preventing "not authorized" errors during resource creation when policies are updated.

* feat: implement Python file injection for Nextflow config merging

  Replace includeConfig statements with programmatic config merging to resolve Seqera Platform compatibility issues. Add a comprehensive test suite with 10 test cases.
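The config-merging approach in the last commit above can be sketched as follows. The function name load_nextflow_config and the config file names come from the commit message; the concatenation strategy and directory argument are assumptions.

```python
from pathlib import Path


def load_nextflow_config(env: str, config_dir: Path) -> str:
    """Merge the shared base config with an environment-specific config.

    Instead of an `includeConfig 'nextflow-base.config'` statement (which
    Seqera Platform would have to resolve at runtime), the file contents are
    concatenated in Python so a single, self-contained config string can be
    attached to the compute environment.
    """
    base = (config_dir / "nextflow-base.config").read_text()
    specific = (config_dir / f"nextflow-{env}.config").read_text()
    return base.rstrip() + "\n\n" + specific.rstrip() + "\n"
```

For example, `load_nextflow_config("cpu", Path("conf"))` would yield the base settings followed by the CPU-specific ones.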
---------

Co-authored-by: Claude <[email protected]>
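The CORS fix described above can be sketched as a boto3-style payload. Only the restriction of ExposeHeaders to ETag comes from the commit message; the methods, headers, max age, and origins below are illustrative assumptions.

```python
def build_cors_configuration(allowed_origins: list) -> dict:
    """Sketch of the S3 CORS rules for Seqera Data Explorer.

    ExposeHeaders is limited to ETag per the commit message (the
    x-amz-meta-* wildcard is not supported by S3). Everything else here
    is an assumption; the caller supplies the Seqera origin(s).
    """
    return {
        "CORSRules": [
            {
                "AllowedOrigins": allowed_origins,
                "AllowedMethods": ["GET", "HEAD"],
                "AllowedHeaders": ["*"],
                "ExposeHeaders": ["ETag"],  # only ETag: wildcards are rejected here
                "MaxAgeSeconds": 3000,
            }
        ]
    }
```

A boto3 client could apply it with `s3.put_bucket_cors(Bucket="nf-core-awsmegatests", CORSConfiguration=build_cors_configuration([...]))`.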
1 parent fc6acda · commit 33582e5

25 files changed: +1639 −278 lines

pulumi/AWSMegatests/Pulumi.yaml

Lines changed: 3 additions & 3 deletions

```diff
@@ -9,11 +9,11 @@ config:
   # GitHub Provider Configuration
   github:owner:
     value: nf-core
-  # Note: AWS and other tokens are provided via ESC environment
-  # AWS credentials should come from ESC OIDC integration
+  # Note: AWS and other tokens are provided via ESC environment
+  # AWS credentials should come from ESC OIDC integration
 packages:
   seqera:
     source: terraform-provider
-    version: 0.13.0
+    version: 0.25.2
     parameters:
       - registry.terraform.io/seqeralabs/seqera
```
pulumi/AWSMegatests/__main__.py

Lines changed: 29 additions & 66 deletions

```diff
@@ -38,78 +38,41 @@ def main():
     # Note: lifecycle_configuration is managed manually, not used in exports

     # Step 5: Create TowerForge IAM credentials and upload to Seqera Platform
-    towerforge_access_key_id, towerforge_access_key_secret, seqera_credentials_id = (
-        create_towerforge_credentials(
-            aws_provider,
-            nf_core_awsmegatests_bucket,
-            seqera_provider,
-            float(config["tower_workspace_id"]),
-        )
+    (
+        towerforge_access_key_id,
+        towerforge_access_key_secret,
+        seqera_credentials_id,
+        seqera_credential_resource,
+        iam_policy_hash,
+    ) = create_towerforge_credentials(
+        aws_provider,
+        nf_core_awsmegatests_bucket,
+        seqera_provider,
+        float(config["tower_workspace_id"]),
     )

     # Step 6: Deploy Seqera Platform compute environments using Terraform provider
-    try:
-        pulumi.log.info(
-            "Deploying Seqera compute environments using Terraform provider"
-        )
-
-        # Deploy using Seqera Terraform provider with dynamic credentials ID
-        terraform_resources = deploy_seqera_environments_terraform(
-            config,
-            seqera_credentials_id,  # Dynamic TowerForge credentials ID from Seqera Platform
-            seqera_provider,  # Reuse existing Seqera provider
-        )
-
-        # Get compute environment IDs from Terraform provider
-        compute_env_ids = get_compute_environment_ids_terraform(terraform_resources)
-        deployment_method = "terraform-provider"
-
-        pulumi.log.info(
-            "Successfully deployed compute environments using Seqera Terraform provider"
-        )
-    except Exception as e:
-        error_msg = (
-            f"Seqera deployment failed: {e}. "
-            "Common solutions: "
-            "1. Verify TOWER_ACCESS_TOKEN has WORKSPACE_ADMIN permissions "
-            "2. Check workspace ID is correct in ESC environment "
-            "3. Ensure TowerForge credentials were successfully uploaded to Seqera Platform "
-            "4. Verify network connectivity to api.cloud.seqera.io"
-        )
-        pulumi.log.error(error_msg)
-        raise RuntimeError(error_msg)
+    # Deploy using Seqera Terraform provider with dynamic credentials ID
+    terraform_resources = deploy_seqera_environments_terraform(
+        config,
+        seqera_credentials_id,  # Dynamic TowerForge credentials ID from Seqera Platform
+        seqera_provider,  # Reuse existing Seqera provider
+        seqera_credential_resource,  # Seqera credential resource for dependency
+        iam_policy_hash,  # IAM policy hash to force CE recreation on policy changes
+    )
+
+    # Get compute environment IDs from Terraform provider
+    compute_env_ids = get_compute_environment_ids_terraform(terraform_resources)
+    deployment_method = "terraform-provider"

     # Step 8: Create GitHub resources
     # Full GitHub integration enabled - creates both variables and secrets
-    try:
-        pulumi.log.info("Creating GitHub organization variables and secrets")
-
-        github_resources = create_github_resources(
-            github_provider,
-            compute_env_ids,
-            config["tower_workspace_id"],
-            tower_access_token=config["tower_access_token"],
-        )
-
-        pulumi.log.info(
-            "Successfully created GitHub variables. Manual secret commands available in outputs."
-        )
-    except Exception as e:
-        error_msg = (
-            f"GitHub integration failed: {e}. "
-            "This is often harmless if variables already exist (409 errors). "
-            "Common issues: "
-            "1. GitHub token lacks org-level permissions "
-            "2. Variables already exist (409 Already Exists - harmless) "
-            "3. Network connectivity to api.github.com"
-        )
-        pulumi.log.warn(error_msg)
-        github_resources = {
-            "variables": {},
-            "secrets": {},
-            "gh_cli_commands": [],
-            "note": f"GitHub integration failed: {e}",
-        }
+    github_resources = create_github_resources(
+        github_provider,
+        compute_env_ids,
+        config["tower_workspace_id"],
+        tower_access_token=config["tower_access_token"],
+    )

     # Exports - All within proper Pulumi program context
     pulumi.export(
```
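The iam_policy_hash value threaded through these calls could be derived along these lines. This is a sketch; the real implementation inside create_towerforge_credentials is not shown in this diff, and the function name below is hypothetical.

```python
import hashlib
import json


def compute_iam_policy_hash(policy_document: dict) -> str:
    """Short, stable hash of an IAM policy document.

    Serializing with sorted keys makes the hash independent of dict ordering,
    so only real policy changes alter it. Embedding the hash in a compute
    environment's description then forces a replacement whenever the IAM
    policy changes, ensuring the CE is recreated with the new permissions.
    """
    canonical = json.dumps(policy_document, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:12]
```

A CE description could then embed it, e.g. `f"nf-core megatests CE (IAM policy {compute_iam_policy_hash(policy)})"`, so Pulumi sees a diff exactly when the policy document changes.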

pulumi/AWSMegatests/pyproject.toml

Lines changed: 5 additions & 4 deletions

```diff
@@ -12,11 +12,12 @@ dependencies = [
     "pulumi-seqera",
 ]

-[tool.uv.sources]
-pulumi-seqera = { path = "sdks/seqera" }
-
-
 [dependency-groups]
 dev = [
     "mypy>=1.17.1",
+    "pytest>=7.0.0",
+    "requests>=2.28.0",
 ]
+
+[tool.uv.sources]
+pulumi-seqera = { path = "sdks/seqera" }
```
Lines changed: 59 additions & 0 deletions (new file)

````markdown
# AWS Megatests Utility Scripts

This directory contains utility scripts for AWS Megatests infrastructure management.

## Log File Tagging Script

**Purpose**: One-time batch job to tag existing log files in S3 work directories for preservation during lifecycle cleanup.

### Usage

```bash
# Install dependencies
pip install -r requirements.txt

# Dry run to see what would be tagged
python tag_existing_log_files.py --bucket nf-core-awsmegatests --dry-run

# Actually tag the files
python tag_existing_log_files.py --bucket nf-core-awsmegatests

# With custom threading (default is 10 workers)
python tag_existing_log_files.py --bucket nf-core-awsmegatests --max-workers 20
```

### Prerequisites

- AWS credentials configured (AWS CLI, IAM role, or environment variables)
- S3 permissions: `s3:ListBucket`, `s3:GetObjectTagging`, `s3:PutObjectTagging`

### What it does

1. Scans the `work/` directory for log files matching Nextflow patterns
2. Tags log files with `nextflow.io/metadata=true`
3. Preserves existing tags while adding the metadata tag
4. Uses multi-threading for performance with large buckets

### Log File Patterns

The script identifies these Nextflow log files:

- `.command.log` - Main command log
- `.command.err` - Error log
- `.command.out` - Standard output
- `.exitcode` - Exit code file
- `.command.sh` - Command script
- `.command.run` - Run script
- `.command.begin` - Begin timestamp
- `trace.txt` - Trace file
- `timeline.html` - Timeline report
- `report.html` - Execution report
- `dag.html` - DAG visualization

### Integration with Lifecycle Rules

Tagged files are preserved by S3 lifecycle rules:

- Tagged log files: kept for 90 days, then moved to cheaper storage classes
- Untagged work files: deleted after 14 days for aggressive cleanup
- Future log files will be tagged automatically by Nextflow (no need to run this script again)
````
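The tag-preserving behavior described in step 3 of the README might look like the following sketch. The helper names are hypothetical (the actual tag_existing_log_files.py source is not shown in this view), and the `s3` argument is assumed to be a boto3 S3 client.

```python
def merge_metadata_tag(existing_tags: list) -> list:
    """Return a TagSet with nextflow.io/metadata=true added.

    Existing tags are preserved, and re-running is idempotent: any previous
    metadata tag is replaced rather than duplicated.
    """
    tags = [t for t in existing_tags if t["Key"] != "nextflow.io/metadata"]
    tags.append({"Key": "nextflow.io/metadata", "Value": "true"})
    return tags


def tag_log_file(s3, bucket: str, key: str) -> None:
    """Fetch the object's current tags and write back the merged set.

    `s3` is a boto3 S3 client, e.g. s3 = boto3.client("s3"); this pair of
    calls is what requires the GetObjectTagging/PutObjectTagging permissions
    listed in the prerequisites.
    """
    existing = s3.get_object_tagging(Bucket=bucket, Key=key)["TagSet"]
    s3.put_object_tagging(
        Bucket=bucket, Key=key, Tagging={"TagSet": merge_metadata_tag(existing)}
    )
```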
Lines changed: 3 additions & 0 deletions (new file)

```
# Requirements for log file tagging script
boto3>=1.26.0
botocore>=1.29.0
```
