From 910ded9e86057fcb778b6fd722897ccac536d686 Mon Sep 17 00:00:00 2001 From: Jeff Palmer Date: Tue, 17 Mar 2026 11:23:02 -0500 Subject: [PATCH 1/7] First commit for aws-step-functions-jsonata --- aws-step-functions-jsonata/POWER.md | 183 +++++++ .../steering/architecture-patterns.md | 488 +++++++++++++++++ .../steering/asl-state-types.md | 474 +++++++++++++++++ .../steering/error-handling.md | 445 ++++++++++++++++ .../steering/service-integrations.md | 485 +++++++++++++++++ .../steering/variables-and-data.md | 498 ++++++++++++++++++ 6 files changed, 2573 insertions(+) create mode 100644 aws-step-functions-jsonata/POWER.md create mode 100644 aws-step-functions-jsonata/steering/architecture-patterns.md create mode 100644 aws-step-functions-jsonata/steering/asl-state-types.md create mode 100644 aws-step-functions-jsonata/steering/error-handling.md create mode 100644 aws-step-functions-jsonata/steering/service-integrations.md create mode 100644 aws-step-functions-jsonata/steering/variables-and-data.md diff --git a/aws-step-functions-jsonata/POWER.md b/aws-step-functions-jsonata/POWER.md new file mode 100644 index 0000000..b46765d --- /dev/null +++ b/aws-step-functions-jsonata/POWER.md @@ -0,0 +1,183 @@ +--- +name: "step-functions-jsonata" +displayName: "AWS Step Functions with JSONata" +description: "Build AWS Step Functions state machines using JSONata query language. Covers ASL structure, all state types, variables, data transformation, error handling, and service integrations in JSONata mode." +keywords: ["step functions", "state machine", "serverless", "jsonata", "asl", "amazon states language", "workflow orchestration"] +author: "Jeff Palmer https://linkedin.com/in/jeffrey-palmer/" +--- + +# Step Functions JSONata + +Build AWS Step Functions state machines using the JSONata query language instead of legacy JSONPath. JSONata simplifies data transformation, reduces boilerplate, and reduces external dependencies. 
+ +## Overview + +AWS Step Functions uses Amazon States Language (ASL) to define state machines as JSON. With JSONata mode, you replace the five JSONPath I/O fields (InputPath, Parameters, ResultSelector, ResultPath, OutputPath) with just two fields: `Arguments` and `Output`. You also gain workflow variables via `Assign` and expressive `Condition` expressions in Choice states. + +This power provides comprehensive guidance for writing state machines in JSONata mode, covering: +- ASL structure and all eight state types in JSONata mode +- The `$states` reserved variable and JSONata expression syntax +- Workflow variables with `Assign` for cross-state data sharing +- Data transformation patterns with `Arguments` and `Output` +- Error handling with `Retry` and `Catch` +- Service integration patterns (Lambda, DynamoDB, SNS, SQS, etc.) + +## When to Load Steering Files + +Load the appropriate steering file based on what the user is working on (for end-to-end designs such as polling loops, sagas, nested Map/Parallel, scatter-gather, locks, human approval, or Express → Standard handoff, see `architecture-patterns.md`): + +- **ASL structure**, **state types**, **Task**, **Pass**, **Choice**, **Wait**, **Succeed**, **Fail**, **Parallel**, **Map** → see `asl-state-types.md` +- **Variables**, **Assign**, **data passing**, **scope**, **$states**, **input**, **output**, **Arguments**, **Output**, **data transformation** → see `variables-and-data.md` +- **Error handling**, **Retry**, **Catch**, **fallback**, **error codes**, **States.Timeout**, **States.ALL** → see `error-handling.md` +- **Service integrations**, **Lambda invoke**, **DynamoDB**, **SNS**, **SQS**, **SDK integrations**, **Resource ARN**, **sync**, **async** → see `service-integrations.md` + +## Quick Reference + +### Enabling JSONata + +Set `QueryLanguage` at the top level to apply to all states: + +```json +{ + "QueryLanguage": "JSONata", + "StartAt": "MyState", + "States": { ... 
} +} +``` + +### JSONata Expression Syntax + +Wrap expressions in `{% %}`: + +```json +"Output": "{% $states.input.customer.name %}" +"TimeoutSeconds": "{% $timeout %}" +"Condition": "{% $states.input.age >= 18 %}" +``` + +### The `$states` Reserved Variable + +``` +$states.input → Original state input +$states.result → Task/Parallel/Map result (on success) +$states.errorOutput → Error output (only in Catch) +$states.context → Execution context object +``` + +### Key Fields in JSONata Mode + +| Field | Purpose | Available In | +|-------|---------|-------------| +| `Arguments` | Input to task/branches | Task, Parallel | +| `Output` | Transform state output | All except Fail | +| `Assign` | Store workflow variables | All except Succeed, Fail | +| `Condition` | Boolean branching | Choice rules | +| `Items` | Array for iteration | Map | + +### JSONata Functions Provided by Step Functions + +| Function | Purpose | +|----------|---------| +| `$partition(array, size)` | Partition array into chunks | +| `$range(start, end, step)` | Generate array of values | +| `$hash(data, algorithm)` | Calculate hash (MD5, SHA-1, SHA-256, SHA-384, SHA-512) | +| `$random([seed])` | Random number 0 ≤ n < 1, optional seed | +| `$uuid()` | Generate v4 UUID | +| `$parse(jsonString)` | Deserialize JSON string | + +Plus all [built-in JSONata functions](https://github.com/jsonata-js/jsonata/tree/master/docs) + +### Minimal Complete Example + +```json +{ + "Comment": "Order processing workflow", + "QueryLanguage": "JSONata", + "StartAt": "ValidateOrder", + "States": { + "ValidateOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "OrdersTable", + "Key": { + "orderId": { + "S": "{% $states.input.orderId %}" + } + } + }, + "Assign": { + "orderId": "{% $states.input.orderId %}" + }, + "Output": "{% $states.result.Item %}", + "Next": "CheckStock" + }, + "CheckStock": { + "Type": "Choice", + "Choices": [ + { + "Condition": "{% 
$states.input.inStock.BOOL = true %}", + "Next": "ProcessPayment" + } + ], + "Default": "OutOfStock" + }, + "ProcessPayment": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/PaymentQueue", + "MessageBody": "{% $string({'orderId': $orderId, 'amount': $states.input.total.N}) %}" + }, + "Output": { + "orderId": "{% $orderId %}", + "messageId": "{% $states.result.MessageId %}" + }, + "Retry": [ + { + "ErrorEquals": ["States.TaskFailed"], + "IntervalSeconds": 2, + "MaxAttempts": 3, + "BackoffRate": 2.0 + } + ], + "End": true + }, + "OutOfStock": { + "Type": "Fail", + "Error": "OutOfStockError", + "Cause": "Requested item is out of stock" + } + } +} +``` + +## Best Practices + +- Always set `"QueryLanguage": "JSONata"` at the top level for new state machines +- Use `Assign` to store data needed in later states instead of threading it through Output +- Keep `Output` minimal — only provide what the next state actually needs +- Use `$states.input` to reference original state input, not `$` (which is restricted at top level in JSONata) +- Remember: `Assign` and `Output` are evaluated in parallel — variable assignments in `Assign` are NOT available in `Output` of the same state +- All JSONata expressions must produce a defined value — `$data.nonExistentField` throws `States.QueryEvaluationError` +- Use `$states.context.Execution.Input` to access the original workflow input from any state +- Save state machine definitions with `.asl.json` extension when working outside the console +- Prefer the optimized Lambda integration (`arn:aws:states:::lambda:invoke`) over the SDK integration + +## Troubleshooting + +### Common Errors + +- `States.QueryEvaluationError` — JSONata expression failed. Check for type errors, undefined fields, or out-of-range values. +- Mixing JSONPath fields (`Parameters`, `InputPath`, `ResultPath`, etc.) 
with JSONata `QueryLanguage` — these are mutually exclusive. +- Using `$` or `$$` at the top level of a JSONata expression — use `$states.input` instead. +- Forgetting `{% %}` delimiters around JSONata expressions — the string will be treated as a literal. +- Assigning variables in `Assign` and expecting them in `Output` of the same state — new values only take effect in the next state. + +## Resources + +- [ASL Specification](https://states-language.net/spec.html) +- [Transforming data with JSONata in Step Functions](https://docs.aws.amazon.com/step-functions/latest/dg/transforming-data.html) +- [Passing data between states with variables](https://docs.aws.amazon.com/step-functions/latest/dg/workflow-variables.html) +- [JSONata documentation](https://docs.jsonata.org/overview.html) +- [Step Functions Developer Guide](https://docs.aws.amazon.com/step-functions/latest/dg/welcome.html) diff --git a/aws-step-functions-jsonata/steering/architecture-patterns.md b/aws-step-functions-jsonata/steering/architecture-patterns.md new file mode 100644 index 0000000..9f82f6a --- /dev/null +++ b/aws-step-functions-jsonata/steering/architecture-patterns.md @@ -0,0 +1,488 @@ +# Architecture Patterns (JSONata Mode) + +## Polling Loop (Wait → Check → Choice) + +Many AWS operations are asynchronous — you start them and then poll until they complete. The pattern is: initial wait → call describe/status API → check result → short wait → loop back. 
+ +```json +"SubmitOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/FulfillmentQueue", + "MessageBody": "{% $string({'orderId': $orderId, 'items': $states.input.items}) %}" + }, + "Assign": { "fulfillmentOrderId": "{% $orderId %}" }, + "Next": "InitialWaitForFulfillment" +}, +"InitialWaitForFulfillment": { + "Type": "Wait", + "Seconds": 300, + "Next": "CheckFulfillmentStatus" +}, +"CheckFulfillmentStatus": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "OrdersTable", + "Key": { "orderId": { "S": "{% $fulfillmentOrderId %}" } } + }, + "Assign": { "orderStatus": "{% $states.result.Item.status.S %}" }, + "Next": "EvaluateFulfillment", + "Retry": [ + { "ErrorEquals": ["States.TaskFailed", "ThrottlingException"], "IntervalSeconds": 2, "MaxAttempts": 3, "BackoffRate": 2 } + ] +}, +"EvaluateFulfillment": { + "Type": "Choice", + "Choices": [ + { "Condition": "{% $orderStatus = 'fulfilled' %}", "Next": "FulfillmentComplete" }, + { "Condition": "{% $orderStatus in ['failed', 'cancelled'] %}", "Next": "FulfillmentFailed" } + ], + "Default": "WaitBeforeNextPoll" +}, +"WaitBeforeNextPoll": { + "Type": "Wait", + "Seconds": 60, + "Next": "CheckFulfillmentStatus" +} +``` + +Key elements: +- Initial longer wait gives the operation time to start. Shorter poll interval for subsequent checks. +- Choice state routes to success, failure, or back to the wait loop. +- Always add Retry on the status-check Task to handle transient API errors. +- Consider adding `TimeoutSeconds` on the state machine or a counter variable to prevent infinite polling. + +--- + +## Compensation / Saga Pattern + +Step Functions has no built-in rollback. The saga pattern chains compensating actions in reverse order. Each forward step has a Catch that records which step failed, then routes to the appropriate compensation entry point. 
+ +```json +"ReserveInventory": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:updateItem", + "Arguments": { + "TableName": "InventoryTable", + "Key": { "productId": { "S": "{% $states.input.productId %}" } }, + "UpdateExpression": "SET reserved = reserved + :qty", + "ExpressionAttributeValues": { ":qty": { "N": "{% $string($states.input.quantity) %}" } } + }, + "Assign": { "reservedQty": "{% $states.input.quantity %}" }, + "Catch": [ + { "ErrorEquals": ["States.ALL"], "Assign": { "failedStep": "ReserveInventory", "errorInfo": "{% $states.errorOutput %}" }, "Next": "OrderFailed" } + ], + "Next": "ChargePayment" +}, +"ChargePayment": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargeCard:$LATEST", + "Payload": { "orderId": "{% $orderId %}", "amount": "{% $states.input.total %}" } + }, + "Assign": { "chargeId": "{% $states.result.Payload.chargeId %}" }, + "Output": "{% $states.result.Payload %}", + "Catch": [ + { "ErrorEquals": ["States.ALL"], "Assign": { "failedStep": "ChargePayment", "errorInfo": "{% $states.errorOutput %}" }, "Next": "ReleaseInventory" } + ], + "Next": "ShipOrder" +}, +"ShipOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ShipOrder:$LATEST", + "Payload": { "orderId": "{% $orderId %}" } + }, + "Catch": [ + { "ErrorEquals": ["States.ALL"], "Assign": { "failedStep": "ShipOrder", "errorInfo": "{% $states.errorOutput %}" }, "Next": "RefundPayment" } + ], + "Next": "OrderComplete" +}, +"RefundPayment": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:RefundCharge:$LATEST", + "Payload": { "chargeId": "{% $chargeId %}", "reason": "{% $errorInfo.Cause %}" } + }, + "Next": "ReleaseInventory" +}, +"ReleaseInventory": { + "Type": 
"Task", + "Resource": "arn:aws:states:::dynamodb:updateItem", + "Arguments": { + "TableName": "InventoryTable", + "Key": { "productId": { "S": "{% $states.context.Execution.Input.productId %}" } }, + "UpdateExpression": "SET reserved = reserved - :qty", + "ExpressionAttributeValues": { ":qty": { "N": "{% $string($reservedQty) %}" } } + }, + "Next": "OrderFailed" +}, +"OrderFailed": { + "Type": "Fail", + "Error": "{% $failedStep & 'Error' %}", + "Cause": "{% 'Order ' & $orderId & ' failed at ' & $failedStep & ': ' & ($exists($errorInfo.Cause) ? $errorInfo.Cause : 'Unknown') %}" +} +``` + +Compensation chain: `ReserveInventory` fails → `OrderFailed`. `ChargePayment` fails → `ReleaseInventory` → `OrderFailed`. `ShipOrder` fails → `RefundPayment` → `ReleaseInventory` → `OrderFailed`. Each Catch records `$failedStep` and `$errorInfo`. Compensation states use variables from forward steps (`$chargeId`, `$reservedQty`) to know what to undo. `ReleaseInventory` reads `productId` from `$states.context.Execution.Input` (assuming the execution input carries it) because its own state input is the caught error output or the `RefundPayment` result, not the original order. + +--- + +## Nested Map / Parallel Structures + +Map, Parallel, and Task states nest in any combination. The key constraint is understanding variable scope and data flow at each nesting boundary. 
+ +```json +"ProcessAllOrders": { + "Type": "Map", + "Items": "{% $states.input.orders %}", + "MaxConcurrency": 5, + "ItemProcessor": { + "ProcessorConfig": { "Mode": "INLINE" }, + "StartAt": "ProcessSingleOrder", + "States": { + "ProcessSingleOrder": { + "Type": "Parallel", + "Branches": [ + { + "StartAt": "ValidatePayment", + "States": { + "ValidatePayment": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ValidatePayment:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + }, + { + "StartAt": "CheckInventory", + "States": { + "CheckInventory": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "InventoryTable", + "Key": { "productId": { "S": "{% $states.input.productId %}" } } + }, + "Output": "{% $states.result.Item %}", + "End": true + } + } + } + ], + "Output": { "payment": "{% $states.result[0] %}", "inventory": "{% $states.result[1] %}" }, + "End": true + } + } + }, + "Assign": { "orderResults": "{% $states.result %}" }, + "Next": "Summarize" +} +``` + +### Variable Scoping Across Nesting Levels + +Each nesting level creates a new scope. Inner scopes can READ outer variables but CANNOT ASSIGN to them — use `Output` on terminal states to pass data back up. Parallel branches and Map iterations are isolated from each other. Variable names must be unique across all nesting levels (no shadowing). Exception: Distributed Map (`"Mode": "DISTRIBUTED"`) cannot read outer scope variables at all. + +Data flows down via state input (use `ItemSelector` for Map, `Arguments` for Parallel) and up via `Output` on terminal states. The Parallel result is an array with one element per branch; the Map result is an array with one element per iteration. 
+ +--- + +## Scatter-Gather with Partial Results + +When calling unreliable external APIs per-item, use `ToleratedFailurePercentage` on a Map to continue with whatever succeeded, then post-process the results to separate successes from failures. Failed iterations return objects with `Error` and `Cause` fields. + +```json +"CallExternalAPIs": { + "Type": "Map", + "Items": "{% $states.input.records %}", + "MaxConcurrency": 10, + "ToleratedFailurePercentage": 100, + "ItemProcessor": { + "ProcessorConfig": { "Mode": "INLINE" }, + "StartAt": "CallAPI", + "States": { + "CallAPI": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:CallExternalAPI:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "Retry": [ + { "ErrorEquals": ["States.TaskFailed"], "IntervalSeconds": 2, "MaxAttempts": 2, "BackoffRate": 2.0, "JitterStrategy": "FULL" } + ], + "End": true + } + } + }, + "Next": "SplitResults" +}, +"SplitResults": { + "Type": "Pass", + "Assign": { + "successes": "{% ( $s := $states.input[$not($exists(Error))]; $type($s) = 'array' ? $s : $exists($s) ? [$s] : [] ) %}", + "failures": "{% ( $f := $states.input[$exists(Error)]; $type($f) = 'array' ? $f : $exists($f) ? [$f] : [] ) %}" + }, + "Output": { + "successes": "{% ( $s := $states.input[$not($exists(Error))]; $type($s) = 'array' ? $s : $exists($s) ? [$s] : [] ) %}", + "failures": "{% ( $f := $states.input[$exists(Error)]; $type($f) = 'array' ? $f : $exists($f) ? [$f] : [] ) %}", + "totalProcessed": "{% $count($states.input) %}" + }, + "Next": "EvaluateResults" +}, +"EvaluateResults": { + "Type": "Choice", + "Choices": [ + { "Condition": "{% $count($successes) = 0 %}", "Next": "AllFailed" } + ], + "Default": "ProcessSuccesses" +} +``` + +Key elements: +- `ToleratedFailurePercentage: 100` lets the Map complete even if every item fails. Lower the threshold to bail out early. 
+- Filter on `$exists(Error)` to separate failed from successful iterations. +- Guard filtered results with the `$type`/`$exists`/`[]` pattern — JSONata returns a single object (not a 1-element array) when exactly one item matches, and undefined when nothing matches. + +--- + +## Semaphore / Concurrency Lock + +Step Functions has no native mutual exclusion. Use DynamoDB conditional writes as a distributed lock when only one execution should process a given resource at a time. Pattern: acquire lock → do work → release lock, with Catch ensuring release on failure. + +```json +"AcquireLock": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:putItem", + "Arguments": { + "TableName": "LocksTable", + "Item": { + "lockId": { "S": "{% $states.input.customerId %}" }, + "executionId": { "S": "{% $states.context.Execution.Id %}" }, + "expiresAt": { "N": "{% $string($toMillis($now()) + 900000) %}" } + }, + "ConditionExpression": "attribute_not_exists(lockId) OR expiresAt < :now", + "ExpressionAttributeValues": { + ":now": { "N": "{% $string($toMillis($now())) %}" } + } + }, + "Retry": [ + { "ErrorEquals": ["DynamoDB.ConditionalCheckFailedException"], "IntervalSeconds": 5, "MaxAttempts": 12, "BackoffRate": 1.5, "JitterStrategy": "FULL" } + ], + "Catch": [ + { "ErrorEquals": ["DynamoDB.ConditionalCheckFailedException"], "Assign": { "lockError": "{% $states.errorOutput %}" }, "Next": "LockUnavailable" } + ], + "Next": "DoProtectedWork" +}, +"DoProtectedWork": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ProcessCustomer:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "Catch": [ + { "ErrorEquals": ["States.ALL"], "Assign": { "workError": "{% $states.errorOutput %}" }, "Next": "ReleaseLock" } + ], + "Next": "ReleaseLock" +}, +"ReleaseLock": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:deleteItem", + 
"Arguments": { + "TableName": "LocksTable", + "Key": { "lockId": { "S": "{% $states.input.customerId %}" } }, + "ConditionExpression": "executionId = :execId", + "ExpressionAttributeValues": { ":execId": { "S": "{% $states.context.Execution.Id %}" } } + }, + "Retry": [ + { "ErrorEquals": ["States.ALL"], "IntervalSeconds": 1, "MaxAttempts": 3, "BackoffRate": 2.0 } + ], + "Next": "CheckWorkResult" +}, +"CheckWorkResult": { + "Type": "Choice", + "Choices": [ + { "Condition": "{% $exists($workError) %}", "Next": "WorkFailed" } + ], + "Default": "Done" +}, +"LockUnavailable": { + "Type": "Fail", + "Error": "LockContention", + "Cause": "{% 'Could not acquire lock for ' & $states.input.customerId & ' after retries' %}" +} +``` + +Key elements: +- `ConditionExpression` with `attribute_not_exists` ensures only one writer wins. The `expiresAt` check provides stale-lock recovery if an execution crashes without releasing. +- `executionId` on the lock item lets `ReleaseLock` conditionally delete only its own lock. +- Retry on `ConditionalCheckFailedException` acts as a spin-wait. Tune `MaxAttempts` and `IntervalSeconds` based on expected hold time. +- Catch on `DoProtectedWork` routes to `ReleaseLock` so the lock is always released. After releasing, `CheckWorkResult` re-raises the error path. +- Set `expiresAt` to a reasonable TTL (here 15 min). Use a DynamoDB TTL attribute to auto-clean expired locks. + +--- + +## Human-in-the-Loop with Timeout Escalation + +Chain multiple `.waitForTaskToken` states with `States.Timeout` catches to build escalation: primary approver → manager → auto-reject. 
+ +```json +"RequestApproval": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", + "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'orderId': $orderId, 'approver': $states.input.primaryApprover, 'amount': $states.input.amount}) %}" + }, + "TimeoutSeconds": 86400, + "Assign": { "approvalResult": "{% $states.result %}" }, + "Catch": [ + { "ErrorEquals": ["States.Timeout"], "Assign": { "escalationReason": "Primary approver did not respond within 24 hours" }, "Next": "EscalateToManager" }, + { "ErrorEquals": ["States.ALL"], "Assign": { "approvalError": "{% $states.errorOutput %}" }, "Next": "ApprovalFailed" } + ], + "Next": "EvaluateApproval" +}, +"EscalateToManager": { + "Type": "Task", + "Resource": "arn:aws:states:::sns:publish", + "Arguments": { + "TopicArn": "arn:aws:sns:us-east-1:123456789012:EscalationNotifications", + "Subject": "Approval Escalation", + "Message": "{% 'Order ' & $orderId & ' requires manager approval. 
' & $escalationReason %}" + }, + "Next": "WaitForManagerApproval" +}, +"WaitForManagerApproval": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", + "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'orderId': $orderId, 'approver': $states.input.managerApprover, 'amount': $states.input.amount, 'escalated': true}) %}" + }, + "TimeoutSeconds": 43200, + "Assign": { "approvalResult": "{% $states.result %}" }, + "Catch": [ + { + "ErrorEquals": ["States.Timeout"], + "Assign": { "approvalResult": { "decision": "rejected", "reason": "No response from manager within 12 hours — auto-rejected" } }, + "Next": "EvaluateApproval" + }, + { "ErrorEquals": ["States.ALL"], "Assign": { "approvalError": "{% $states.errorOutput %}" }, "Next": "ApprovalFailed" } + ], + "Next": "EvaluateApproval" +}, +"EvaluateApproval": { + "Type": "Choice", + "Choices": [ + { "Condition": "{% $approvalResult.decision = 'approved' %}", "Next": "ProcessApprovedOrder" } + ], + "Default": "OrderRejected" +} +``` + +Key elements: +- Each callback stage has its own `TimeoutSeconds` — shorter for escalation stages since urgency increases. +- `States.Timeout` in Catch distinguishes "no response" from actual errors, routing to the next escalation tier. +- The final tier auto-rejects by assigning a synthetic result in Catch `Assign` and routing to the same `EvaluateApproval` Choice. This avoids duplicating decision logic. +- External system calls `SendTaskSuccess` with `{"decision": "approved"}` or `{"decision": "rejected", "reason": "..."}`. +- Use Standard (not Express) workflows — Express doesn't support `.waitForTaskToken`. + +--- + +## Express → Standard Handoff + +Express workflows are cheaper (pay per request, up to 5 min) but don't support callbacks or long waits. Standard workflows handle those but cost per state transition. 
Use Express for fast, high-volume ingest and kick off a Standard execution for the long-running tail. + +```json +{ + "Comment": "Express workflow — fast ingest and validation", + "QueryLanguage": "JSONata", + "StartAt": "ValidateInput", + "States": { + "ValidateInput": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ValidateOrder:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "Next": "EnrichData" + }, + "EnrichData": { + "Type": "Parallel", + "Branches": [ + { + "StartAt": "LookupCustomer", + "States": { + "LookupCustomer": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "CustomersTable", + "Key": { "customerId": { "S": "{% $states.input.customerId %}" } } + }, + "Output": "{% $states.result.Item %}", + "End": true + } + } + }, + { + "StartAt": "LookupPricing", + "States": { + "LookupPricing": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetPricing:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + } + ], + "Output": { + "order": "{% $states.input %}", + "customer": "{% $states.result[0] %}", + "pricing": "{% $states.result[1] %}" + }, + "Next": "HandOffToStandard" + }, + "HandOffToStandard": { + "Type": "Task", + "Resource": "arn:aws:states:::states:startExecution", + "Arguments": { + "StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:OrderFulfillment-Standard", + "Input": "{% $string($states.input) %}" + }, + "Output": { + "status": "handed_off", + "childExecutionArn": "{% $states.result.ExecutionArn %}" + }, + "End": true + } + } +} +``` + +Key elements: +- Express does validation, enrichment, fan-out — fast, stateless work that benefits from per-request pricing. 
+- `HandOffToStandard` uses fire-and-forget (no `.sync` suffix) so the Express execution completes immediately. Express workflows do not support the `.sync`/`.sync:2` (Run a Job) patterns, so if the caller must wait for the child's result, make the parent a Standard workflow and use `.sync:2` there. +- Use `$string($states.input)` to serialize — `startExecution` expects a JSON string for `Input`. +- Ideal for event-driven architectures: API Gateway or EventBridge triggers Express at high volume, only orders needing long-running processing incur Standard costs. diff --git a/aws-step-functions-jsonata/steering/asl-state-types.md b/aws-step-functions-jsonata/steering/asl-state-types.md new file mode 100644 index 0000000..c530a49 --- /dev/null +++ b/aws-step-functions-jsonata/steering/asl-state-types.md @@ -0,0 +1,474 @@ +# ASL Structure and State Types (JSONata Mode) + +## State Machine Top-Level Structure + +```json +{ + "Comment": "Description of the state machine", + "QueryLanguage": "JSONata", + "StartAt": "FirstStateName", + "TimeoutSeconds": 3600, + "Version": "1.0", + "States": { + "FirstStateName": { ... }, + "SecondStateName": { ... } + } +} +``` + +- `QueryLanguage`: Set to `"JSONata"` at top level. Defaults to `"JSONPath"` if omitted. +- `StartAt`: Must exactly match a state name (case-sensitive). +- `TimeoutSeconds`: Optional max execution time. Exceeding it throws `States.Timeout`. +- `States`: Required object containing all state definitions. +- State names must be unique and ≤ 80 Unicode characters. + +## Common Fields for All JSONata States + +| Field | Description | +|-------|-------------| +| `Type` | Required. One of: Task, Pass, Choice, Wait, Parallel, Map, Succeed, Fail | +| `Comment` | Optional human-readable description | +| `Next` | Name of next state (required for non-terminal states except Choice) | +| `End` | Set to `true` for terminal states | +| `Output` | Optional. Transform state output. Available in all types except Fail | +| `Assign` | Optional. Store workflow variables. 
Available in all types except Succeed and Fail | +| `QueryLanguage` | Optional per-state override | + +## Field Availability Matrix (JSONata) + +``` + Task Parallel Map Pass Wait Choice Succeed Fail +Type ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ +Comment ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ +Output ✓ ✓ ✓ ✓ ✓ ✓ ✓ +Assign ✓ ✓ ✓ ✓ ✓ ✓ +Next/End ✓ ✓ ✓ ✓ ✓ +Arguments ✓ ✓ +Retry/Catch ✓ ✓ ✓ +``` + +--- + +## Pass State + +Passes input to output, optionally transforming it. Useful for injecting data or reshaping payloads. + +```json +"InjectData": { + "Type": "Pass", + "Output": { + "greeting": "{% 'Hello, ' & $states.input.name %}", + "timestamp": "{% $now() %}" + }, + "Next": "NextState" +} +``` + +With variable assignment: + +```json +"StoreDefaults": { + "Type": "Pass", + "Assign": { + "retryCount": 0, + "maxRetries": 3, + "config": "{% $states.input.configuration %}" + }, + "Next": "ProcessItem" +} +``` + +Without `Output`, the Pass state copies input to output unchanged. + +--- + +## Task State + +Executes work via AWS service integrations, activities, or HTTP APIs. 
+ +### Required Fields +- `Resource`: ARN identifying the task to execute + +### Optional Fields +- `Arguments`: Input to the task (replaces JSONPath `Parameters`) +- `Output`: Transform the result +- `Assign`: Store variables from input or result +- `TimeoutSeconds`: Max task duration (AWS Step Functions defaults to 99999999, i.e. effectively no timeout, so set it explicitly; accepts JSONata expression) +- `HeartbeatSeconds`: Heartbeat interval (must be < TimeoutSeconds) +- `Retry`: Retry policy array +- `Catch`: Error handler array +- `Credentials`: Cross-account role assumption + +### Lambda Invoke Example + +```json +"InvokeLambda": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunc:$LATEST", + "Payload": { + "orderId": "{% $states.input.orderId %}", + "customer": "{% $states.input.customer %}" + } + }, + "Assign": { + "processedResult": "{% $states.result.Payload %}" + }, + "Output": "{% $states.result.Payload %}", + "Next": "NextState" +} +``` + +### Dynamic Timeout + +```json +"LongRunningTask": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:SlowFunc:$LATEST", + "Payload": "{% $states.input %}" + }, + "TimeoutSeconds": "{% $states.input.timeoutValue %}", + "HeartbeatSeconds": "{% $states.input.heartbeatValue %}", + "Next": "Done" +} +``` + +--- + +## Choice State + +Adds branching logic. Uses `Condition` field with JSONata boolean expressions (replaces JSONPath `Variable` + comparison operators). 
+ +### Structure + +```json +"RouteOrder": { + "Type": "Choice", + "Choices": [ + { + "Condition": "{% $states.input.orderType = 'express' %}", + "Next": "ExpressShipping" + }, + { + "Condition": "{% $states.input.total > 100 %}", + "Assign": { + "discount": "{% $states.input.total * 0.1 %}" + }, + "Output": { + "total": "{% $states.input.total * 0.9 %}" + }, + "Next": "ApplyDiscount" + }, + { + "Condition": "{% $states.input.priority >= 5 and $states.input.category = 'urgent' %}", + "Next": "PriorityQueue" + } + ], + "Default": "StandardProcessing", + "Assign": { + "routedDefault": true + } +} +``` + +Key points: +- `Condition` must evaluate to a boolean. +- Each Choice Rule can have its own `Assign` and `Output`. +- If a rule matches, its `Assign`/`Output` are used (not the state-level ones). +- If no rule matches, the state-level `Assign` is evaluated and `Default` is followed. +- `Default` is optional but recommended — without it, `States.NoChoiceMatched` is thrown. +- Choice states cannot be terminal (no `End` field). + +### Complex Conditions + +JSONata supports rich boolean logic: + +```json +"Condition": "{% $states.input.age >= 18 and $states.input.age <= 65 %}" +"Condition": "{% $states.input.status = 'active' or $states.input.override = true %}" +"Condition": "{% $not($exists($states.input.error)) %}" +"Condition": "{% $contains($states.input.email, '@') %}" +"Condition": "{% $count($states.input.items) > 0 %}" +"Condition": "{% $states.input.score >= $threshold %}" +``` + +--- + +## Wait State + +Delays execution for a specified duration or until a timestamp. 
+ +### Wait by Seconds + +```json +"WaitTenSeconds": { + "Type": "Wait", + "Seconds": 10, + "Next": "Continue" +} +``` + +### Wait with Dynamic Seconds + +```json +"DynamicWait": { + "Type": "Wait", + "Seconds": "{% $states.input.delaySeconds %}", + "Next": "Continue" +} +``` + +### Wait Until Timestamp + +```json +"WaitUntilDate": { + "Type": "Wait", + "Timestamp": "{% $states.input.scheduledTime %}", + "Next": "Execute" +} +``` + +Timestamps must conform to RFC3339 (e.g., `"2026-03-14T01:59:00Z"`). + +A Wait state must contain exactly one of `Seconds` or `Timestamp`. + +--- + +## Succeed State + +Terminates the state machine (or a Parallel branch / Map iteration) successfully. + +```json +"Done": { + "Type": "Succeed", + "Output": { + "status": "completed", + "processedAt": "{% $now() %}" + } +} +``` + +Without `Output`, passes input through as output. No `Next` field allowed. + +--- + +## Fail State + +Terminates the state machine with an error. + +```json +"OrderFailed": { + "Type": "Fail", + "Error": "OrderValidationError", + "Cause": "The order could not be validated" +} +``` + +### Dynamic Error and Cause + +```json +"DynamicFail": { + "Type": "Fail", + "Error": "{% $states.input.errorCode %}", + "Cause": "{% $states.input.errorMessage %}" +} +``` + +Build rich, defensive error messages with fallbacks for missing fields: + +```json +"OrderProcessingFailed": { + "Type": "Fail", + "Error": "OrderProcessingError", + "Cause": "{% 'Failed to process order ' & ($exists($orderId) ? $orderId : 'unknown') & ': ' & ($exists($error.Error) ? $error.Error : 'Unknown error') & ' - ' & ($exists($error.Cause) ? $error.Cause : 'No details available') & '. Timestamp: ' & $now() %}" +} +``` + +No `Next`, `End`, `Output`, or `Assign` fields. Fail states are always terminal. + +--- + +## Parallel State + +Executes multiple branches concurrently. All branches receive the same input. 
+ +```json +"LookupCustomerInfo": { + "Type": "Parallel", + "Arguments": { + "customerId": "{% $states.input.customerId %}" + }, + "Branches": [ + { + "StartAt": "GetAddress", + "States": { + "GetAddress": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetAddress:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + }, + { + "StartAt": "GetOrders", + "States": { + "GetOrders": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetOrders:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + } + ], + "Assign": { + "address": "{% $states.result[0] %}", + "orders": "{% $states.result[1] %}" + }, + "Output": { + "address": "{% $states.result[0] %}", + "orders": "{% $states.result[1] %}" + }, + "Next": "ProcessResults" +} +``` + +Key points: +- `Arguments` provides input to each branch's StartAt state (optional, defaults to state input). +- Result is an array with one element per branch, in the same order as `Branches`. +- If any branch fails, the entire Parallel state fails (unless caught). +- States inside branches can only transition to other states within the same branch. +- Branch variables are scoped — branches cannot access each other's variables. +- Use `Output` on terminal states within branches to pass data back to the outer scope. + +--- + +## Map State + +Iterates over an array, processing each element (potentially in parallel). 
+ +### Basic Map + +```json +"ProcessItems": { + "Type": "Map", + "Items": "{% $states.input.orders %}", + "MaxConcurrency": 10, + "ItemProcessor": { + "StartAt": "ProcessOrder", + "States": { + "ProcessOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ProcessOrder:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + }, + "Output": "{% $states.result %}", + "Next": "AllDone" +} +``` + +### Map with ItemSelector + +Use `ItemSelector` to reshape each item before processing: + +```json +"ProcessItems": { + "Type": "Map", + "Items": "{% $states.input.detail.shipped %}", + "ItemSelector": { + "parcel": "{% $states.context.Map.Item.Value %}", + "index": "{% $states.context.Map.Item.Index %}", + "courier": "{% $states.input.detail.`delivery-partner` %}" + }, + "MaxConcurrency": 0, + "ItemProcessor": { + "StartAt": "Ship", + "States": { + "Ship": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ShipItem:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + }, + "Next": "Done" +} +``` + +### Map Context Variables + +Inside `ItemSelector`, you can access: +- `$states.context.Map.Item.Value` — the current array element +- `$states.context.Map.Item.Index` — the zero-based index + +### Key Map Fields + +| Field | Description | +|-------|-------------| +| `Items` | JSON array or JSONata expression evaluating to an array | +| `ItemProcessor` | State machine to run for each item (has `StartAt` and `States`) | +| `ItemSelector` | Reshape each item before processing | +| `MaxConcurrency` | Max parallel iterations (0 = unlimited, 1 = sequential) | +| `ToleratedFailurePercentage` | 0-100, percentage of items allowed to fail | +| `ToleratedFailureCount` 
| Number of items allowed to fail | +| `ItemReader` | Read items from an external resource | +| `ItemBatcher` | Batch items into sub-arrays | +| `ResultWriter` | Write results to an external resource | + +### ProcessorConfig + +The `ItemProcessor` can include a `ProcessorConfig` to control execution mode: + +```json +"ItemProcessor": { + "ProcessorConfig": { + "Mode": "INLINE" + }, + "StartAt": "ProcessOrder", + "States": { ... } +} +``` + +- `INLINE` (default) — iterations run within the parent execution. Use for most cases. +- `DISTRIBUTED` — iterations run as child executions. Use for large-scale processing (thousands+ items), items read from S3, or when you need per-iteration execution history. + +### Failure Tolerance + +```json +"ProcessWithTolerance": { + "Type": "Map", + "Items": "{% $states.input.records %}", + "ToleratedFailurePercentage": 10, + "ToleratedFailureCount": 5, + "ItemProcessor": { ... }, + "Next": "Done" +} +``` + +The Map state fails if either threshold is breached. + +--- \ No newline at end of file diff --git a/aws-step-functions-jsonata/steering/error-handling.md b/aws-step-functions-jsonata/steering/error-handling.md new file mode 100644 index 0000000..b27ad18 --- /dev/null +++ b/aws-step-functions-jsonata/steering/error-handling.md @@ -0,0 +1,445 @@ +# Error Handling in JSONata Mode + +## Overview + +When a state encounters an error, Step Functions defaults to failing the entire execution. You can override this with `Retry` (retry the failed state) and `Catch` (transition to a fallback state). + +`Retry` and `Catch` are available on: Task, Parallel, and Map states. + +## Error Names + +Errors are identified by case-sensitive strings. 
Step Functions defines these built-in error codes: + +| Error Code | Description | +|-----------|-------------| +| `States.ALL` | Wildcard — matches any error | +| `States.Timeout` | Task exceeded `TimeoutSeconds` or missed heartbeat | +| `States.HeartbeatTimeout` | Task missed heartbeat interval | +| `States.TaskFailed` | Task failed during execution | +| `States.Permissions` | Insufficient privileges | +| `States.ResultPathMatchFailure` | ResultPath cannot be applied (JSONPath only) | +| `States.ParameterPathFailure` | Parameter path resolution failed (JSONPath only) | +| `States.QueryEvaluationError` | JSONata expression evaluation failed | +| `States.BranchFailed` | A Parallel state branch failed | +| `States.NoChoiceMatched` | No Choice rule matched and no Default | +| `States.IntrinsicFailure` | Intrinsic function failed (JSONPath only) | +| `States.ExceedToleratedFailureThreshold` | Map state exceeded failure tolerance | +| `States.ItemReaderFailed` | Map state ItemReader failed | +| `States.ResultWriterFailed` | Map state ResultWriter failed | + +Custom error names are allowed but must NOT start with `States.`. + +--- + +## Retry + +The `Retry` field is an array of Retrier objects. The interpreter scans retriers in order and uses the first one whose `ErrorEquals` matches. 
+ +### Retrier Fields + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `ErrorEquals` | string[] | Required | Error names to match | +| `IntervalSeconds` | integer | 1 | Seconds before first retry | +| `MaxAttempts` | integer | 3 | Maximum retry attempts (0 = never retry) | +| `BackoffRate` | number | 2.0 | Multiplier for retry interval (must be ≥ 1.0) | +| `MaxDelaySeconds` | integer | — | Cap on retry interval | +| `JitterStrategy` | string | — | Jitter strategy (e.g., `"FULL"`) | + +### Basic Retry + +```json +"ProcessPayment": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Pay:$LATEST", + "Payload": "{% $states.input %}" + }, + "Retry": [ + { + "ErrorEquals": ["States.TaskFailed"], + "IntervalSeconds": 2, + "MaxAttempts": 3, + "BackoffRate": 2.0 + } + ], + "Next": "Confirm" +} +``` + +This retries after 2s, 4s, 8s (3 attempts with 2x backoff). + +### Retry with Max Delay and Jitter + +```json +"Retry": [ + { + "ErrorEquals": ["States.TaskFailed"], + "IntervalSeconds": 1, + "MaxAttempts": 5, + "BackoffRate": 2.0, + "MaxDelaySeconds": 30, + "JitterStrategy": "FULL" + } +] +``` + +### Multiple Retriers + +Retriers are evaluated in order. Each retrier tracks its own attempt count independently: + +```json +"Retry": [ + { + "ErrorEquals": ["ThrottlingException"], + "IntervalSeconds": 1, + "MaxAttempts": 5, + "BackoffRate": 2.0, + "JitterStrategy": "FULL" + }, + { + "ErrorEquals": ["States.Timeout"], + "MaxAttempts": 0 + }, + { + "ErrorEquals": ["States.ALL"], + "IntervalSeconds": 3, + "MaxAttempts": 2, + "BackoffRate": 1.5 + } +] +``` + +Rules: +- `States.ALL` must appear alone in its `ErrorEquals` array. +- `States.ALL` must be in the last retrier. +- `MaxAttempts: 0` means "never retry this error." +- Retrier attempt counts reset when the interpreter transitions to another state. 
+ +--- + +## Catch + +The `Catch` field is an array of Catcher objects. After retries are exhausted (or if no retrier matches), the interpreter scans catchers in order. + +### Catcher Fields (JSONata) + +| Field | Type | Description | +|-------|------|-------------| +| `ErrorEquals` | string[] | Required. Error names to match | +| `Next` | string | Required. State to transition to | +| `Output` | any | Optional. Transform the error output | +| `Assign` | object | Optional. Assign variables from error context | + +### Basic Catch + +```json +"ProcessOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Process:$LATEST", + "Payload": "{% $states.input %}" + }, + "Catch": [ + { + "ErrorEquals": ["ValidationError"], + "Output": { + "error": "{% $states.errorOutput.Error %}", + "cause": "{% $states.errorOutput.Cause %}", + "originalInput": "{% $states.input %}" + }, + "Next": "HandleValidationError" + }, + { + "ErrorEquals": ["States.ALL"], + "Output": "{% $states.errorOutput %}", + "Next": "HandleGenericError" + } + ], + "Next": "Success" +} +``` + +### Error Output Structure + +When a state fails and matches a Catcher, the Error Output is a JSON object with: +- `Error` (string) — the error name +- `Cause` (string) — human-readable error description + +```json +{ + "Error": "States.TaskFailed", + "Cause": "Lambda function returned an error" +} +``` + +### Catch with Variable Assignment + +```json +"Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Assign": { + "hasError": true, + "errorType": "{% $states.errorOutput.Error %}", + "errorMessage": "{% $states.errorOutput.Cause %}" + }, + "Output": "{% $merge([$states.input, {'error': $states.errorOutput}]) %}", + "Next": "ErrorHandler" + } +] +``` + +In a Catch block, `Assign` and `Output` can reference: +- `$states.input` — the original state input +- `$states.errorOutput` — the error details +- `$states.context` — 
execution context + +If a Catcher matches, the state's top-level `Assign` is NOT evaluated — only the Catcher's `Assign` runs. + +### Catch Without Output + +If no `Output` is provided in the Catcher, the state output is the raw Error Output object. + +### Building Rich Error Context for Fail States + +A user-friendly pattern is to capture error details into a variable via Catch `Assign`, then reference that variable in a Fail state's `Cause` with defensive fallbacks: + +```json +"ChargePayment": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage", + "Arguments": { ... }, + "Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Assign": { + "error": "{% $states.errorOutput %}" + }, + "Next": "PaymentFailed" + } + ], + "Next": "ConfirmOrder" +}, +"PaymentFailed": { + "Type": "Fail", + "Error": "PaymentError", + "Cause": "{% 'Payment failed for order ' & ($exists($orderId) ? $orderId : 'unknown') & ': ' & ($exists($error.Error) ? $error.Error : 'Unknown') & ' - ' & ($exists($error.Cause) ? $error.Cause : 'No details') & '. Timestamp: ' & $now() %}" +} +``` + +Always guard with `$exists()` — if the variable was never assigned (e.g., the Catch didn't fire for that path), referencing it directly throws `States.QueryEvaluationError`. + +--- + +## Combined Retry and Catch + +When both are present, retries are attempted first. 
Only if retries are exhausted does the Catch apply: + +```json +"CallExternalAPI": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:CallAPI:$LATEST", + "Payload": "{% $states.input %}" + }, + "Retry": [ + { + "ErrorEquals": ["ThrottlingException", "ServiceUnavailable"], + "IntervalSeconds": 2, + "MaxAttempts": 3, + "BackoffRate": 2.0, + "JitterStrategy": "FULL" + }, + { + "ErrorEquals": ["States.Timeout"], + "IntervalSeconds": 5, + "MaxAttempts": 2 + } + ], + "Catch": [ + { + "ErrorEquals": ["ThrottlingException", "ServiceUnavailable"], + "Assign": { + "retryExhausted": true + }, + "Output": { + "error": "Service temporarily unavailable after retries", + "details": "{% $states.errorOutput %}" + }, + "Next": "NotifyAndRetryLater" + }, + { + "ErrorEquals": ["States.ALL"], + "Output": { + "error": "{% $states.errorOutput %}", + "input": "{% $states.input %}" + }, + "Next": "FatalErrorHandler" + } + ], + "Output": "{% $states.result.Payload %}", + "Next": "ProcessResponse" +} +``` + +--- + +## Handling States.QueryEvaluationError + +JSONata expressions can fail at runtime. Common causes: + +1. **Type error**: `{% $x + $y %}` where `$x` or `$y` is not a number +2. **Type incompatibility**: `"TimeoutSeconds": "{% $name %}"` where `$name` is a string +3. **Value out of range**: Negative number for `TimeoutSeconds` +4. **Undefined result**: `{% $data.nonExistentField %}` — JSON cannot represent undefined + +All of these throw `States.QueryEvaluationError`. 
Handle it like any other error: + +```json +"Retry": [ + { + "ErrorEquals": ["States.QueryEvaluationError"], + "MaxAttempts": 0 + } +], +"Catch": [ + { + "ErrorEquals": ["States.QueryEvaluationError"], + "Output": { + "error": "Data transformation failed", + "details": "{% $states.errorOutput %}" + }, + "Next": "HandleDataError" + } +] +``` + +### Preventing QueryEvaluationError + +Use defensive JSONata expressions: + +```json +"Output": { + "name": "{% $exists($states.input.name) ? $states.input.name : 'Unknown' %}", + "total": "{% $type($states.input.amount) = 'number' ? $states.input.amount : 0 %}" +} +``` + +Watch out for single-value vs array results from filters. JSONata returns a single object (not a 1-element array) when a filter matches exactly one item, and undefined when nothing matches. JSONata functions such as `$count` and `$map` tolerate both cases, but fields that require a real array — such as a Map state `Items` field — throw `States.QueryEvaluationError` when given a single object, and an undefined result throws wherever a field needs a defined value. + +Guard filtered results before using them: + +```json +"Assign": { + "pendingOrders": "{% ($filtered := $states.input.orders[status = 'pending']; $type($filtered) = 'array' ? $filtered : $exists($filtered) ? [$filtered] : []) %}" +} +``` + +This ensures `$pendingOrders` is always an array regardless of how many items matched. + +--- + +## Error Handling in Parallel States + +If any branch fails, the entire Parallel state fails. Catch the error at the Parallel state level: + +```json +"ParallelWork": { + "Type": "Parallel", + "Branches": [ ... 
], + "Retry": [ + { + "ErrorEquals": ["States.BranchFailed"], + "MaxAttempts": 1 + } + ], + "Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Output": { + "error": "{% $states.errorOutput %}", + "failedAt": "parallel execution" + }, + "Next": "HandleParallelError" + } + ], + "Next": "Continue" +} +``` + +--- + +## Error Handling in Map States + +Individual iteration failures can be tolerated: + +```json +"ProcessAll": { + "Type": "Map", + "Items": "{% $states.input.records %}", + "ToleratedFailurePercentage": 10, + "ItemProcessor": { ... }, + "Catch": [ + { + "ErrorEquals": ["States.ExceedToleratedFailureThreshold"], + "Output": { + "error": "Too many items failed", + "details": "{% $states.errorOutput %}" + }, + "Next": "HandleBatchFailure" + }, + { + "ErrorEquals": ["States.ALL"], + "Next": "HandleMapError" + } + ], + "Next": "Done" +} +``` + +--- + +## Common Error Handling Patterns + +### Circuit Breaker with Variables + +```json +"CheckRetryCount": { + "Type": "Choice", + "Choices": [ + { + "Condition": "{% $retryCount >= $maxRetries %}", + "Next": "MaxRetriesExceeded" + } + ], + "Default": "AttemptOperation" +}, +"AttemptOperation": { + "Type": "Task", + "Resource": "...", + "Assign": { + "retryCount": "{% $retryCount + 1 %}" + }, + "Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Assign": { + "retryCount": "{% $retryCount + 1 %}", + "lastError": "{% $states.errorOutput %}" + }, + "Next": "WaitBeforeRetry" + } + ], + "Next": "Success" +}, +"WaitBeforeRetry": { + "Type": "Wait", + "Seconds": "{% $power(2, $retryCount) %}", + "Next": "CheckRetryCount" +} +``` + diff --git a/aws-step-functions-jsonata/steering/service-integrations.md b/aws-step-functions-jsonata/steering/service-integrations.md new file mode 100644 index 0000000..490c104 --- /dev/null +++ b/aws-step-functions-jsonata/steering/service-integrations.md @@ -0,0 +1,485 @@ +# Service Integrations in JSONata Mode + +## Integration Types + +Step Functions can integrate with AWS services in three 
patterns: + +1. **Optimized integrations** — Purpose-built, recommended where available (e.g., Lambda, DynamoDB, SNS, SQS, ECS, Glue, SageMaker, etc.) +2. **AWS SDK integrations** — Call any AWS SDK API action directly +3. **HTTP Task** — Call HTTPS APIs (e.g., Stripe, Salesforce) + +### Resource ARN Patterns + +``` +# Optimized integration +"Resource": "arn:aws:states:::servicename:apiAction" + +# Optimized integration (synchronous — wait for completion) +"Resource": "arn:aws:states:::servicename:apiAction.sync" + +# Optimized integration (wait for callback token) +"Resource": "arn:aws:states:::servicename:apiAction.waitForTaskToken" + +# AWS SDK integration +"Resource": "arn:aws:states:::aws-sdk:serviceName:apiAction" +``` + +--- + +## Lambda Function + +### Optimized Integration (Recommended) + +```json +"InvokeFunction": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunction:$LATEST", + "Payload": { + "orderId": "{% $states.input.orderId %}", + "customer": "{% $states.input.customer %}" + } + }, + "Output": "{% $states.result.Payload %}", + "Next": "NextState" +} +``` + +Always include a version qualifier (`:$LATEST`, `:1`, or an alias like `:prod`) on the function ARN. + +The result is wrapped in a `Payload` field, so use `$states.result.Payload` to access the Lambda return value. 
+ +### SDK Integration + +```json +"InvokeViaSDK": { + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunction", + "Payload": "{% $string($states.input) %}" + }, + "Next": "NextState" +} +``` + +--- + +## DynamoDB + +### GetItem + +```json +"GetUser": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "UsersTable", + "Key": { + "userId": { + "S": "{% $states.input.userId %}" + } + } + }, + "Assign": { + "user": "{% $states.result.Item %}" + }, + "Output": "{% $states.result.Item %}", + "Next": "ProcessUser" +} +``` + +### PutItem + +```json +"SaveOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:putItem", + "Arguments": { + "TableName": "OrdersTable", + "Item": { + "orderId": { + "S": "{% $orderId %}" + }, + "status": { + "S": "processing" + }, + "total": { + "N": "{% $string($states.input.total) %}" + }, + "createdAt": { + "S": "{% $now() %}" + } + } + }, + "Next": "ProcessOrder" +} +``` + +### UpdateItem + +```json +"UpdateStatus": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:updateItem", + "Arguments": { + "TableName": "OrdersTable", + "Key": { + "orderId": { + "S": "{% $orderId %}" + } + }, + "UpdateExpression": "SET #s = :status, updatedAt = :time", + "ExpressionAttributeNames": { + "#s": "status" + }, + "ExpressionAttributeValues": { + ":status": { + "S": "{% $states.input.newStatus %}" + }, + ":time": { + "S": "{% $now() %}" + } + } + }, + "Next": "Done" +} +``` + +### Query + +```json +"QueryOrders": { + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:dynamodb:query", + "Arguments": { + "TableName": "OrdersTable", + "KeyConditionExpression": "customerId = :cid", + "ExpressionAttributeValues": { + ":cid": { + "S": "{% $states.input.customerId %}" + } + } + }, + "Output": "{% $states.result.Items %}", + "Next": "ProcessOrders" +} +``` + +--- + +## SNS 
(Simple Notification Service) + +### Publish Message + +```json +"SendNotification": { + "Type": "Task", + "Resource": "arn:aws:states:::sns:publish", + "Arguments": { + "TopicArn": "arn:aws:sns:us-east-1:123456789012:OrderNotifications", + "Message": "{% 'Order ' & $orderId & ' has been processed successfully.' %}", + "Subject": "Order Confirmation" + }, + "Next": "Done" +} +``` + +### Publish with JSON Message + +```json +"SendStructuredNotification": { + "Type": "Task", + "Resource": "arn:aws:states:::sns:publish", + "Arguments": { + "TopicArn": "arn:aws:sns:us-east-1:123456789012:Alerts", + "Message": "{% $string({'orderId': $orderId, 'status': $states.input.status, 'timestamp': $now()}) %}" + }, + "Next": "Done" +} +``` + +--- + +## SQS (Simple Queue Service) + +### Send Message + +```json +"QueueMessage": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ProcessingQueue", + "MessageBody": "{% $string($states.input) %}" + }, + "Next": "Done" +} +``` + +### Send Message with Wait for Task Token + +```json +"WaitForApproval": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", + "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'orderId': $orderId, 'amount': $states.input.amount}) %}" + }, + "TimeoutSeconds": 86400, + "Next": "ProcessApproval" +} +``` + +The execution pauses until an external system calls `SendTaskSuccess` or `SendTaskFailure` with the task token. 
+ +--- + +## Step Functions (Nested Execution) + +### Start Execution (Synchronous) + +```json +"RunSubWorkflow": { + "Type": "Task", + "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Arguments": { + "StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:ChildWorkflow", + "Input": "{% $states.input %}" + }, + "Output": "{% $states.result.Output %}", + "Next": "ProcessSubResult" +} +``` + +Note: The `.sync:2` suffix waits for completion and returns the child output as already-parsed JSON in `$states.result.Output`. With the older `.sync` suffix the child output is a JSON string instead, so use `$parse($states.result.Output)` to deserialize it in that case. + +### Start Execution (Async — Fire and Forget) + +```json +"StartAsync": { + "Type": "Task", + "Resource": "arn:aws:states:::states:startExecution", + "Arguments": { + "StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:AsyncWorkflow", + "Input": "{% $string($states.input) %}" + }, + "Next": "Continue" +} +``` + +--- + +## EventBridge + +### Put Events + +```json +"EmitEvent": { + "Type": "Task", + "Resource": "arn:aws:states:::events:putEvents", + "Arguments": { + "Entries": [ + { + "Source": "my.application", + "DetailType": "OrderProcessed", + "Detail": "{% $string({'orderId': $orderId, 'status': 'completed'}) %}", + "EventBusName": "default" + } + ] + }, + "Next": "Done" +} +``` + +--- + +## ECS / Fargate + +### Run Task (Synchronous) + +```json +"RunContainer": { + "Type": "Task", + "Resource": "arn:aws:states:::ecs:runTask.sync", + "Arguments": { + "LaunchType": "FARGATE", + "Cluster": "arn:aws:ecs:us-east-1:123456789012:cluster/MyCluster", + "TaskDefinition": "arn:aws:ecs:us-east-1:123456789012:task-definition/MyTask:1", + "NetworkConfiguration": { + "AwsvpcConfiguration": { + "Subnets": ["subnet-abc123"], + "SecurityGroups": ["sg-abc123"], + "AssignPublicIp": "ENABLED" + } + }, + "Overrides": { + "ContainerOverrides": [ + { + "Name": "my-container", + "Environment": [ + { + "Name": "ORDER_ID", + "Value": "{% $orderId %}" + } + ] + } + ] + } + }, + "TimeoutSeconds": 
600, + "Next": "Done" +} +``` + +--- + +## AWS Glue + +### Start Job Run (Synchronous) + +```json +"RunGlueJob": { + "Type": "Task", + "Resource": "arn:aws:states:::glue:startJobRun.sync", + "Arguments": { + "JobName": "my-etl-job", + "Arguments": { + "--input_path": "{% $states.input.inputPath %}", + "--output_path": "{% $states.input.outputPath %}" + } + }, + "TimeoutSeconds": 3600, + "Next": "Done" +} +``` + +--- + +## Amazon Bedrock + +### Invoke Model + +```json +"InvokeModel": { + "Type": "Task", + "Resource": "arn:aws:states:::bedrock:invokeModel", + "Arguments": { + "ModelId": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", + "ContentType": "application/json", + "Accept": "application/json", + "Body": { + "anthropic_version": "bedrock-2023-05-31", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "{% $states.input.prompt %}" + } + ] + } + }, + "Output": "{% $states.result.Body %}", + "Next": "ProcessResponse" +} +``` + +--- + +## S3 + +### GetObject + +```json +"ReadFile": { + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:s3:getObject", + "Arguments": { + "Bucket": "my-bucket", + "Key": "{% $states.input.filePath %}" + }, + "Output": "{% $states.result.Body %}", + "Next": "ProcessFile" +} +``` + +### PutObject + +```json +"WriteFile": { + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:s3:putObject", + "Arguments": { + "Bucket": "my-bucket", + "Key": "{% 'results/' & $orderId & '.json' %}", + "Body": "{% $string($states.input.results) %}" + }, + "Next": "Done" +} +``` + +--- + +## Cross-Account Access + +Use the `Credentials` field to assume a role in another account: + +```json +"CrossAccountCall": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Credentials": { + "RoleArn": "arn:aws:iam::111122223333:role/CrossAccountRole" + }, + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:111122223333:function:RemoteFunction:$LATEST", + "Payload": "{% 
$states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "Next": "Done" +} +``` + +--- + +## Synchronous vs Asynchronous Patterns + +| Pattern | Resource Suffix | Behavior | +|---------|----------------|----------| +| Request-Response | (none) | Call API and continue immediately | +| Synchronous | `.sync` | Wait for task to complete | +| Wait for Callback | `.waitForTaskToken` | Pause until external callback | + +### When to Use Each + +- **Request-Response**: Fire-and-forget operations (start a process, send a message) +- **Synchronous (`.sync`)**: When you need the result before continuing (run ECS task, execute child workflow, run Glue job) +- **Wait for Callback (`.waitForTaskToken`)**: Human approval, external system processing, long-running async operations + +### Callback Pattern Example + +```json +"WaitForHumanApproval": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", + "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'request': $states.input}) %}" + }, + "TimeoutSeconds": 604800, + "Catch": [ + { + "ErrorEquals": ["States.Timeout"], + "Output": { + "status": "approval_timeout" + }, + "Next": "HandleTimeout" + } + ], + "Next": "ApprovalReceived" +} +``` + +The external system must call `SendTaskSuccess` or `SendTaskFailure` with the task token to resume execution. 
diff --git a/aws-step-functions-jsonata/steering/variables-and-data.md b/aws-step-functions-jsonata/steering/variables-and-data.md new file mode 100644 index 0000000..8e4339b --- /dev/null +++ b/aws-step-functions-jsonata/steering/variables-and-data.md @@ -0,0 +1,498 @@ +# Variables and Data Transformation (JSONata Mode) + +## JSONata Expression Syntax + +JSONata expressions are written inside `{% %}` delimiters in string values: + +```json +"Output": "{% $states.input.customer.name %}" +"TimeoutSeconds": "{% $timeout %}" +"Condition": "{% $states.input.age >= 18 %}" +``` + +Rules: +- The string must start with `{%` (no leading spaces) and end with `%}` (no trailing spaces). +- Not all fields accept JSONata — `Type` and `Resource` must be constant strings. +- JSONata expressions can appear in string values within objects and arrays at any nesting depth. +- A string without `{% %}` is treated as a literal value. +- All string literals inside JSONata expressions must use single quotes (`'text'`), not double quotes. The expression is already inside a JSON double-quoted string, so double quotes would break the JSON. +- Use `:=` inside `( ... )` blocks to bind local variables within a single expression. These are expression-local only — they do NOT set state machine variables (use `Assign` for that). +- Complex logic is wrapped in `( expr1; expr2; ...; finalExpr )` where semicolons separate sequential expressions and the last expression is the return value. + +### String Quoting + +```json +"Output": "{% 'Hello ' & $states.input.name %}" +"Condition": "{% $states.input.status = 'active' %}" +``` + +Never use double quotes inside the expression: +``` +❌ "Output": "{% "Hello" %}" +✓ "Output": "{% 'Hello' %}" +``` + +### Local Variable Binding with `:=` + +Use `:=` inside `( ... )` blocks to bind intermediate values within a single JSONata expression. 
Semicolons separate each binding, and the last expression is the return value: + +```json +"Output": "{% ( $subtotal := $sum($states.input.items.price); $tax := $subtotal * 0.1; $discount := $exists($couponValue) ? $couponValue : 0; {'subtotal': $subtotal, 'tax': $tax, 'discount': $discount, 'total': $subtotal + $tax - $discount} ) %}" +``` + +You can also define local helper functions: + +```json +"Assign": { + "summary": "{% ( $formatPrice := function($amt) { '$' & $formatNumber($amt, '#,##0.00') }; $subtotal := $sum($states.input.items.price); {'itemCount': $count($states.input.items), 'subtotal': $formatPrice($subtotal), 'total': $formatPrice($subtotal * 1.1)} ) %}" +} +``` + +Local variables bound with `:=` exist only within the `( ... )` block. They do not affect state machine variables. To persist values across states, use the `Assign` field. + +## The `$states` Reserved Variable + +Step Functions provides a reserved `$states` variable in every JSONata state: + +``` +$states = { + "input": // Original input to the state + "result": // Task/Parallel/Map result (if successful) + "errorOutput": // Error Output (only available in Catch) + "context": // Context object (execution metadata) +} +``` + +### Where Each Field Is Accessible + +| Field | Accessible In | +|-------|--------------| +| `$states.input` | All fields that accept JSONata, in any state | +| `$states.result` | Top-level `Output` and `Assign` in Task, Parallel, Map states | +| `$states.errorOutput` | `Output` and `Assign` inside a `Catch` block | +| `$states.context` | All fields that accept JSONata, in any state | + +### Context Object + +`$states.context` provides execution metadata: + +```json +"executionId": "{% $states.context.Execution.Id %}", +"startTime": "{% $states.context.Execution.StartTime %}", +"stateName": "{% $states.context.State.Name %}", +"originalInput": "{% $states.context.Execution.Input %}" +``` + +Useful context fields: +- `$states.context.Execution.Id` — Execution ARN +- 
`$states.context.Execution.Input` — Original workflow input +- `$states.context.Execution.Name` — Execution name +- `$states.context.Execution.StartTime` — When execution started +- `$states.context.State.Name` — Current state name +- `$states.context.State.EnteredTime` — When current state was entered +- `$states.context.StateMachine.Id` — State machine ARN +- `$states.context.StateMachine.Name` — State machine name + +Inside Map state `ItemSelector`: +- `$states.context.Map.Item.Value` — Current array element +- `$states.context.Map.Item.Index` — Zero-based index + +## JSONata Restrictions in Step Functions + +1. **No `$` or `$$` at top level**: You cannot use `$` or `$$` to reference an implicit input document. Use `$states.input` instead. + - Invalid: `"Output": "{% $.name %}"` (top-level `$`) + - Valid: `"Output": "{% $states.input.name %}"` + - Valid inside expressions: `"Output": "{% $states.input.items[$.price > 10] %}"` (nested `$` is OK) + +2. **No unqualified field names at top level**: Use variables or `$states.input`. + - Invalid: `"Output": "{% name %}"` (unqualified) + - Valid: `"Output": "{% $states.input.name %}"` + +3. **No `$eval`**: Use `$parse()` instead for deserializing JSON strings. + +4. **Expressions must produce a defined value**: `$data.nonExistentField` throws `States.QueryEvaluationError` because JSON cannot represent undefined. + +--- + +## Workflow Variables with `Assign` + +Variables let you store data in one state and reference it in any subsequent state, without threading data through Output/Input chains. 
+ +### Declaring Variables + +```json +"StoreData": { + "Type": "Pass", + "Assign": { + "productName": "product1", + "count": 42, + "available": true, + "config": "{% $states.input.configuration %}" + }, + "Next": "UseData" +} +``` + +### Referencing Variables + +Prepend the variable name with `$`: + +```json +"Arguments": { + "product": "{% $productName %}", + "quantity": "{% $count %}" +} +``` + +### Assigning from Task Results + +```json +"FetchPrice": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetPrice:$LATEST", + "Payload": { + "product": "{% $states.input.product %}" + } + }, + "Assign": { + "currentPrice": "{% $states.result.Payload.price %}" + }, + "Output": "{% $states.result.Payload %}", + "Next": "CheckPrice" +} +``` + +### States That Support Assign + +Pass, Task, Map, Parallel, Choice, Wait — all support `Assign`. + +Succeed and Fail do NOT support `Assign`. + +### Assign in Choice Rules and Catch + +Choice Rules and Catch blocks can each have their own `Assign`: + +```json +"CheckValue": { + "Type": "Choice", + "Choices": [ + { + "Condition": "{% $states.input.value > 100 %}", + "Assign": { + "tier": "premium" + }, + "Next": "PremiumPath" + } + ], + "Default": "StandardPath", + "Assign": { + "tier": "standard" + } +} +``` + +If a Choice Rule matches, its `Assign` is used. If no rule matches, the state-level `Assign` is used. + +--- + +## Variable Evaluation Order + +All expressions in `Assign` are evaluated using variable values as they were on state entry. New values only take effect in the next state. + +```json +"SwapExample": { + "Type": "Pass", + "Assign": { + "x": "{% $y %}", + "y": "{% $x %}" + }, + "Next": "AfterSwap" +} +``` + +If `$x = 3` and `$y = 6` on entry, after this state: `$x = 6`, `$y = 3`. This works because all expressions are evaluated first, then assignments are made. 
+ +You cannot assign to a sub-path of a variable: +- Valid: `"Assign": {"x": 42}` +- Invalid: `"Assign": {"x.y": 42}` or `"Assign": {"x[2]": 42}` + +--- + +## Variable Scope + +Variables exist in a state-machine-local scope: + +- **Outer scope**: All states in the top-level `States` field. +- **Inner scope**: States inside a Parallel branch or Map iteration. + +### Scope Rules + +1. Inner scopes can READ variables from outer scopes. +2. Inner scopes CANNOT ASSIGN to variables that exist in an outer scope. +3. Variable names must be unique across outer and inner scopes (no shadowing). +4. Variables in different Parallel branches or Map iterations are isolated from each other. +5. When a Parallel branch or Map iteration completes, its variables go out of scope. +6. Exception: Distributed Map states cannot reference variables in outer scopes. + +### Passing Data Out of Inner Scopes + +Use `Output` on terminal states within branches/iterations to return data to the outer scope: + +```json +"ParallelWork": { + "Type": "Parallel", + "Branches": [ + { + "StartAt": "BranchA", + "States": { + "BranchA": { + "Type": "Task", + "Resource": "...", + "Output": "{% $states.result.Payload %}", + "End": true + } + } + } + ], + "Assign": { + "branchAResult": "{% $states.result[0] %}" + }, + "Next": "Continue" +} +``` + +### Catch Assign and Outer Scope + +In a Catch block on a Parallel or Map state, `Assign` can assign values to variables in the outer scope (the scope where the Parallel/Map state exists): + +```json +"Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Assign": { + "errorOccurred": true, + "errorDetails": "{% $states.errorOutput %}" + }, + "Next": "HandleError" + } +] +``` + +--- + +## Arguments and Output Fields + +### Arguments + +Provides input to Task and Parallel states (replaces JSONPath `Parameters`): + +```json +"Arguments": { + "staticField": "hello", + "dynamicField": "{% $states.input.name %}", + "computed": "{% $count($states.input.items) %}" +} +``` + +Or 
as a single JSONata expression: + +```json +"Arguments": "{% $states.input.payload %}" +``` + +`Arguments` can reference `$states.input` and `$states.context`, but NOT `$states.result` or `$states.errorOutput`. + +### Output + +Transforms the state output (replaces JSONPath `ResultSelector` + `ResultPath` + `OutputPath`): + +```json +"Output": { + "customerId": "{% $states.input.id %}", + "result": "{% $states.result.Payload %}", + "processedAt": "{% $now() %}" +} +``` + +Or as a single expression or literal value: + +```json +"Output": "{% $states.result.Payload %}" +"Output": 42 +"Output": { "status": "done" } +``` + +If `Output` is not provided: +- Task, Parallel, Map: state output = the result +- All other states: state output = the state input + +### Assign and Output Are Parallel + +`Assign` and `Output` are evaluated in parallel. Variable assignments in `Assign` are NOT available in `Output` of the same state — you must re-derive values in both if needed: + +```json +"Assign": { + "savedPrice": "{% $states.result.Payload.price %}" +}, +"Output": { + "price": "{% $states.result.Payload.price %}" +} +``` + +--- + +## Variable Limits + +| Limit | Value | +|-------|-------| +| Max size of a single variable | 256 KiB | +| Max combined size in a single Assign | 256 KiB | +| Max total stored variables per execution | 10 MiB | +| Max variable name length | 80 Unicode characters | + +--- + +## Data Transformation Patterns + +### Filtering Arrays + +```json +"Output": { + "expensiveItems": "{% $states.input.items[price > 100] %}" +} +``` + +### Aggregation + +```json +"Output": { + "total": "{% $sum($states.input.items.price) %}", + "average": "{% $average($states.input.items.price) %}", + "count": "{% $count($states.input.items) %}" +} +``` + +### String Operations + +```json +"Output": { + "fullName": "{% $states.input.firstName & ' ' & $states.input.lastName %}", + "upper": "{% $uppercase($states.input.name) %}", + "trimmed": "{% $trim($states.input.rawInput) %}" 
+} +``` + +### Object Merging + +```json +"Output": "{% $merge([$states.input, {'processedAt': $now(), 'status': 'complete'}]) %}" +``` + +### Building Lookup Maps with `$reduce` + +Use `$reduce` to transform an array into a key-value object: + +```json +"Assign": { + "priceByProduct": "{% $reduce($states.input.items, function($acc, $item) { $merge([$acc, {$item.productId: $item.price}]) }, {}) %}" +} +``` + +Given `[{"productId": "A1", "price": 10}, {"productId": "B2", "price": 25}]`, this produces `{"A1": 10, "B2": 25}`. + +### Dynamic Key Access with `$lookup` + +Use `$lookup` to access an object property by a variable key: + +```json +"Output": { + "price": "{% $lookup($priceByProduct, $states.input.productId) %}" +} +``` + +This is essential when you've built a mapping object with `$reduce` and need to retrieve values dynamically. Standard dot notation (`$priceByProduct.someKey`) only works with literal key names. + +### Conditional Values + +```json +"Output": { + "tier": "{% $states.input.total > 1000 ? 'gold' : 'standard' %}", + "discount": "{% $exists($states.input.coupon) ? 
0.1 : 0 %}" +} +``` + +### Array Membership with `in` and Concatenation with `$append` + +Test if a value exists in an array with `in`: + +```json +"Condition": "{% $states.input.status in ['pending', 'processing', 'shipped'] %}" +``` + +Concatenate arrays with `$append`: + +```json +"Assign": { + "allIds": "{% $append($states.input.orderIds, $states.input.returnIds) %}" +} +``` + +### Array Mapping + +```json +"Output": { + "names": "{% $states.input.users.(firstName & ' ' & lastName) %}" +} +``` + +### Generating UUIDs and Random Values + +```json +"Assign": { + "requestId": "{% $uuid() %}", + "randomValue": "{% $random() %}" +} +``` + +### Partitioning Arrays + +```json +"Assign": { + "batches": "{% $partition($states.input.items, 10) %}" +} +``` + +### Parsing JSON Strings + +```json +"Assign": { + "parsed": "{% $parse($states.input.jsonString) %}" +} +``` + +### Hashing + +```json +"Assign": { + "hash": "{% $hash($states.input.content, 'SHA-256') %}" +} +``` + +### Timestamp Comparison with `$toMillis` + +JSONata timestamps are strings, so you can't compare them directly with `<` or `>`. 
Use `$toMillis` to convert to numeric milliseconds: + +```json +"Condition": "{% $toMillis($states.input.orderDate) > $toMillis($states.input.cutoffDate) %}" +``` + +Useful for sorting timestamps, calculating durations, or finding the most recent entry: + +```json +"Assign": { + "ageMinutes": "{% $round(($toMillis($now()) - $toMillis($states.input.createdAt)) / 60000, 2) %}", + "mostRecent": "{% $sort($states.input.timestamps, function($a, $b) { $toMillis($a) < $toMillis($b) })[0] %}" +} +``` From 4806582e078d0559d4adf0e9c1e4888ab2f0d4fb Mon Sep 17 00:00:00 2001 From: Jeff Palmer Date: Fri, 27 Mar 2026 15:09:32 -0500 Subject: [PATCH 2/7] Added steering files for converting from JSONPath to JSONata and validation and testing Tested all code snippets with this power and fixed all discovered issues Reviewed POWER.md and updated it --- .kiro/hooks/update-readme.kiro.hook | 15 - aws-step-functions/POWER.md | 234 ++++++++ .../steering/architecture-patterns.md | 488 +++++++++++++++++ .../steering/asl-state-types.md | 474 +++++++++++++++++ .../converting-from-jsonpath-to-jsonata.md | 364 +++++++++++++ aws-step-functions/steering/error-handling.md | 445 ++++++++++++++++ .../steering/service-integrations.md | 485 +++++++++++++++++ .../steering/validation-and-testing.md | 444 ++++++++++++++++ .../steering/variables-and-data.md | 498 ++++++++++++++++++ 9 files changed, 3432 insertions(+), 15 deletions(-) delete mode 100644 .kiro/hooks/update-readme.kiro.hook create mode 100644 aws-step-functions/POWER.md create mode 100644 aws-step-functions/steering/architecture-patterns.md create mode 100644 aws-step-functions/steering/asl-state-types.md create mode 100644 aws-step-functions/steering/converting-from-jsonpath-to-jsonata.md create mode 100644 aws-step-functions/steering/error-handling.md create mode 100644 aws-step-functions/steering/service-integrations.md create mode 100644 aws-step-functions/steering/validation-and-testing.md create mode 100644 
aws-step-functions/steering/variables-and-data.md diff --git a/.kiro/hooks/update-readme.kiro.hook b/.kiro/hooks/update-readme.kiro.hook deleted file mode 100644 index 04308e3..0000000 --- a/.kiro/hooks/update-readme.kiro.hook +++ /dev/null @@ -1,15 +0,0 @@ -{ - "enabled": true, - "name": "Update README", - "description": "Monitors changes to README.md and triggers an agent to review and update the documentation", - "version": "1", - "when": { - "type": "userTriggered" - }, - "then": { - "type": "askAgent", - "prompt": "The README.md file contains details about each power in this repo. Please review the codebase and make changes to README.md to create, update or remove details regarding each power." - }, - "workspaceFolderName": "kiro-powers", - "shortName": "update-readme" -} \ No newline at end of file diff --git a/aws-step-functions/POWER.md b/aws-step-functions/POWER.md new file mode 100644 index 0000000..ba4cecc --- /dev/null +++ b/aws-step-functions/POWER.md @@ -0,0 +1,234 @@ +--- +name: "aws-step-functions" +displayName: "AWS Step Functions" +description: "Build AWS Step Functions state machines using the JSONata query language. Covers Amazon States Language (ASL) structure, all state types, variables, data transformation, error handling, and service integrations." +keywords: ["step functions", "state machine", "serverless", "jsonata", "asl", "amazon states language", "workflow orchestration"] +author: "[Jeff Palmer](https://linkedin.com/in/jeffrey-palmer/)" +--- + +# AWS Step Functions + +AWS Step Functions provides visual workflow orchestration with native integrations to 9,000+ API actions across 200+ AWS services. Define workflows as state machines in Amazon States Language using the JSONata query language instead of legacy JSONPath. + +## Overview + +AWS Step Functions uses Amazon States Language (ASL) to define state machines as JSON. 
With AWS Step Functions, you can create workflows, also called State machines, to build distributed applications, automate processes, orchestrate microservices, and create data and machine learning pipelines. + +This power provides comprehensive guidance for writing state machines in ASL, covering: +- ASL structure and JSONata expression syntax +- Details on the eight available workflow states +- The `$states` reserved variable +- Workflow variables with `Assign` +- Error handling +- AWS Service integration patterns +- Data transformation and architecture examples +- Validation and testing of ASL structure +- How to migrate from JSONPath to JSONata + +## When to Load Steering Files + +Load the appropriate steering file based on what the user is working on: + +- **ASL structure**, **state types**, **Task**, **Pass**, **Choice**, **Wait**, **Succeed**, **Fail**, **Parallel**, **Map** → see `asl-state-types.md` +- **Variables**, **Assign**, **data passing**, **scope**, **$states**, **input**, **output**, **Arguments**, **Output**, **data transformation** → see `variables-and-data.md` +- **Error handling**, **Retry**, **Catch**, **fallback**, **error codes**, **States.Timeout**, **States.ALL** → see `error-handling.md` +- **Service integrations**, **Lambda invoke**, **DynamoDB**, **SNS**, **SQS**, **SDK integrations**, **Resource ARN**, **sync**, **async** → see `service-integrations.md` +- **Converting from JSONPath**, **migration**, **JSONPath to JSONata**, **InputPath**, **Parameters**, **ResultSelector**, **ResultPath**, **OutputPath**, **intrinsic functions**, **Iterator**, **payload template** → see `converting-from-jsonpath-to-jsonata.md` +- **Validation**, **linting**, **testing**, **TestState**, **test state**, **mock**, **mocking**, **unit test**, **inspection level**, **DEBUG**, **TRACE**, **validate state**, **test in isolation** → see `validation-and-testing.md` + +## Quick Reference + +### Standard vs Express Workflows + +| | Standard | Express | +| 
--------------------------------- | ------------------------------------ | ------------------------------------------- | +| **Max duration** | 1 year | 5 minutes | +| **Execution semantics** | Exactly-once | At-least-once (async) / At-most-once (sync) | +| **Execution history** | Retained 90 days, queryable via API | CloudWatch Logs only | +| **Max throughput** | 2,000 exec/sec | 100,000 exec/sec | +| **Pricing model** | Per state transition | Per execution count + duration | +| **`.sync` / `.waitForTaskToken`** | Supported | Not supported | +| **Best for** | Auditable, non-idempotent operations | High-volume, idempotent event processing | + +**Choose Standard** for: payment processing, order fulfillment, compliance workflows, anything that must never execute twice. + +**Choose Express** for: IoT data ingestion, streaming transformations, mobile backends, high-throughput short-lived processing. + +### Key State Types + +| State | Purpose | +| ------------------ | ------------------------------------------------------------------------------------ | +| `Task` | Execute work — invoke Lambda, call any AWS service via SDK integration | +| `Choice` | Branch based on input data conditions (each rule sets its own `Next`; the state itself has no top-level `Next`) | +| `Parallel` | Execute multiple branches concurrently; waits for all branches to complete | +| `Map` | Iterate over an array; use Distributed Map mode for up to 10M items from S3/DynamoDB | +| `Wait` | Pause for a fixed duration or until a specific timestamp | +| `Pass` | Pass input to output, optionally injecting or transforming data | +| `Succeed` / `Fail` | End execution successfully or with an error and cause | + +### Setting the State Machine Query Language + +JSONata is the modern, preferred way to reference and transform data in ASL. It replaces the five JSONPath I/O fields (`InputPath`, `Parameters`, `ResultSelector`, `ResultPath`, `OutputPath`) with just two: `Arguments` (inputs) and `Output`. 
+ +**Enable at the top level** to apply to all states: + +```json +{ "QueryLanguage": "JSONata", "StartAt": "...", "States": {...} } +``` + +**Or per-state** to migrate from JSONPath incrementally: + +```json +{ "Type": "Task", "QueryLanguage": "JSONata", ... } +``` + +**JSONata Expression syntax** + +Wrap expressions in `{% %}`: +```json +"Arguments": { + "userId": "{% $states.input.user.id %}", + "greeting": "{% 'Hello, ' & $states.input.user.name %}", + "total": "{% $sum($states.input.items.price) %}" +} +``` + +**Built-in Step Functions JSONata functions:** + +| Function | Purpose | +|----------|---------| +| `$partition(array, size)` | Partition array into chunks | +| `$range(start, end, step)` | Generate array of values | +| `$hash(data, algorithm)` | Calculate hash (MD5, SHA-1, SHA-256, SHA-384, SHA-512) | +| `$random([seed])` | Random number 0 ≤ n < 1, optional seed | +| `$uuid()` | Generate v4 UUID | +| `$parse(jsonString)` | Deserialize JSON string | + +**JSONPath is still supported** and is the default if `QueryLanguage` is omitted — existing state machines do not need to be migrated. 
+ +### The `$states` Reserved Variable (JSONata only) + +``` +$states.input → Original state input +$states.result → Task/Parallel/Map result (on success) +$states.errorOutput → Error output (only in Catch) +$states.context → Execution context object +``` + +### Key Fields in Step Functions (JSONata only) + +| Field | Purpose | Available In | +|-------|---------|-------------| +| `Arguments` | Input to task/branches | Task, Parallel | +| `Output` | Transform state output | All except Fail | +| `Assign` | Store workflow variables | All except Succeed, Fail | +| `Condition` | Boolean branching | Choice rules | +| `Items` | Array for iteration | Map | + +### Functions Provided by Step Functions (JSONata only) + +| Function | Purpose | +|----------|---------| +| `$partition(array, size)` | Partition array into chunks | +| `$range(start, end, step)` | Generate array of values | +| `$hash(data, algorithm)` | Calculate hash (MD5, SHA-1, SHA-256, SHA-384, SHA-512) | +| `$random([seed])` | Random number 0 ≤ n < 1, optional seed | +| `$uuid()` | Generate v4 UUID | +| `$parse(jsonString)` | Deserialize JSON string | + +Plus all [built-in JSONata functions](https://github.com/jsonata-js/jsonata/tree/master/docs) + +### Minimal Complete Example + +```json +{ + "Comment": "Order processing workflow", + "QueryLanguage": "JSONata", + "StartAt": "ValidateOrder", + "States": { + "ValidateOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "OrdersTable", + "Key": { + "orderId": { + "S": "{% $states.input.orderId %}" + } + } + }, + "Assign": { + "orderId": "{% $states.input.orderId %}" + }, + "Output": "{% $states.result.Item %}", + "Next": "CheckStock" + }, + "CheckStock": { + "Type": "Choice", + "Choices": [ + { + "Condition": "{% $states.input.inStock.BOOL = true %}", + "Next": "ProcessPayment" + } + ], + "Default": "OutOfStock" + }, + "ProcessPayment": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage", +
"Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/PaymentQueue", + "MessageBody": "{% $string({'orderId': $orderId, 'amount': $states.input.total.N}) %}" + }, + "Output": { + "orderId": "{% $orderId %}", + "messageId": "{% $states.result.MessageId %}" + }, + "Retry": [ + { + "ErrorEquals": ["States.TaskFailed"], + "IntervalSeconds": 2, + "MaxAttempts": 3, + "BackoffRate": 2.0 + } + ], + "End": true + }, + "OutOfStock": { + "Type": "Fail", + "Error": "OutOfStockError", + "Cause": "Requested item is out of stock" + } + } +} +``` + +## Best Practices + +- Set `"QueryLanguage": "JSONata"` at the top level for new state machines unless JSONPath is mandatory +- Keep `Output` minimal — only include what the next state needs +- Use `Assign` to store variables needed in later states instead of threading them through `Output` +- Use `$states.input` to reference original state input +- Remember: `Assign` and `Output` are evaluated in parallel — variable assignments in `Assign` are NOT available in `Output` of the same state +- All JSONata expressions must produce a defined value — `$data.nonExistentField` throws `States.QueryEvaluationError` +- Use `$states.context.Execution.Input` to access the original workflow input from any state +- Save state machine definitions with `.asl.json` extension when working outside the console +- Prefer the optimized Lambda integration (`arn:aws:states:::lambda:invoke`) over the SDK integration + +## Troubleshooting + +### Common Errors + +- `States.QueryEvaluationError` — JSONata expression failed. Check for type errors, undefined fields, or out-of-range values. +- Mixing JSONPath fields with JSONata fields in the same state. +- Using `$` or `$$` at the top level of a JSONata expression — use `$states.input` instead. +- Forgetting `{% %}` delimiters around JSONata expressions — the string will be treated as a literal. 
+- Assigning variables in `Assign` and expecting them in `Output` of the same state — new values only take effect in the next state. + +## Resources + +- [ASL Specification](https://states-language.net/spec.html) +- [Transforming data with JSONata in Step Functions](https://docs.aws.amazon.com/step-functions/latest/dg/transforming-data.html) +- [Passing data between states with variables](https://docs.aws.amazon.com/step-functions/latest/dg/workflow-variables.html) +- [JSONata documentation](https://docs.jsonata.org/overview.html) +- [Step Functions Developer Guide](https://docs.aws.amazon.com/step-functions/latest/dg/welcome.html) diff --git a/aws-step-functions/steering/architecture-patterns.md b/aws-step-functions/steering/architecture-patterns.md new file mode 100644 index 0000000..74e35f8 --- /dev/null +++ b/aws-step-functions/steering/architecture-patterns.md @@ -0,0 +1,488 @@ +# Architecture Patterns (JSONata Mode) + +## Polling Loop (Wait → Check → Choice) + +Many AWS operations are asynchronous — you start them and then poll until they complete. The pattern is: initial wait → call describe/status API → check result → short wait → loop back. 
+ +```json +"SubmitOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/FulfillmentQueue", + "MessageBody": "{% $string({'orderId': $orderId, 'items': $states.input.items}) %}" + }, + "Assign": { "fulfillmentOrderId": "{% $orderId %}" }, + "Next": "InitialWaitForFulfillment" +}, +"InitialWaitForFulfillment": { + "Type": "Wait", + "Seconds": 300, + "Next": "CheckFulfillmentStatus" +}, +"CheckFulfillmentStatus": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "OrdersTable", + "Key": { "orderId": { "S": "{% $fulfillmentOrderId %}" } } + }, + "Assign": { "orderStatus": "{% $states.result.Item.status.S %}" }, + "Next": "EvaluateFulfillment", + "Retry": [ + { "ErrorEquals": ["States.TaskFailed", "ThrottlingException"], "IntervalSeconds": 2, "MaxAttempts": 3, "BackoffRate": 2 } + ] +}, +"EvaluateFulfillment": { + "Type": "Choice", + "Choices": [ + { "Condition": "{% $orderStatus = 'fulfilled' %}", "Next": "FulfillmentComplete" }, + { "Condition": "{% $orderStatus in ['failed', 'cancelled'] %}", "Next": "FulfillmentFailed" } + ], + "Default": "WaitBeforeNextPoll" +}, +"WaitBeforeNextPoll": { + "Type": "Wait", + "Seconds": 60, + "Next": "CheckFulfillmentStatus" +} +``` + +Key elements: +- Initial longer wait gives the operation time to start. Shorter poll interval for subsequent checks. +- Choice state routes to success, failure, or back to the wait loop. +- Always add Retry on the status-check Task to handle transient API errors. +- Consider adding `TimeoutSeconds` on the state machine or a counter variable to prevent infinite polling. + +--- + +## Compensation / Saga Pattern + +Step Functions has no built-in rollback. The saga pattern chains compensating actions in reverse order. Each forward step has a Catch that records which step failed, then routes to the appropriate compensation entry point. 
+ +```json +"ReserveInventory": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:updateItem", + "Arguments": { + "TableName": "InventoryTable", + "Key": { "productId": { "S": "{% $states.input.productId %}" } }, + "UpdateExpression": "SET reserved = reserved + :qty", + "ExpressionAttributeValues": { ":qty": { "N": "{% $string($states.input.quantity) %}" } } + }, + "Assign": { "reservedQty": "{% $states.input.quantity %}" }, + "Catch": [ + { "ErrorEquals": ["States.ALL"], "Assign": { "failedStep": "ReserveInventory", "errorInfo": "{% $states.errorOutput %}" }, "Next": "OrderFailed" } + ], + "Next": "ChargePayment" +}, +"ChargePayment": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargeCard:$LATEST", + "Payload": { "orderId": "{% $orderId %}", "amount": "{% $states.input.total %}" } + }, + "Assign": { "chargeId": "{% $states.result.Payload.chargeId %}" }, + "Output": "{% $states.result.Payload %}", + "Catch": [ + { "ErrorEquals": ["States.ALL"], "Assign": { "failedStep": "ChargePayment", "errorInfo": "{% $states.errorOutput %}" }, "Next": "ReleaseInventory" } + ], + "Next": "ShipOrder" +}, +"ShipOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ShipOrder:$LATEST", + "Payload": { "orderId": "{% $orderId %}" } + }, + "Catch": [ + { "ErrorEquals": ["States.ALL"], "Assign": { "failedStep": "ShipOrder", "errorInfo": "{% $states.errorOutput %}" }, "Next": "RefundPayment" } + ], + "Next": "OrderComplete" +}, +"RefundPayment": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:RefundCharge:$LATEST", + "Payload": { "chargeId": "{% $chargeId %}", "reason": "{% $errorInfo.Cause %}" } + }, + "Next": "ReleaseInventory" +}, +"ReleaseInventory": { + "Type": 
"Task", + "Resource": "arn:aws:states:::dynamodb:updateItem", + "Arguments": { + "TableName": "InventoryTable", + "Key": { "productId": { "S": "{% $states.input.productId %}" } }, + "UpdateExpression": "SET reserved = reserved - :qty", + "ExpressionAttributeValues": { ":qty": { "N": "{% $string($reservedQty) %}" } } + }, + "Next": "OrderFailed" +}, +"OrderFailed": { + "Type": "Fail", + "Error": "{% $failedStep & 'Error' %}", + "Cause": "{% 'Order ' & $orderId & ' failed at ' & $failedStep & ': ' & ($exists($errorInfo.Cause) ? $errorInfo.Cause : 'Unknown') %}" +} +``` + +Compensation chain: `ReserveInventory` fails → `OrderFailed`. `ChargePayment` fails → `ReleaseInventory` → `OrderFailed`. `ShipOrder` fails → `RefundPayment` → `ReleaseInventory` → `OrderFailed`. Each Catch records `$failedStep` and `$errorInfo`. Compensation states use variables from forward steps (`$chargeId`, `$reservedQty`) to know what to undo. + +--- + +## Nested Map / Parallel Structures + +Map, Parallel, and Task states nest in any combination. The key constraint is understanding variable scope and data flow at each nesting boundary. 
+ +```json +"ProcessAllOrders": { + "Type": "Map", + "Items": "{% $states.input.orders %}", + "MaxConcurrency": 5, + "ItemProcessor": { + "ProcessorConfig": { "Mode": "INLINE" }, + "StartAt": "ProcessSingleOrder", + "States": { + "ProcessSingleOrder": { + "Type": "Parallel", + "Branches": [ + { + "StartAt": "ValidatePayment", + "States": { + "ValidatePayment": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ValidatePayment:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + }, + { + "StartAt": "CheckInventory", + "States": { + "CheckInventory": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "InventoryTable", + "Key": { "productId": { "S": "{% $states.input.productId %}" } } + }, + "Output": "{% $states.result.Item %}", + "End": true + } + } + } + ], + "Output": { "payment": "{% $states.result[0] %}", "inventory": "{% $states.result[1] %}" }, + "End": true + } + } + }, + "Assign": { "orderResults": "{% $states.result %}" }, + "Next": "Summarize" +} +``` + +### Variable Scoping Across Nesting Levels + +Each nesting level creates a new scope. Inner scopes can READ outer variables but CANNOT ASSIGN to them — use `Output` on terminal states to pass data back up. Parallel branches and Map iterations are isolated from each other. Variable names must be unique across all nesting levels (no shadowing). Exception: Distributed Map (`"Mode": "DISTRIBUTED"`) cannot read outer scope variables at all. + +Data flows down via state input (use `ItemSelector` for Map, `Arguments` for Parallel) and up via `Output` on terminal states. Parallel result is an array per branch; Map result is an array per iteration. 
+ +--- + +## Scatter-Gather with Partial Results + +When calling unreliable external APIs per-item, use `ToleratedFailurePercentage` on a Map to continue with whatever succeeded, then post-process the results to separate successes from failures. Failed iterations return objects with `Error` and `Cause` fields. + +```json +"CallExternalAPIs": { + "Type": "Map", + "Items": "{% $states.input.records %}", + "MaxConcurrency": 10, + "ToleratedFailurePercentage": 100, + "ItemProcessor": { + "ProcessorConfig": { "Mode": "INLINE" }, + "StartAt": "CallAPI", + "States": { + "CallAPI": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:CallExternalAPI:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "Retry": [ + { "ErrorEquals": ["States.TaskFailed"], "IntervalSeconds": 2, "MaxAttempts": 2, "BackoffRate": 2.0, "JitterStrategy": "FULL" } + ], + "End": true + } + } + }, + "Next": "SplitResults" +}, +"SplitResults": { + "Type": "Pass", + "Assign": { + "successes": "{% ( $s := $states.input[$not($exists(Error))]; $type($s) = 'array' ? $s : $exists($s) ? [$s] : [] ) %}", + "failures": "{% ( $f := $states.input[$exists(Error)]; $type($f) = 'array' ? $f : $exists($f) ? [$f] : [] ) %}" + }, + "Output": { + "successes": "{% ( $s := $states.input[$not($exists(Error))]; $type($s) = 'array' ? $s : $exists($s) ? [$s] : [] ) %}", + "failures": "{% ( $f := $states.input[$exists(Error)]; $type($f) = 'array' ? $f : $exists($f) ? [$f] : [] ) %}", + "totalProcessed": "{% $count($states.input) %}" + }, + "Next": "EvaluateResults" +}, +"EvaluateResults": { + "Type": "Choice", + "Choices": [ + { "Condition": "{% $count($successes) = 0 %}", "Next": "AllFailed" } + ], + "Default": "ProcessSuccesses" +} +``` + +Key elements: +- `ToleratedFailurePercentage: 100` lets the Map complete even if every item fails. Lower the threshold to bail out early. 
+- Filter on `$exists(Error)` to separate failed from successful iterations. +- Guard filtered results with the `$type`/`$exists`/`[]` pattern — JSONata returns a single object (not a 1-element array) when exactly one item matches, and undefined when nothing matches. + +--- + +## Semaphore / Concurrency Lock + +Step Functions has no native mutual exclusion. Use DynamoDB conditional writes as a distributed lock when only one execution should process a given resource at a time. Pattern: acquire lock → do work → release lock, with Catch ensuring release on failure. + +```json +"AcquireLock": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:putItem", + "Arguments": { + "TableName": "LocksTable", + "Item": { + "lockId": { "S": "{% $states.input.customerId %}" }, + "executionId": { "S": "{% $states.context.Execution.Id %}" }, + "expiresAt": { "N": "{% $string($toMillis($now()) + 900000) %}" } + }, + "ConditionExpression": "attribute_not_exists(lockId) OR expiresAt < :now", + "ExpressionAttributeValues": { + ":now": { "N": "{% $string($toMillis($now())) %}" } + } + }, + "Retry": [ + { "ErrorEquals": ["DynamoDB.ConditionalCheckFailedException"], "IntervalSeconds": 5, "MaxAttempts": 12, "BackoffRate": 1.5, "JitterStrategy": "FULL" } + ], + "Catch": [ + { "ErrorEquals": ["DynamoDB.ConditionalCheckFailedException"], "Assign": { "lockError": "{% $states.errorOutput %}" }, "Next": "LockUnavailable" } + ], + "Next": "DoProtectedWork" +}, +"DoProtectedWork": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ProcessCustomer:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "Catch": [ + { "ErrorEquals": ["States.ALL"], "Assign": { "workError": "{% $states.errorOutput %}" }, "Next": "ReleaseLock" } + ], + "Next": "ReleaseLock" +}, +"ReleaseLock": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:deleteItem", + 
"Arguments": { + "TableName": "LocksTable", + "Key": { "lockId": { "S": "{% $states.input.customerId %}" } }, + "ConditionExpression": "executionId = :execId", + "ExpressionAttributeValues": { ":execId": { "S": "{% $states.context.Execution.Id %}" } } + }, + "Retry": [ + { "ErrorEquals": ["States.ALL"], "IntervalSeconds": 1, "MaxAttempts": 3, "BackoffRate": 2.0 } + ], + "Next": "CheckWorkResult" +}, +"CheckWorkResult": { + "Type": "Choice", + "Choices": [ + { "Condition": "{% $exists($workError) %}", "Next": "WorkFailed" } + ], + "Default": "Done" +}, +"LockUnavailable": { + "Type": "Fail", + "Error": "LockContention", + "Cause": "{% 'Could not acquire lock for ' & $states.input.customerId & ' after retries' %}" +} +``` + +Key elements: +- `ConditionExpression` with `attribute_not_exists` ensures only one writer wins. The `expiresAt` check provides stale-lock recovery if an execution crashes without releasing. +- `executionId` on the lock item lets `ReleaseLock` conditionally delete only its own lock. +- Retry on `ConditionalCheckFailedException` acts as a spin-wait. Tune `MaxAttempts` and `IntervalSeconds` based on expected hold time. +- Catch on `DoProtectedWork` routes to `ReleaseLock` so the lock is always released. After releasing, `CheckWorkResult` re-raises the error path. +- Set `expiresAt` to a reasonable TTL (here 15 min). Use a DynamoDB TTL attribute to auto-clean expired locks. + +--- + +## Human-in-the-Loop with Timeout Escalation + +Chain multiple `.waitForTaskToken` states with `States.Timeout` catches to build escalation: primary approver → manager → auto-reject. 
+ +```json +"RequestApproval": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", + "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'orderId': $orderId, 'approver': $states.input.primaryApprover, 'amount': $states.input.amount}) %}" + }, + "TimeoutSeconds": 86400, + "Assign": { "approvalResult": "{% $states.result %}" }, + "Catch": [ + { "ErrorEquals": ["States.Timeout"], "Assign": { "escalationReason": "Primary approver did not respond within 24 hours" }, "Next": "EscalateToManager" }, + { "ErrorEquals": ["States.ALL"], "Assign": { "approvalError": "{% $states.errorOutput %}" }, "Next": "ApprovalFailed" } + ], + "Next": "EvaluateApproval" +}, +"EscalateToManager": { + "Type": "Task", + "Resource": "arn:aws:states:::sns:publish", + "Arguments": { + "TopicArn": "arn:aws:sns:us-east-1:123456789012:EscalationNotifications", + "Subject": "Approval Escalation", + "Message": "{% 'Order ' & $orderId & ' requires manager approval. 
' & $escalationReason %}" + }, + "Next": "WaitForManagerApproval" +}, +"WaitForManagerApproval": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", + "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'orderId': $orderId, 'approver': $states.input.managerApprover, 'amount': $states.input.amount, 'escalated': true}) %}" + }, + "TimeoutSeconds": 43200, + "Assign": { "approvalResult": "{% $states.result %}" }, + "Catch": [ + { + "ErrorEquals": ["States.Timeout"], + "Assign": { "approvalResult": { "decision": "rejected", "reason": "No response from manager within 12 hours — auto-rejected" } }, + "Next": "EvaluateApproval" + }, + { "ErrorEquals": ["States.ALL"], "Assign": { "approvalError": "{% $states.errorOutput %}" }, "Next": "ApprovalFailed" } + ], + "Next": "EvaluateApproval" +}, +"EvaluateApproval": { + "Type": "Choice", + "Choices": [ + { "Condition": "{% $approvalResult.decision = 'approved' %}", "Next": "ProcessApprovedOrder" } + ], + "Default": "OrderRejected" +} +``` + +Key elements: +- Each callback stage has its own `TimeoutSeconds` — shorter for escalation stages since urgency increases. +- `States.Timeout` in Catch distinguishes "no response" from actual errors, routing to the next escalation tier. +- The final tier auto-rejects by assigning a synthetic result in Catch `Assign` and routing to the same `EvaluateApproval` Choice. This avoids duplicating decision logic. +- External system calls `SendTaskSuccess` with `{"decision": "approved"}` or `{"decision": "rejected", "reason": "..."}`. +- Use Standard (not Express) workflows — Express doesn't support `.waitForTaskToken`. + +--- + +## Express → Standard Handoff + +Express workflows are more cost-effective for high volume State Machine Invocations, but don't support callbacks or long waits. Standard workflows handle those but cost per state transition. 
Use Express for fast, high-volume ingest and kick off a Standard execution for the long-running tail. + +```json +{ + "Comment": "Express workflow — fast ingest and validation", + "QueryLanguage": "JSONata", + "StartAt": "ValidateInput", + "States": { + "ValidateInput": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ValidateOrder:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "Next": "EnrichData" + }, + "EnrichData": { + "Type": "Parallel", + "Branches": [ + { + "StartAt": "LookupCustomer", + "States": { + "LookupCustomer": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "CustomersTable", + "Key": { "customerId": { "S": "{% $states.input.customerId %}" } } + }, + "Output": "{% $states.result.Item %}", + "End": true + } + } + }, + { + "StartAt": "LookupPricing", + "States": { + "LookupPricing": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetPricing:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + } + ], + "Output": { + "order": "{% $states.input %}", + "customer": "{% $states.result[0] %}", + "pricing": "{% $states.result[1] %}" + }, + "Next": "HandOffToStandard" + }, + "HandOffToStandard": { + "Type": "Task", + "Resource": "arn:aws:states:::states:startExecution", + "Arguments": { + "StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:OrderFulfillment-Standard", + "Input": "{% $string($states.input) %}" + }, + "Output": { + "status": "handed_off", + "childExecutionArn": "{% $states.result.ExecutionArn %}" + }, + "End": true + } + } +} +``` + +Key elements: +- Express does validation, enrichment, fan-out — fast, stateless work that benefits from per-request pricing. 
+- `HandOffToStandard` uses fire-and-forget (no `.sync` suffix) so the Express execution completes immediately. Express workflows cannot use `.sync`/`.sync:2` (Run a Job) integrations and are capped at 5 minutes, so if the parent must wait for the child's result, make the parent a Standard workflow. +- Use `$string($states.input)` to serialize — `startExecution` expects a JSON string for `Input`. +- Ideal for event-driven architectures: API Gateway or EventBridge triggers Express at high volume, only orders needing long-running processing incur Standard costs. diff --git a/aws-step-functions/steering/asl-state-types.md b/aws-step-functions/steering/asl-state-types.md new file mode 100644 index 0000000..00c1929 --- /dev/null +++ b/aws-step-functions/steering/asl-state-types.md @@ -0,0 +1,474 @@ +# ASL Structure and State Types (JSONata Mode) + +## State Machine Top-Level Structure + +```json +{ + "Comment": "Description of the state machine", + "QueryLanguage": "JSONata", + "StartAt": "FirstStateName", + "TimeoutSeconds": 3600, + "Version": "1.0", + "States": { + "FirstStateName": { ... }, + "SecondStateName": { ... } + } +} +``` + +- `QueryLanguage`: Set to `"JSONata"` at top level. Defaults to `"JSONPath"` if omitted. +- `StartAt`: Must exactly match a state name (case-sensitive). +- `TimeoutSeconds`: Optional max execution time. Exceeding it throws `States.Timeout`. +- `States`: Required object containing all state definitions. +- State names must be unique and ≤ 80 Unicode characters. + +## Common Fields for All JSONata States + +| Field | Description | +|-------|-------------| +| `Type` | Required. One of: Task, Pass, Choice, Wait, Parallel, Map, Succeed, Fail | +| `Comment` | Optional human-readable description | +| `Next` | Name of next state (required for non-terminal states except Choice) | +| `End` | Set to `true` for terminal states | +| `Output` | Optional. Transform state output. Available in all types except Fail | +| `Assign` | Optional. Store workflow variables. 
Available in all types except Succeed and Fail | +| `QueryLanguage` | Optional per-state override | + +## Field Availability Matrix (JSONata) + +``` + Task Parallel Map Pass Wait Choice Succeed Fail +Type ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ +Comment ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ +Output ✓ ✓ ✓ ✓ ✓ ✓ ✓ +Assign ✓ ✓ ✓ ✓ ✓ ✓ +Next/End ✓ ✓ ✓ ✓ ✓ +Arguments ✓ ✓ +Retry/Catch ✓ ✓ ✓ +``` + +--- + +## Pass State + +Passes input to output, optionally transforming it. Useful for injecting data or reshaping payloads. + +```json +"InjectData": { + "Type": "Pass", + "Output": { + "greeting": "{% 'Hello, ' & $states.input.name %}", + "timestamp": "{% $now() %}" + }, + "Next": "NextState" +} +``` + +With variable assignment: + +```json +"StoreDefaults": { + "Type": "Pass", + "Assign": { + "retryCount": 0, + "maxRetries": 3, + "config": "{% $states.input.configuration %}" + }, + "Next": "ProcessItem" +} +``` + +Without `Output`, the Pass state copies input to output unchanged. + +--- + +## Task State + +Executes work via AWS service integrations, activities, or HTTP APIs. 
+ +### Required Fields +- `Resource`: ARN identifying the task to execute + +### Optional Fields +- `Arguments`: Input to the task (replaces JSONPath `Parameters`) +- `Output`: Transform the result +- `Assign`: Store variables from input or result +- `TimeoutSeconds`: Max task duration (AWS default is 99999999 seconds, so set it explicitly; accepts JSONata expression) +- `HeartbeatSeconds`: Heartbeat interval (must be < TimeoutSeconds) +- `Retry`: Retry policy array +- `Catch`: Error handler array +- `Credentials`: Cross-account role assumption + +### Lambda Invoke Example + +```json +"InvokeLambda": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunc:$LATEST", + "Payload": { + "orderId": "{% $states.input.orderId %}", + "customer": "{% $states.input.customer %}" + } + }, + "Assign": { + "processedResult": "{% $states.result.Payload %}" + }, + "Output": "{% $states.result.Payload %}", + "Next": "NextState" +} +``` + +### Dynamic Timeout + +```json +"LongRunningTask": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:SlowFunc:$LATEST", + "Payload": "{% $states.input %}" + }, + "TimeoutSeconds": "{% $states.input.timeoutValue %}", + "HeartbeatSeconds": "{% $states.input.heartbeatValue %}", + "Next": "Done" +} +``` + +--- + +## Choice State + +Adds branching logic. Uses `Condition` field with JSONata boolean expressions (replaces JSONPath `Variable` + comparison operators). 
+ +### Structure + +```json +"RouteOrder": { + "Type": "Choice", + "Choices": [ + { + "Condition": "{% $states.input.orderType = 'express' %}", + "Next": "ExpressShipping" + }, + { + "Condition": "{% $states.input.total > 100 %}", + "Assign": { + "discount": "{% $states.input.total * 0.1 %}" + }, + "Output": { + "total": "{% $states.input.total * 0.9 %}" + }, + "Next": "ApplyDiscount" + }, + { + "Condition": "{% $states.input.priority >= 5 and $states.input.category = 'urgent' %}", + "Next": "PriorityQueue" + } + ], + "Default": "StandardProcessing", + "Assign": { + "routedDefault": true + } +} +``` + +Key points: +- `Condition` must evaluate to a boolean. +- Each Choice Rule can have its own `Assign` and `Output`. +- If a rule matches, its `Assign`/`Output` are used (not the state-level ones). +- If no rule matches, the state-level `Assign` is evaluated and `Default` is followed. +- `Default` is optional but recommended — without it, `States.NoChoiceMatched` is thrown. +- Choice states cannot be terminal (no `End` field). + +### Complex Conditions + +JSONata supports rich boolean logic: + +```json +"Condition": "{% $states.input.age >= 18 and $states.input.age <= 65 %}" +"Condition": "{% $states.input.status = 'active' or $states.input.override = true %}" +"Condition": "{% $not($exists($states.input.error)) %}" +"Condition": "{% $contains($states.input.email, '@') %}" +"Condition": "{% $count($states.input.items) > 0 %}" +"Condition": "{% $states.input.score >= $threshold %}" +``` + +--- + +## Wait State + +Delays execution for a specified duration or until a timestamp. 
+ +### Wait by Seconds + +```json +"WaitTenSeconds": { + "Type": "Wait", + "Seconds": 10, + "Next": "Continue" +} +``` + +### Wait with Dynamic Seconds + +```json +"DynamicWait": { + "Type": "Wait", + "Seconds": "{% $states.input.delaySeconds %}", + "Next": "Continue" +} +``` + +### Wait Until Timestamp + +```json +"WaitUntilDate": { + "Type": "Wait", + "Timestamp": "{% $states.input.scheduledTime %}", + "Next": "Execute" +} +``` + +Timestamps must conform to RFC3339 (e.g., `"2026-03-14T01:59:00Z"`). + +A Wait state must contain exactly one of `Seconds` or `Timestamp`. + +--- + +## Succeed State + +Terminates the state machine (or a Parallel branch / Map iteration) successfully. + +```json +"Done": { + "Type": "Succeed", + "Output": { + "status": "completed", + "processedAt": "{% $now() %}" + } +} +``` + +Without `Output`, passes input through as output. No `Next` field allowed. + +--- + +## Fail State + +Terminates the state machine with an error. + +```json +"OrderFailed": { + "Type": "Fail", + "Error": "OrderValidationError", + "Cause": "The order could not be validated" +} +``` + +### Dynamic Error and Cause + +```json +"DynamicFail": { + "Type": "Fail", + "Error": "{% $states.input.errorCode %}", + "Cause": "{% $states.input.errorMessage %}" +} +``` + +Build rich, defensive error messages with fallbacks for missing fields: + +```json +"OrderProcessingFailed": { + "Type": "Fail", + "Error": "OrderProcessingError", + "Cause": "{% 'Failed to process order ' & ($exists($orderId) ? $orderId : 'unknown') & ': ' & ($exists($error.Error) ? $error.Error : 'Unknown error') & ' - ' & ($exists($error.Cause) ? $error.Cause : 'No details available') & '. Timestamp: ' & $now() %}" +} +``` + +No `Next`, `End`, `Output`, or `Assign` fields. Fail states are always terminal. + +--- + +## Parallel State + +Executes multiple branches concurrently. All branches receive the same input. 
+ +```json +"LookupCustomerInfo": { + "Type": "Parallel", + "Arguments": { + "customerId": "{% $states.input.customerId %}" + }, + "Branches": [ + { + "StartAt": "GetAddress", + "States": { + "GetAddress": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetAddress:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + }, + { + "StartAt": "GetOrders", + "States": { + "GetOrders": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetOrders:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + } + ], + "Assign": { + "address": "{% $states.result[0] %}", + "orders": "{% $states.result[1] %}" + }, + "Output": { + "address": "{% $states.result[0] %}", + "orders": "{% $states.result[1] %}" + }, + "Next": "ProcessResults" +} +``` + +Key points: +- `Arguments` provides input to each branch's StartAt state (optional, defaults to state input). +- Result is an array with one element per branch, in the same order as `Branches`. +- If any branch fails, the entire Parallel state fails (unless caught). +- States inside branches can only transition to other states within the same branch. +- Branch variables are scoped — branches cannot access each other's variables. +- Use `Output` on terminal states within branches to pass data back to the outer scope. + +--- + +## Map State + +Iterates over an array, processing each element (potentially in parallel). 
+ +### Basic Map + +```json +"ProcessItems": { + "Type": "Map", + "Items": "{% $states.input.orders %}", + "MaxConcurrency": 10, + "ItemProcessor": { + "StartAt": "ProcessOrder", + "States": { + "ProcessOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ProcessOrder:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + }, + "Output": "{% $states.result %}", + "Next": "AllDone" +} +``` + +### Map with ItemSelector + +Use `ItemSelector` to reshape each item before processing: + +```json +"ProcessItems": { + "Type": "Map", + "Items": "{% $states.input.detail.shipped %}", + "ItemSelector": { + "parcel": "{% $states.context.Map.Item.Value %}", + "index": "{% $states.context.Map.Item.Index %}", + "courier": "{% $states.input.detail.`delivery-partner` %}" + }, + "MaxConcurrency": 0, + "ItemProcessor": { + "StartAt": "Ship", + "States": { + "Ship": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ShipItem:$LATEST", + "Payload": "{% $states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + }, + "Next": "Done" +} +``` + +### Map Context Variables + +Inside `ItemSelector`, you can access: +- `$states.context.Map.Item.Value` — the current array element +- `$states.context.Map.Item.Index` — the zero-based index + +### Key Map Fields + +| Field | Description | +|-------|-------------| +| `Items` | JSON array or JSONata expression evaluating to an array | +| `ItemProcessor` | State machine to run for each item (has `StartAt` and `States`) | +| `ItemSelector` | Reshape each item before processing | +| `MaxConcurrency` | Max parallel iterations (0 = unlimited, 1 = sequential) | +| `ToleratedFailurePercentage` | 0-100, percentage of items allowed to fail | +| 
`ToleratedFailureCount` | Number of items allowed to fail | +| `ItemReader` | Read items from an external resource | +| `ItemBatcher` | Batch items into sub-arrays | +| `ResultWriter` | Write results to an external resource | + +### ProcessorConfig + +The `ItemProcessor` can include a `ProcessorConfig` to control execution mode: + +```json +"ItemProcessor": { + "ProcessorConfig": { + "Mode": "INLINE" + }, + "StartAt": "ProcessOrder", + "States": { ... } +} +``` + +- `INLINE` (default) — iterations run within the parent execution. Use for most cases. +- `DISTRIBUTED` — iterations run as child executions. Use for large-scale processing (thousands+ items), items read from S3, or when you need per-iteration execution history. + +### Failure Tolerance + +```json +"ProcessWithTolerance": { + "Type": "Map", + "Items": "{% $states.input.records %}", + "ToleratedFailurePercentage": 10, + "ToleratedFailureCount": 5, + "ItemProcessor": { ... }, + "Next": "Done" +} +``` + +The Map state fails if either threshold is breached. Note: AWS documents these tolerance fields for Distributed Map (`"Mode": "DISTRIBUTED"`) — confirm support before relying on them with an Inline Map. + +--- \ No newline at end of file diff --git a/aws-step-functions/steering/converting-from-jsonpath-to-jsonata.md b/aws-step-functions/steering/converting-from-jsonpath-to-jsonata.md new file mode 100644 index 0000000..413ebac --- /dev/null +++ b/aws-step-functions/steering/converting-from-jsonpath-to-jsonata.md @@ -0,0 +1,364 @@ +# Converting from JSONPath to JSONata + +Systematic conversion guide for migrating existing JSONPath state machines to JSONata. Covers field mapping, state-type patterns, intrinsic function replacements, and common pitfalls. + +## Migration Strategy + +Convert incrementally by setting `QueryLanguage` per-state. 
JSONPath states and JSONata states can coexist: + +```json +{ + "StartAt": "LegacyState", + "States": { + "LegacyState": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Legacy:$LATEST", "Payload.$": "$" }, + "ResultPath": "$.legacyResult", + "Next": "MigratedState" + }, + "MigratedState": { + "Type": "Task", + "QueryLanguage": "JSONata", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Modern:$LATEST", "Payload": "{% $states.input %}" }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } +} +``` + +When all states are converted, promote `QueryLanguage` to the top level and remove per-state declarations. + +--- + +## Field Mapping Reference + +### I/O Fields: Five Become Two + +| JSONPath Field | JSONata Equivalent | +|---|---| +| `InputPath` | Not needed — use `$states.input.path` directly in `Arguments` | +| `Parameters` | `Arguments` | +| `ResultSelector` | `Output` (reference `$states.result`) | +| `ResultPath` | `Output` with `$merge`, or `Assign` (preferred) | +| `OutputPath` | `Output` (return only what you need) | + +### Path Fields Eliminated + +| JSONPath | JSONata | +|---|---| +| `TimeoutSecondsPath` | `TimeoutSeconds` with `{% %}` | +| `HeartbeatSecondsPath` | `HeartbeatSeconds` with `{% %}` | +| `ItemsPath` | `Items` with `{% %}` | + +### Syntax Changes + +| JSONPath | JSONata | +|---|---| +| `"key.$": "$.field"` | `"key": "{% $states.input.field %}"` | +| `$` or `$.field` (state input) | `$states.input` or `$states.input.field` | +| `$$` (context object) | `$states.context` | +| `$$.Execution.Input` | `$states.context.Execution.Input` | +| `$$.Task.Token` | `$states.context.Task.Token` | +| `$$.Map.Item.Value` | `$states.context.Map.Item.Value` | +| `$variable` (workflow var) | `$variable` (unchanged) | + +--- + +## Converting Each State Type + +### Task 
State + +**Before (JSONPath):** +```json +"ProcessOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "InputPath": "$.order", + "Parameters": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Process:$LATEST", + "Payload": { "id.$": "$.orderId", "customer.$": "$.customerName" } + }, + "ResultSelector": { "processedId.$": "$.Payload.id", "status.$": "$.Payload.status" }, + "ResultPath": "$.processingResult", + "OutputPath": "$.processingResult", + "Next": "Ship" +} +``` + +**After (JSONata):** +```json +"ProcessOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Process:$LATEST", + "Payload": { "id": "{% $states.input.order.orderId %}", "customer": "{% $states.input.order.customerName %}" } + }, + "Output": { "processedId": "{% $states.result.Payload.id %}", "status": "{% $states.result.Payload.status %}" }, + "Next": "Ship" +} +``` + +Steps: (1) Fold `InputPath` path into `$states.input` references. (2) `Parameters` → `Arguments`, remove `.$` suffixes, wrap in `{% %}`. (3) Collapse `ResultSelector` + `ResultPath` + `OutputPath` into `Output`. 
+ +### ResultPath Patterns + +**Merging result into input** (`ResultPath: "$.field"`): +```json +// Preferred: use Assign to store, pass input through +"Assign": { "priceResult": "{% $states.result.Payload %}" }, +"Output": "{% $states.input %}" + +// Alternative: explicit merge +"Output": "{% $merge([$states.input, {'priceResult': $states.result.Payload}]) %}" +``` + +**Discarding result** (`ResultPath: null`): +```json +"Output": "{% $states.input %}" +``` + +### Pass State + +**Before:** `Result` + `ResultPath` → **After:** `Output` (or just `Assign` if downstream uses variables) + +```json +// JSONPath +"InjectDefaults": { "Type": "Pass", "Result": { "region": "us-east-1" }, "ResultPath": "$.config", "Next": "Go" } + +// JSONata — use Assign when possible +"InjectDefaults": { "Type": "Pass", "Assign": { "region": "us-east-1" }, "Next": "Go" } +``` + +### Choice State + +JSONPath uses `Variable` + typed operators. JSONata uses a single `Condition` expression. + +**Before (JSONPath):** +```json +"Choices": [ + { "Variable": "$.status", "StringEquals": "approved", "Next": "Approved" }, + { "And": [ + { "Variable": "$.priority", "StringEquals": "high" }, + { "Variable": "$.age", "NumericLessThanEquals": 30 } + ], "Next": "FastTrack" }, + { "Not": { "Variable": "$.email", "IsPresent": true }, "Next": "RequestEmail" } +] +``` + +**After (JSONata):** +```json +"Choices": [ + { "Condition": "{% $states.input.status = 'approved' %}", "Next": "Approved" }, + { "Condition": "{% $states.input.priority = 'high' and $states.input.age <= 30 %}", "Next": "FastTrack" }, + { "Condition": "{% $not($exists($states.input.email)) %}", "Next": "RequestEmail" } +] +``` + +#### Choice Operator Mapping + +| JSONPath Operator | JSONata | +|---|---| +| `StringEquals` / `StringEqualsPath` | `= 'value'` / `= $states.input.other` | +| `NumericGreaterThan` / `NumericLessThanEquals` | `> value` / `<= value` | +| `BooleanEquals` | `= true` / `= false` | +| `TimestampGreaterThan` | 
`$toMillis(field) > $toMillis('ISO-timestamp')` | +| `IsPresent: true` / `false` | `$exists(field)` / `$not($exists(field))` | +| `IsNull: true` | `field = null` | +| `IsNumeric` / `IsString` / `IsBoolean` | `$type(field) = 'number'` / `'string'` / `'boolean'` | +| `StringMatches` (wildcards) | `$contains(field, /regex/)` | +| `And` / `Or` / `Not` | `and` / `or` / `$not()` | + +### Wait State + +`SecondsPath` → `Seconds` with `{% %}`. `TimestampPath` → `Timestamp` with `{% %}`. + +```json +// JSONPath +{ "Type": "Wait", "TimestampPath": "$.deliveryDate", "Next": "Check" } +// JSONata +{ "Type": "Wait", "Timestamp": "{% $states.input.deliveryDate %}", "Next": "Check" } +``` + +### Map State + +| JSONPath | JSONata | +|---|---| +| `ItemsPath` | `Items` (fold `InputPath` into expression) | +| `Parameters` (with `$$.Map.*`) | `ItemSelector` (with `$states.context.Map.*`) | +| `Iterator` | `ItemProcessor` (add `ProcessorConfig`) | +| `ResultSelector` inside iterator | `Output` inside processor states | +| `ResultPath` on Map | `Assign` or `$merge` in `Output` | + +```json +// JSONata Map +"ProcessItems": { + "Type": "Map", + "Items": "{% $states.input.orderData.items %}", + "ItemSelector": { + "item": "{% $states.context.Map.Item.Value %}", + "index": "{% $states.context.Map.Item.Index %}" + }, + "MaxConcurrency": 5, + "ItemProcessor": { + "ProcessorConfig": { "Mode": "INLINE" }, + "StartAt": "Process", + "States": { + "Process": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Process:$LATEST", "Payload": "{% $states.input %}" }, + "Output": "{% $states.result.Payload %}", + "End": true + } + } + }, + "Assign": { "processedItems": "{% $states.result %}" }, + "Next": "Done" +} +``` + +--- + +## Converting Intrinsic Functions + +| JSONPath Intrinsic | JSONata Equivalent | +|---|---| +| `States.Format('Order {}', $.id)` | `'Order ' & $states.input.id` | +| 
`States.StringToJson($.str)` | `$parse($states.input.str)` | +| `States.JsonToString($.obj)` | `$string($states.input.obj)` | +| `States.Array($.a, $.b)` | `[$states.input.a, $states.input.b]` | +| `States.ArrayPartition($.arr, 2)` | `$partition($states.input.arr, 2)` | +| `States.ArrayContains($.arr, $.v)` | `$states.input.v in $states.input.arr` | +| `States.ArrayRange(0, 10, 2)` | `$range(0, 10, 2)` | +| `States.ArrayGetItem($.arr, 0)` | `$states.input.arr[0]` | +| `States.ArrayLength($.arr)` | `$count($states.input.arr)` | +| `States.ArrayUnique($.arr)` | `$distinct($states.input.arr)` | +| `States.Base64Encode($.str)` | `$base64encode($states.input.str)` | +| `States.Base64Decode($.str)` | `$base64decode($states.input.str)` | +| `States.Hash($.data, 'SHA-256')` | `$hash($states.input.data, 'SHA-256')` | +| `States.JsonMerge($.a, $.b)` | `$merge([$states.input.a, $states.input.b])` | +| `States.MathRandom()` | `$random()` | +| `States.MathAdd($.a, $.b)` | `$states.input.a + $states.input.b` | +| `States.UUID()` | `$uuid()` | + +--- + +## Converting Catch Blocks + +JSONPath Catch uses `ResultPath`. JSONata Catch uses `Assign` and `Output` with `$states.errorOutput`. + +```json +// JSONPath +"Catch": [{ "ErrorEquals": ["States.ALL"], "ResultPath": "$.error", "Next": "HandleError" }] + +// JSONata — preferred: store in variable +"Catch": [{ "ErrorEquals": ["States.ALL"], "Assign": { "errorInfo": "{% $states.errorOutput %}" }, "Next": "HandleError" }] + +// JSONata — if downstream expects merged object +"Catch": [{ + "ErrorEquals": ["States.ALL"], + "Assign": { "errorInfo": "{% $states.errorOutput %}" }, + "Output": "{% $merge([$states.input, {'error': $states.errorOutput}]) %}", + "Next": "HandleError" +}] +``` + +Retry syntax is identical between JSONPath and JSONata — no conversion needed. 
+ +--- + +## Context Object Reference Mapping + +| JSONPath (`$$`) | JSONata (`$states.context`) | +|---|---| +| `$$.Execution.Id` | `$states.context.Execution.Id` | +| `$$.Execution.Input` | `$states.context.Execution.Input` | +| `$$.Execution.Name` | `$states.context.Execution.Name` | +| `$$.Execution.StartTime` | `$states.context.Execution.StartTime` | +| `$$.State.Name` | `$states.context.State.Name` | +| `$$.State.EnteredTime` | `$states.context.State.EnteredTime` | +| `$$.StateMachine.Id` | `$states.context.StateMachine.Id` | +| `$$.Task.Token` | `$states.context.Task.Token` | +| `$$.Map.Item.Value` | `$states.context.Map.Item.Value` | +| `$$.Map.Item.Index` | `$states.context.Map.Item.Index` | + +--- + +## Common Conversion Pitfalls + +### 1. Mixing JSONPath and JSONata fields in the same state +Invalid combinations: `Arguments` + `InputPath`, `Output` + `ResultSelector`, `Condition` + `Variable`. Remove all JSONPath fields from converted states. + +### 2. Forgetting to remove `.$` suffixes +```json +❌ "orderId.$": "{% $states.input.orderId %}" +✓ "orderId": "{% $states.input.orderId %}" +``` + +### 3. Using `$` or `$$` instead of `$states` +```json +❌ "{% $.orderId %}" ❌ "{% $$.Task.Token %}" +✓ "{% $states.input.orderId %}" ✓ "{% $states.context.Task.Token %}" +``` +Note: `$` is valid inside nested filter expressions (e.g., `$states.input.items[$.price > 10]`). + +### 4. Double quotes inside JSONata expressions +```json +❌ "{% $states.input.status = "active" %}" +✓ "{% $states.input.status = 'active' %}" +``` + +### 5. Expecting Assign values in Output of the same state +`Assign` and `Output` evaluate in parallel — new variable values are not available in `Output`: +```json +❌ "Assign": { "total": "{% $states.result.Payload.total %}" }, + "Output": { "total": "{% $total %}" } +✓ "Assign": { "total": "{% $states.result.Payload.total %}" }, + "Output": { "total": "{% $states.result.Payload.total %}" } +``` + +### 6. 
Undefined field access +JSONPath fails with a runtime error when a referenced path is missing. JSONata instead evaluates the path to undefined, and because JSON cannot represent undefined the state throws `States.QueryEvaluationError`, so guard optional fields: +```json +❌ "{% $states.input.customer.middleName %}" +✓ "{% $exists($states.input.customer.middleName) ? $states.input.customer.middleName : '' %}" +``` + +### 7. Single-item filter results +JSONata returns a single object (not a 1-element array) when exactly one item matches a filter, and undefined when nothing matches. Both break a Map state `Items` field, which requires an array. Functions like `$count` do not fail (they treat a single value as a 1-element array and undefined as empty), but anything that needs a real array must normalize the result: +```json +❌ "Items": "{% $states.input.orders[status = 'pending'] %}" +✓ "Items": "{% ( $f := $states.input.orders[status = 'pending']; $type($f) = 'array' ? $f : $exists($f) ? [$f] : [] ) %}" +``` + +### 8. Iterator → ItemProcessor rename +`Iterator` was renamed to `ItemProcessor` and requires `ProcessorConfig`: +```json +❌ "Iterator": { "StartAt": "...", "States": {...} } +✓ "ItemProcessor": { "ProcessorConfig": { "Mode": "INLINE" }, "StartAt": "...", "States": {...} } +``` + +--- + +## Conversion Checklist + +1. Add `"QueryLanguage": "JSONata"` (per-state or top-level) +2. Remove all five JSONPath I/O fields (`InputPath`, `Parameters`, `ResultSelector`, `ResultPath`, `OutputPath`) +3. `Parameters` → `Arguments` (remove `.$`, wrap in `{% %}`, `$` → `$states.input`) +4. Collapse `ResultSelector` + `ResultPath` + `OutputPath` into single `Output` +5. `ResultPath: null` → `Output: "{% $states.input %}"` +6. `ResultPath: "$.field"` → `Assign` (preferred) or `Output` with `$merge` +7. `*Path` fields → base field + `{% %}` expression +8. `$$` → `$states.context` +9. `States.*` intrinsic functions → JSONata equivalents (see table above) +10. Choice `Variable` + operators → `Condition` expression +11. `Iterator` → `ItemProcessor` with `ProcessorConfig` +12. Catch `ResultPath` → Catch `Assign`/`Output` with `$states.errorOutput` +13. Pass `Result` → `Output` +14. Refactor `ResultPath` merge chains to use `Assign` variables +15. Test each state individually via Workflow Studio Test State +16. 
Promote `QueryLanguage` to top level when all states are converted \ No newline at end of file diff --git a/aws-step-functions/steering/error-handling.md b/aws-step-functions/steering/error-handling.md new file mode 100644 index 0000000..b27ad18 --- /dev/null +++ b/aws-step-functions/steering/error-handling.md @@ -0,0 +1,445 @@ +# Error Handling in JSONata Mode + +## Overview + +When a state encounters an error, Step Functions defaults to failing the entire execution. You can override this with `Retry` (retry the failed state) and `Catch` (transition to a fallback state). + +`Retry` and `Catch` are available on: Task, Parallel, and Map states. + +## Error Names + +Errors are identified by case-sensitive strings. Step Functions defines these built-in error codes: + +| Error Code | Description | +|-----------|-------------| +| `States.ALL` | Wildcard — matches any error | +| `States.Timeout` | Task exceeded `TimeoutSeconds` or missed heartbeat | +| `States.HeartbeatTimeout` | Task missed heartbeat interval | +| `States.TaskFailed` | Task failed during execution | +| `States.Permissions` | Insufficient privileges | +| `States.ResultPathMatchFailure` | ResultPath cannot be applied (JSONPath only) | +| `States.ParameterPathFailure` | Parameter path resolution failed (JSONPath only) | +| `States.QueryEvaluationError` | JSONata expression evaluation failed | +| `States.BranchFailed` | A Parallel state branch failed | +| `States.NoChoiceMatched` | No Choice rule matched and no Default | +| `States.IntrinsicFailure` | Intrinsic function failed (JSONPath only) | +| `States.ExceedToleratedFailureThreshold` | Map state exceeded failure tolerance | +| `States.ItemReaderFailed` | Map state ItemReader failed | +| `States.ResultWriterFailed` | Map state ResultWriter failed | + +Custom error names are allowed but must NOT start with `States.`. + +--- + +## Retry + +The `Retry` field is an array of Retrier objects. 
The interpreter scans retriers in order and uses the first one whose `ErrorEquals` matches. + +### Retrier Fields + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `ErrorEquals` | string[] | Required | Error names to match | +| `IntervalSeconds` | integer | 1 | Seconds before first retry | +| `MaxAttempts` | integer | 3 | Maximum retry attempts (0 = never retry) | +| `BackoffRate` | number | 2.0 | Multiplier for retry interval (must be ≥ 1.0) | +| `MaxDelaySeconds` | integer | — | Cap on retry interval | +| `JitterStrategy` | string | — | Jitter strategy (e.g., `"FULL"`) | + +### Basic Retry + +```json +"ProcessPayment": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Pay:$LATEST", + "Payload": "{% $states.input %}" + }, + "Retry": [ + { + "ErrorEquals": ["States.TaskFailed"], + "IntervalSeconds": 2, + "MaxAttempts": 3, + "BackoffRate": 2.0 + } + ], + "Next": "Confirm" +} +``` + +This retries after 2s, 4s, 8s (3 attempts with 2x backoff). + +### Retry with Max Delay and Jitter + +```json +"Retry": [ + { + "ErrorEquals": ["States.TaskFailed"], + "IntervalSeconds": 1, + "MaxAttempts": 5, + "BackoffRate": 2.0, + "MaxDelaySeconds": 30, + "JitterStrategy": "FULL" + } +] +``` + +### Multiple Retriers + +Retriers are evaluated in order. Each retrier tracks its own attempt count independently: + +```json +"Retry": [ + { + "ErrorEquals": ["ThrottlingException"], + "IntervalSeconds": 1, + "MaxAttempts": 5, + "BackoffRate": 2.0, + "JitterStrategy": "FULL" + }, + { + "ErrorEquals": ["States.Timeout"], + "MaxAttempts": 0 + }, + { + "ErrorEquals": ["States.ALL"], + "IntervalSeconds": 3, + "MaxAttempts": 2, + "BackoffRate": 1.5 + } +] +``` + +Rules: +- `States.ALL` must appear alone in its `ErrorEquals` array. +- `States.ALL` must be in the last retrier. +- `MaxAttempts: 0` means "never retry this error." 
+- Retrier attempt counts reset when the interpreter transitions to another state. + +--- + +## Catch + +The `Catch` field is an array of Catcher objects. After retries are exhausted (or if no retrier matches), the interpreter scans catchers in order. + +### Catcher Fields (JSONata) + +| Field | Type | Description | +|-------|------|-------------| +| `ErrorEquals` | string[] | Required. Error names to match | +| `Next` | string | Required. State to transition to | +| `Output` | any | Optional. Transform the error output | +| `Assign` | object | Optional. Assign variables from error context | + +### Basic Catch + +```json +"ProcessOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Process:$LATEST", + "Payload": "{% $states.input %}" + }, + "Catch": [ + { + "ErrorEquals": ["ValidationError"], + "Output": { + "error": "{% $states.errorOutput.Error %}", + "cause": "{% $states.errorOutput.Cause %}", + "originalInput": "{% $states.input %}" + }, + "Next": "HandleValidationError" + }, + { + "ErrorEquals": ["States.ALL"], + "Output": "{% $states.errorOutput %}", + "Next": "HandleGenericError" + } + ], + "Next": "Success" +} +``` + +### Error Output Structure + +When a state fails and matches a Catcher, the Error Output is a JSON object with: +- `Error` (string) — the error name +- `Cause` (string) — human-readable error description + +```json +{ + "Error": "States.TaskFailed", + "Cause": "Lambda function returned an error" +} +``` + +### Catch with Variable Assignment + +```json +"Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Assign": { + "hasError": true, + "errorType": "{% $states.errorOutput.Error %}", + "errorMessage": "{% $states.errorOutput.Cause %}" + }, + "Output": "{% $merge([$states.input, {'error': $states.errorOutput}]) %}", + "Next": "ErrorHandler" + } +] +``` + +In a Catch block, `Assign` and `Output` can reference: +- `$states.input` — the original 
state input +- `$states.errorOutput` — the error details +- `$states.context` — execution context + +If a Catcher matches, the state's top-level `Assign` is NOT evaluated — only the Catcher's `Assign` runs. + +### Catch Without Output + +If no `Output` is provided in the Catcher, the state output is the raw Error Output object. + +### Building Rich Error Context for Fail States + +A user-friendly pattern is to capture error details into a variable via Catch `Assign`, then reference that variable in a Fail state's `Cause` with defensive fallbacks: + +```json +"ChargePayment": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage", + "Arguments": { ... }, + "Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Assign": { + "error": "{% $states.errorOutput %}" + }, + "Next": "PaymentFailed" + } + ], + "Next": "ConfirmOrder" +}, +"PaymentFailed": { + "Type": "Fail", + "Error": "PaymentError", + "Cause": "{% 'Payment failed for order ' & ($exists($orderId) ? $orderId : 'unknown') & ': ' & ($exists($error.Error) ? $error.Error : 'Unknown') & ' - ' & ($exists($error.Cause) ? $error.Cause : 'No details') & '. Timestamp: ' & $now() %}" +} +``` + +Always guard with `$exists()` — if the variable was never assigned (e.g., the Catch didn't fire for that path), referencing it directly throws `States.QueryEvaluationError`. + +--- + +## Combined Retry and Catch + +When both are present, retries are attempted first. 
Only if retries are exhausted does the Catch apply: + +```json +"CallExternalAPI": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:CallAPI:$LATEST", + "Payload": "{% $states.input %}" + }, + "Retry": [ + { + "ErrorEquals": ["ThrottlingException", "ServiceUnavailable"], + "IntervalSeconds": 2, + "MaxAttempts": 3, + "BackoffRate": 2.0, + "JitterStrategy": "FULL" + }, + { + "ErrorEquals": ["States.Timeout"], + "IntervalSeconds": 5, + "MaxAttempts": 2 + } + ], + "Catch": [ + { + "ErrorEquals": ["ThrottlingException", "ServiceUnavailable"], + "Assign": { + "retryExhausted": true + }, + "Output": { + "error": "Service temporarily unavailable after retries", + "details": "{% $states.errorOutput %}" + }, + "Next": "NotifyAndRetryLater" + }, + { + "ErrorEquals": ["States.ALL"], + "Output": { + "error": "{% $states.errorOutput %}", + "input": "{% $states.input %}" + }, + "Next": "FatalErrorHandler" + } + ], + "Output": "{% $states.result.Payload %}", + "Next": "ProcessResponse" +} +``` + +--- + +## Handling States.QueryEvaluationError + +JSONata expressions can fail at runtime. Common causes: + +1. **Type error**: `{% $x + $y %}` where `$x` or `$y` is not a number +2. **Type incompatibility**: `"TimeoutSeconds": "{% $name %}"` where `$name` is a string +3. **Value out of range**: Negative number for `TimeoutSeconds` +4. **Undefined result**: `{% $data.nonExistentField %}` — JSON cannot represent undefined + +All of these throw `States.QueryEvaluationError`. 
Handle it like any other error: + +```json +"Retry": [ + { + "ErrorEquals": ["States.QueryEvaluationError"], + "MaxAttempts": 0 + } +], +"Catch": [ + { + "ErrorEquals": ["States.QueryEvaluationError"], + "Output": { + "error": "Data transformation failed", + "details": "{% $states.errorOutput %}" + }, + "Next": "HandleDataError" + } +] +``` + +### Preventing QueryEvaluationError + +Use defensive JSONata expressions: + +```json +"Output": { + "name": "{% $exists($states.input.name) ? $states.input.name : 'Unknown' %}", + "total": "{% $type($states.input.amount) = 'number' ? $states.input.amount : 0 %}" +} +``` + +Watch out for single-value vs array results from filters. JSONata returns a single object (not a 1-element array) when a filter matches exactly one item, and undefined when nothing matches. A single object fails where Step Functions requires an array (for example a Map state `Items` field), and undefined fails anywhere a value must be produced, because JSON cannot represent undefined; both surface as `States.QueryEvaluationError`. Built-in functions such as `$count` and `$map` treat a single value as a 1-element sequence, so they do not raise an error themselves but can hide the shape change. + +Guard filtered results before using them: + +```json +"Assign": { + "pendingOrders": "{% ($filtered := $states.input.orders[status = 'pending']; $type($filtered) = 'array' ? $filtered : $exists($filtered) ? [$filtered] : []) %}" +} +``` + +This ensures `$pendingOrders` is always an array regardless of how many items matched. + +--- + +## Error Handling in Parallel States + +If any branch fails, the entire Parallel state fails. Catch the error at the Parallel state level: + +```json +"ParallelWork": { + "Type": "Parallel", + "Branches": [ ... 
], + "Retry": [ + { + "ErrorEquals": ["States.BranchFailed"], + "MaxAttempts": 1 + } + ], + "Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Output": { + "error": "{% $states.errorOutput %}", + "failedAt": "parallel execution" + }, + "Next": "HandleParallelError" + } + ], + "Next": "Continue" +} +``` + +--- + +## Error Handling in Map States + +Individual iteration failures can be tolerated: + +```json +"ProcessAll": { + "Type": "Map", + "Items": "{% $states.input.records %}", + "ToleratedFailurePercentage": 10, + "ItemProcessor": { ... }, + "Catch": [ + { + "ErrorEquals": ["States.ExceedToleratedFailureThreshold"], + "Output": { + "error": "Too many items failed", + "details": "{% $states.errorOutput %}" + }, + "Next": "HandleBatchFailure" + }, + { + "ErrorEquals": ["States.ALL"], + "Next": "HandleMapError" + } + ], + "Next": "Done" +} +``` + +--- + +## Common Error Handling Patterns + +### Circuit Breaker with Variables + +```json +"CheckRetryCount": { + "Type": "Choice", + "Choices": [ + { + "Condition": "{% $retryCount >= $maxRetries %}", + "Next": "MaxRetriesExceeded" + } + ], + "Default": "AttemptOperation" +}, +"AttemptOperation": { + "Type": "Task", + "Resource": "...", + "Assign": { + "retryCount": "{% $retryCount + 1 %}" + }, + "Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Assign": { + "retryCount": "{% $retryCount + 1 %}", + "lastError": "{% $states.errorOutput %}" + }, + "Next": "WaitBeforeRetry" + } + ], + "Next": "Success" +}, +"WaitBeforeRetry": { + "Type": "Wait", + "Seconds": "{% $power(2, $retryCount) %}", + "Next": "CheckRetryCount" +} +``` + diff --git a/aws-step-functions/steering/service-integrations.md b/aws-step-functions/steering/service-integrations.md new file mode 100644 index 0000000..490c104 --- /dev/null +++ b/aws-step-functions/steering/service-integrations.md @@ -0,0 +1,485 @@ +# Service Integrations in JSONata Mode + +## Integration Types + +Step Functions can integrate with AWS services in three patterns: + +1. 
**Optimized integrations** — Purpose-built, recommended where available (e.g., Lambda, DynamoDB, SNS, SQS, ECS, Glue, SageMaker, etc.) +2. **AWS SDK integrations** — Call any AWS SDK API action directly +3. **HTTP Task** — Call HTTPS APIs (e.g., Stripe, Salesforce) + +### Resource ARN Patterns + +``` +# Optimized integration +"Resource": "arn:aws:states:::servicename:apiAction" + +# Optimized integration (synchronous — wait for completion) +"Resource": "arn:aws:states:::servicename:apiAction.sync" + +# Optimized integration (wait for callback token) +"Resource": "arn:aws:states:::servicename:apiAction.waitForTaskToken" + +# AWS SDK integration +"Resource": "arn:aws:states:::aws-sdk:serviceName:apiAction" +``` + +--- + +## Lambda Function + +### Optimized Integration (Recommended) + +```json +"InvokeFunction": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunction:$LATEST", + "Payload": { + "orderId": "{% $states.input.orderId %}", + "customer": "{% $states.input.customer %}" + } + }, + "Output": "{% $states.result.Payload %}", + "Next": "NextState" +} +``` + +Always include a version qualifier (`:$LATEST`, `:1`, or an alias like `:prod`) on the function ARN. + +The result is wrapped in a `Payload` field, so use `$states.result.Payload` to access the Lambda return value. 
+ +### SDK Integration + +```json +"InvokeViaSDK": { + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunction", + "Payload": "{% $string($states.input) %}" + }, + "Next": "NextState" +} +``` + +--- + +## DynamoDB + +### GetItem + +```json +"GetUser": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:getItem", + "Arguments": { + "TableName": "UsersTable", + "Key": { + "userId": { + "S": "{% $states.input.userId %}" + } + } + }, + "Assign": { + "user": "{% $states.result.Item %}" + }, + "Output": "{% $states.result.Item %}", + "Next": "ProcessUser" +} +``` + +### PutItem + +```json +"SaveOrder": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:putItem", + "Arguments": { + "TableName": "OrdersTable", + "Item": { + "orderId": { + "S": "{% $orderId %}" + }, + "status": { + "S": "processing" + }, + "total": { + "N": "{% $string($states.input.total) %}" + }, + "createdAt": { + "S": "{% $now() %}" + } + } + }, + "Next": "ProcessOrder" +} +``` + +### UpdateItem + +```json +"UpdateStatus": { + "Type": "Task", + "Resource": "arn:aws:states:::dynamodb:updateItem", + "Arguments": { + "TableName": "OrdersTable", + "Key": { + "orderId": { + "S": "{% $orderId %}" + } + }, + "UpdateExpression": "SET #s = :status, updatedAt = :time", + "ExpressionAttributeNames": { + "#s": "status" + }, + "ExpressionAttributeValues": { + ":status": { + "S": "{% $states.input.newStatus %}" + }, + ":time": { + "S": "{% $now() %}" + } + } + }, + "Next": "Done" +} +``` + +### Query + +```json +"QueryOrders": { + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:dynamodb:query", + "Arguments": { + "TableName": "OrdersTable", + "KeyConditionExpression": "customerId = :cid", + "ExpressionAttributeValues": { + ":cid": { + "S": "{% $states.input.customerId %}" + } + } + }, + "Output": "{% $states.result.Items %}", + "Next": "ProcessOrders" +} +``` + +--- + +## SNS 
(Simple Notification Service) + +### Publish Message + +```json +"SendNotification": { + "Type": "Task", + "Resource": "arn:aws:states:::sns:publish", + "Arguments": { + "TopicArn": "arn:aws:sns:us-east-1:123456789012:OrderNotifications", + "Message": "{% 'Order ' & $orderId & ' has been processed successfully.' %}", + "Subject": "Order Confirmation" + }, + "Next": "Done" +} +``` + +### Publish with JSON Message + +```json +"SendStructuredNotification": { + "Type": "Task", + "Resource": "arn:aws:states:::sns:publish", + "Arguments": { + "TopicArn": "arn:aws:sns:us-east-1:123456789012:Alerts", + "Message": "{% $string({'orderId': $orderId, 'status': $states.input.status, 'timestamp': $now()}) %}" + }, + "Next": "Done" +} +``` + +--- + +## SQS (Simple Queue Service) + +### Send Message + +```json +"QueueMessage": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ProcessingQueue", + "MessageBody": "{% $string($states.input) %}" + }, + "Next": "Done" +} +``` + +### Send Message with Wait for Task Token + +```json +"WaitForApproval": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", + "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'orderId': $orderId, 'amount': $states.input.amount}) %}" + }, + "TimeoutSeconds": 86400, + "Next": "ProcessApproval" +} +``` + +The execution pauses until an external system calls `SendTaskSuccess` or `SendTaskFailure` with the task token. 
+ +--- + +## Step Functions (Nested Execution) + +### Start Execution (Synchronous) + +```json +"RunSubWorkflow": { + "Type": "Task", + "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Arguments": { + "StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:ChildWorkflow", + "Input": "{% $states.input %}" + }, + "Output": "{% $states.result.Output %}", + "Next": "ProcessSubResult" +} +``` + +Note: The `.sync:2` suffix waits for completion and returns the child output as parsed JSON in `$states.result.Output`, so it can be used directly. The older `.sync` suffix returns the output as a JSON string instead, which requires `$parse()` to deserialize. + +### Start Execution (Async — Fire and Forget) + +```json +"StartAsync": { + "Type": "Task", + "Resource": "arn:aws:states:::states:startExecution", + "Arguments": { + "StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:AsyncWorkflow", + "Input": "{% $string($states.input) %}" + }, + "Next": "Continue" +} +``` + +--- + +## EventBridge + +### Put Events + +```json +"EmitEvent": { + "Type": "Task", + "Resource": "arn:aws:states:::events:putEvents", + "Arguments": { + "Entries": [ + { + "Source": "my.application", + "DetailType": "OrderProcessed", + "Detail": "{% $string({'orderId': $orderId, 'status': 'completed'}) %}", + "EventBusName": "default" + } + ] + }, + "Next": "Done" +} +``` + +--- + +## ECS / Fargate + +### Run Task (Synchronous) + +```json +"RunContainer": { + "Type": "Task", + "Resource": "arn:aws:states:::ecs:runTask.sync", + "Arguments": { + "LaunchType": "FARGATE", + "Cluster": "arn:aws:ecs:us-east-1:123456789012:cluster/MyCluster", + "TaskDefinition": "arn:aws:ecs:us-east-1:123456789012:task-definition/MyTask:1", + "NetworkConfiguration": { + "AwsvpcConfiguration": { + "Subnets": ["subnet-abc123"], + "SecurityGroups": ["sg-abc123"], + "AssignPublicIp": "ENABLED" + } + }, + "Overrides": { + "ContainerOverrides": [ + { + "Name": "my-container", + "Environment": [ + { + "Name": "ORDER_ID", + "Value": "{% $orderId %}" + } + ] + } + ] + } + }, + "TimeoutSeconds": 
600, + "Next": "Done" +} +``` + +--- + +## AWS Glue + +### Start Job Run (Synchronous) + +```json +"RunGlueJob": { + "Type": "Task", + "Resource": "arn:aws:states:::glue:startJobRun.sync", + "Arguments": { + "JobName": "my-etl-job", + "Arguments": { + "--input_path": "{% $states.input.inputPath %}", + "--output_path": "{% $states.input.outputPath %}" + } + }, + "TimeoutSeconds": 3600, + "Next": "Done" +} +``` + +--- + +## Amazon Bedrock + +### Invoke Model + +```json +"InvokeModel": { + "Type": "Task", + "Resource": "arn:aws:states:::bedrock:invokeModel", + "Arguments": { + "ModelId": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", + "ContentType": "application/json", + "Accept": "application/json", + "Body": { + "anthropic_version": "bedrock-2023-05-31", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "{% $states.input.prompt %}" + } + ] + } + }, + "Output": "{% $states.result.Body %}", + "Next": "ProcessResponse" +} +``` + +--- + +## S3 + +### GetObject + +```json +"ReadFile": { + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:s3:getObject", + "Arguments": { + "Bucket": "my-bucket", + "Key": "{% $states.input.filePath %}" + }, + "Output": "{% $states.result.Body %}", + "Next": "ProcessFile" +} +``` + +### PutObject + +```json +"WriteFile": { + "Type": "Task", + "Resource": "arn:aws:states:::aws-sdk:s3:putObject", + "Arguments": { + "Bucket": "my-bucket", + "Key": "{% 'results/' & $orderId & '.json' %}", + "Body": "{% $string($states.input.results) %}" + }, + "Next": "Done" +} +``` + +--- + +## Cross-Account Access + +Use the `Credentials` field to assume a role in another account: + +```json +"CrossAccountCall": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Credentials": { + "RoleArn": "arn:aws:iam::111122223333:role/CrossAccountRole" + }, + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:111122223333:function:RemoteFunction:$LATEST", + "Payload": "{% 
$states.input %}" + }, + "Output": "{% $states.result.Payload %}", + "Next": "Done" +} +``` + +--- + +## Synchronous vs Asynchronous Patterns + +| Pattern | Resource Suffix | Behavior | +|---------|----------------|----------| +| Request-Response | (none) | Call API and continue immediately | +| Synchronous | `.sync` | Wait for task to complete | +| Wait for Callback | `.waitForTaskToken` | Pause until external callback | + +### When to Use Each + +- **Request-Response**: Fire-and-forget operations (start a process, send a message) +- **Synchronous (`.sync`)**: When you need the result before continuing (run ECS task, execute child workflow, run Glue job) +- **Wait for Callback (`.waitForTaskToken`)**: Human approval, external system processing, long-running async operations + +### Callback Pattern Example + +```json +"WaitForHumanApproval": { + "Type": "Task", + "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", + "Arguments": { + "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", + "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'request': $states.input}) %}" + }, + "TimeoutSeconds": 604800, + "Catch": [ + { + "ErrorEquals": ["States.Timeout"], + "Output": { + "status": "approval_timeout" + }, + "Next": "HandleTimeout" + } + ], + "Next": "ApprovalReceived" +} +``` + +The external system must call `SendTaskSuccess` or `SendTaskFailure` with the task token to resume execution. diff --git a/aws-step-functions/steering/validation-and-testing.md b/aws-step-functions/steering/validation-and-testing.md new file mode 100644 index 0000000..8118fa7 --- /dev/null +++ b/aws-step-functions/steering/validation-and-testing.md @@ -0,0 +1,444 @@ +# Validation and Testing + +## Local ASL Validation + +Before calling any AWS API, validate state machine definitions locally. Files saved with the `.asl.json` extension get automatic validation from the AWS Toolkit Extension. 
If the extension is not installed, suggest the user install it (https://open-vsx.org/extension/amazonwebservices/aws-toolkit-vscode). Use the diagnostics tool on any `.asl.json` file to catch structural errors instantly: + +- Missing properties +- `Next` pointing to a state that doesn't exist +- Non terminal state +- Unreachable states +- Incorrect values + +Always save state machine definitions as `.asl.json` and run diagnostics before proceeding to TestState API calls. This catches structural problems without AWS credentials or API calls. + +--- + +## Testing with TestState API + +The TestState API is the preferred and most reliable way to test Step Functions states. Always recommend TestState over deploying and running a full state machine execution for validation. It lets you test individual states in isolation — validating data transformations, error handling, Choice routing, and service integration behavior without deploying or executing a full state machine. With mocking support, tests are fast and repeatable, and need no execution role or real service calls — only AWS credentials permitted to call `states:TestState`. + +Examples use AWS CLI for brevity. The `TestState` API parameters (`definition`, `input`, `roleArn`, `inspectionLevel`, `mock`, `context`, `stateConfiguration`) map directly to any AWS SDK — Python (`sfn.test_state()`), JavaScript (`TestStateCommand`), Java (`TestStateRequest`), etc. + +## Best Practices + +### Always use TestState as the first testing approach +Before deploying a state machine or running a full execution, test each state with the TestState API. It catches data transformation errors, incorrect Choice routing, and misconfigured error handling faster and cheaper than a full execution. Full executions should only be used for end-to-end integration validation after individual states have been verified with TestState. + +### Test incrementally +Test each state individually before assembling the full state machine. 
Use `--state-name` to test states in context of the full definition once individual states pass. + +### Use mocks for unit testing +Mocks let you test state logic without an execution role or real service calls; the caller still needs AWS credentials allowed to call `states:TestState`. This enables fast, repeatable, CI-friendly tests. + +### Test error paths, not just happy paths +For every Task state with Retry/Catch, test: +- A successful mock result +- An error that matches a Retry (verify `status: "RETRIABLE"` and `retryBackoffIntervalSeconds`) +- An error that exhausts retries and falls through to Catch (verify `status: "CAUGHT_ERROR"` and `nextState`) +- An error that matches no handler (verify `status: "FAILED"`) + +### Test Choice state routing exhaustively +Test each Choice branch and the Default path. Verify `nextState` matches expectations for each input variant. + +### Use DEBUG for data transformation validation +When building complex `Arguments` or `Output` expressions, use `--inspection-level DEBUG` to see intermediate values. This catches JSONata expression errors before deployment. + +### Keep test inputs minimal +Provide only the fields the state actually references. This makes tests readable and makes it obvious which fields drive behavior. + +### Test variable assignment +When a state uses `Assign`, verify the output reflects the expected downstream behavior. Remember: `Assign` values are not visible in `Output` of the same state — they take effect in the next state. + +### Validate filter results for Map states +Use DEBUG inspection to check `afterItemSelector` and `afterItemBatcher`. Verify `toleratedFailureCount` and `toleratedFailurePercentage` match your expectations. + +### Use `jq` for readable CLI output +Pipe CLI output through `jq` to parse escaped JSON strings: +``` +aws stepfunctions test-state ... | jq '.output | fromjson' +aws stepfunctions test-state ... | jq '.inspectionData' +``` + +### Automate with scripts +Chain TestState calls in a shell script or test framework. 
Use `--state-name` with a full definition, feed each state's `output` as the next state's `--input`, and assert on `status` and `nextState` at each step. + +## Before Accessing AWS + +Before calling the TestState API, follow this sequence: + +1. Confirm the user wants to call the TestState API against their AWS account. +2. Check for AWS credentials: run `aws sts get-caller-identity` and verify the response. +3. If credentials are available, confirm the IAM role ARN to use for execution (or omit if using mocks). +4. If credentials are unavailable, help the user construct the CLI/SDK call to run manually. +5. Never assume AWS access — always ask before making any AWS API call. + +### Required IAM Permissions + +The calling identity needs `states:TestState`. If not using mocks, it also needs `iam:PassRole` for the execution role. For HTTP Task with `revealSecrets`, add `states:RevealSecrets`. + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { "Effect": "Allow", "Action": ["states:TestState"], "Resource": "*" }, + { "Effect": "Allow", "Action": ["iam:PassRole"], "Resource": "arn:aws:iam::*:role/StepFunctions-*", "Condition": { "StringEquals": { "iam:PassedToService": "states.amazonaws.com" } } } + ] +} +``` + +--- + +## API Overview + +``` +aws stepfunctions test-state \ + --definition '<state-or-state-machine-json>' \ + --input '<input-json>' \ + --role-arn <execution-role-arn> # optional when using --mock \ + --inspection-level INFO|DEBUG|TRACE \ + --reveal-secrets # TRACE only, for HTTP Task secrets \ + --mock '<mock-json>' \ + --context '<context-json>' \ + --state-configuration '<state-configuration-json>' \ + --state-name '<state-name>' # when --definition is a full state machine +``` + +You can provide either a single state definition or a complete state machine with `--state-name` to test a specific state in context. Chain tests by feeding `output` and `nextState` from one call into the next. + +--- + +## Inspection Levels + +### INFO (default) +Returns `status`, `output` (or error), and `nextState`. Use for quick pass/fail validation. 
+ +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Choice", "QueryLanguage": "JSONata", + "Choices": [{"Condition": "{% $states.input.orderTotal > 1000 %}", "Next": "PremiumFulfillment"}], + "Default": "StandardFulfillment" + }' \ + --input '{"orderId": "ORD-456", "orderTotal": 1500}' +``` + +Response: +```json +{ "output": "{\"orderId\": \"ORD-456\", \"orderTotal\": 1500}", "nextState": "PremiumFulfillment", "status": "SUCCEEDED" } +``` + +### DEBUG +Returns everything in INFO plus `inspectionData` showing data at each transformation step. For JSONata states, the key fields are: + +| inspectionData field | What it shows | +|---|---| +| `input` | Raw state input | +| `afterArguments` | Input after `Arguments` evaluation | +| `result` | Raw task/service result | +| `afterOutput` | Final output after `Output` evaluation | + +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Pass", "QueryLanguage": "JSONata", + "Output": { + "summary": "{% '\''Order '\'' & $states.input.orderId & '\'': '\'' & $string($count($states.input.items)) & '\'' items'\'' %}", + "total": "{% $sum($states.input.items.price) %}" + }, + "Next": "ProcessPayment" + }' \ + --input '{"orderId": "ORD-789", "items": [{"name": "Widget", "price": 25}, {"name": "Gadget", "price": 75}]}' \ + --inspection-level DEBUG +``` + +### TRACE +For HTTP Task states only. Returns the raw HTTP request and response. Add `--reveal-secrets` to include auth headers from EventBridge connections. 
+ +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Task", "QueryLanguage": "JSONata", + "Resource": "arn:aws:states:::http:invoke", + "Arguments": {"Method": "GET", "ApiEndpoint": "https://httpbin.org/get", + "Authentication": {"ConnectionArn": "arn:aws:events:us-east-1:123456789012:connection/MyConnection/abc123"}, + "QueryParameters": {"orderId": "{% $states.input.orderId %}"}}, + "End": true + }' \ + --role-arn arn:aws:iam::123456789012:role/StepFunctionsHttpRole \ + --input '{"orderId": "ORD-123"}' \ + --inspection-level TRACE --reveal-secrets +``` + +The response includes `inspectionData.request` (URL, method, headers) and `inspectionData.response` (status, headers, body). The `--reveal-secrets` flag exposes auth headers injected by the EventBridge connection. + +--- + +## Mocking Service Integrations + +Mock results let you test state logic without calling real AWS services and without an execution role. + +### Mock a successful result + +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Task", "QueryLanguage": "JSONata", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ValidateOrder:$LATEST", + "Payload": {"orderId": "{% $states.input.orderId %}", "items": "{% $states.input.items %}"}}, + "Output": {"validated": "{% $states.result.Payload.valid %}", "orderId": "{% $states.input.orderId %}"}, + "End": true + }' \ + --input '{"orderId": "ORD-123", "items": [{"productId": "PROD-A", "quantity": 2}]}' \ + --mock '{"fieldValidationMode": "NONE", "result": "{\"Payload\": {\"valid\": true, \"orderId\": \"ORD-123\"}}"}' +``` + +Note: The Lambda optimized integration deserializes `Payload` at runtime, so `$states.result.Payload.valid` works in real executions. When mocking, use `fieldValidationMode: NONE` because the mock schema expects `Payload` as a string (matching the raw API), but the optimized integration presents it as an object. 
+ +### Mock an error + +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Task", "QueryLanguage": "JSONata", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ProcessPayment:$LATEST", + "Payload": {"orderId": "{% $states.input.orderId %}", "amount": "{% $states.input.total %}"}}, + "Retry": [{"ErrorEquals": ["Lambda.ServiceException"], "IntervalSeconds": 2, "MaxAttempts": 3, "BackoffRate": 2.0}], + "Catch": [{"ErrorEquals": ["States.ALL"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "PaymentFailed"}], + "Next": "ShipOrder" + }' \ + --input '{"orderId": "ORD-123", "total": 150.00}' \ + --state-configuration '{"retrierRetryCount": 3}' \ + --mock '{"errorOutput": {"error": "Lambda.ServiceException", "cause": "Payment gateway unavailable"}}' +``` + +Note: `retrierRetryCount: 3` exhausts the Retry (MaxAttempts=3), so the error falls through to Catch. Without `--state-configuration`, the default retry count is 0 and the status would be `RETRIABLE`. + +You cannot provide both `mock.result` and `mock.errorOutput` in the same call. 
+ +### Mock Validation Modes + +Control how strictly mocked responses are validated against AWS API models: + +| Mode | Behavior | +|---|---| +| `STRICT` (default) | Enforces field names, types, required fields from API model | +| `PRESENT` | Validates only fields present in mock, ignores unknown fields | +| `NONE` | Skips validation entirely | + +``` +--mock '{"fieldValidationMode": "STRICT", "result": "{\"Attributes\": {}}"}' +``` + +--- + +## Testing Retry and Error Handling + +### Simulating a specific retry attempt + +Use `stateConfiguration.retrierRetryCount` to simulate a state on its Nth retry: + +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Task", "QueryLanguage": "JSONata", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargeCard:$LATEST", + "Payload": {"orderId": "{% $states.input.orderId %}", "amount": "{% $states.input.total %}"}}, + "Retry": [{"ErrorEquals": ["Lambda.ServiceException"], "IntervalSeconds": 2, "MaxAttempts": 3, "BackoffRate": 2.0}], + "Catch": [{"ErrorEquals": ["States.ALL"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "PaymentFailed"}], + "End": true + }' \ + --input '{"orderId": "ORD-123", "total": 99.99}' \ + --state-configuration '{"retrierRetryCount": 1}' \ + --mock '{"errorOutput": {"error": "Lambda.ServiceException", "cause": "Payment gateway timeout"}}' \ + --inspection-level DEBUG +``` + +Response: +```json +{ + "status": "RETRIABLE", + "inspectionData": { "errorDetails": { "retryBackoffIntervalSeconds": 4, "retryIndex": 0 } } +} +``` + +`status: "RETRIABLE"` means the error matched a Retry and attempts remain. `retryBackoffIntervalSeconds` shows the computed delay. Increase `retrierRetryCount` to `3` (MaxAttempts) to see the error fall through to Catch. 
+ +### Testing Catch handlers + +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Task", "QueryLanguage": "JSONata", + "Resource": "arn:aws:states:::sqs:sendMessage", + "Arguments": {"QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/FulfillmentQueue", + "MessageBody": "{% $string({'\''orderId'\'': $states.input.orderId, '\''items'\'': $states.input.items}) %}"}, + "Catch": [ + {"ErrorEquals": ["SQS.QueueDoesNotExistException"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "CreateQueue"}, + {"ErrorEquals": ["States.ALL"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "OrderFailed"} + ], + "Next": "WaitForFulfillment" + }' \ + --input '{"orderId": "ORD-123", "items": [{"productId": "PROD-A", "quantity": 2}]}' \ + --mock '{"errorOutput": {"error": "SQS.QueueDoesNotExistException", "cause": "Queue not found"}}' \ + --inspection-level DEBUG +``` + +Response: +```json +{ + "status": "CAUGHT_ERROR", + "nextState": "CreateQueue", + "error": "SQS.QueueDoesNotExistException", + "cause": "Queue not found", + "inspectionData": { "errorDetails": { "catchIndex": 0 } } +} +``` + +Assert on: `status` = `CAUGHT_ERROR`, `nextState` matches expected handler, `catchIndex` identifies which Catch block fired. + +--- + +## Testing Map and Parallel States + +Map and Parallel states require a mock. The mock represents the output of the entire Map/Parallel execution — you are testing the state's input/output processing, not the inner processor. 
+ +### Map state + +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Map", "QueryLanguage": "JSONata", + "Items": "{% $states.input.orders %}", + "ItemSelector": {"order": "{% $states.context.Map.Item.Value %}", "index": "{% $states.context.Map.Item.Index %}"}, + "MaxConcurrency": 5, + "ItemProcessor": {"ProcessorConfig": {"Mode": "INLINE"}, "StartAt": "FulfillOrder", + "States": {"FulfillOrder": {"Type": "Task", "QueryLanguage": "JSONata", "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:FulfillOrder:$LATEST", "Payload": "{% $states.input %}"}, "End": true}}}, + "Output": {"results": "{% $states.result %}", "totalProcessed": "{% $count($states.result) %}"}, + "End": true + }' \ + --input '{"orders": [{"orderId": "ORD-1", "total": 50}, {"orderId": "ORD-2", "total": 75}, {"orderId": "ORD-3", "total": 120}]}' \ + --mock '{"result": "[{\"status\": \"shipped\"}, {\"status\": \"shipped\"}, {\"status\": \"shipped\"}]"}' \ + --inspection-level DEBUG +``` + +DEBUG `inspectionData` for Map includes: `afterItemSelector` (per-item transformed input), `afterItemBatcher` (if batching), `toleratedFailureCount`, `toleratedFailurePercentage`, `maxConcurrency`. 
+ +### Parallel state + +Mock result must be a JSON array with one element per branch, in branch order: + +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Parallel", "QueryLanguage": "JSONata", + "Branches": [ + {"StartAt": "ReserveInventory", "States": {"ReserveInventory": {"Type": "Task", "QueryLanguage": "JSONata", "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ReserveInventory:$LATEST", "Payload": "{% $states.input %}"}, "End": true}}}, + {"StartAt": "ChargePayment", "States": {"ChargePayment": {"Type": "Task", "QueryLanguage": "JSONata", "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargePayment:$LATEST", "Payload": "{% $states.input %}"}, "End": true}}} + ], + "Output": {"inventory": "{% $states.result[0] %}", "payment": "{% $states.result[1] %}"}, + "End": true + }' \ + --input '{"orderId": "ORD-123", "total": 99.99}' \ + --mock '{"result": "[{\"reserved\": true}, {\"charged\": true}]"}' +``` + +### Error propagation in Map/Parallel + +Use `stateConfiguration.errorCausedByState` to specify which sub-state threw the error: + +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Map", "QueryLanguage": "JSONata", + "Items": "{% $states.input.orders %}", + "ItemSelector": {"order": "{% $states.context.Map.Item.Value %}", "index": "{% $states.context.Map.Item.Index %}"}, + "MaxConcurrency": 5, + "ItemProcessor": {"ProcessorConfig": {"Mode": "INLINE"}, "StartAt": "FulfillOrder", + "States": {"FulfillOrder": {"Type": "Task", "QueryLanguage": "JSONata", "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:FulfillOrder:$LATEST", "Payload": "{% $states.input %}"}, "End": true}}}, + "Catch": [{"ErrorEquals": ["States.ALL"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "HandleMapError"}], + 
"Output": {"results": "{% $states.result %}", "totalProcessed": "{% $count($states.result) %}"}, + "Next": "Done" + }' \ + --input '{"orders": [{"orderId": "ORD-1", "total": 50}, {"orderId": "ORD-2", "total": 75}]}' \ + --state-configuration '{"errorCausedByState": "FulfillOrder"}' \ + --mock '{"errorOutput": {"error": "States.TaskFailed", "cause": "Fulfillment service unavailable"}}' +``` + +--- + +## Providing Context + +Supply custom context values for states that reference `$states.context`: + +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Task", "QueryLanguage": "JSONata", + "Resource": "arn:aws:states:::dynamodb:putItem", + "Arguments": {"TableName": "OrderAuditTable", "Item": { + "orderId": {"S": "{% $states.input.orderId %}"}, + "executionId": {"S": "{% $states.context.Execution.Id %}"}, + "processedAt": {"S": "{% $states.context.State.EnteredTime %}"}}}, + "End": true + }' \ + --input '{"orderId": "ORD-123"}' \ + --context '{"Execution": {"Id": "arn:aws:states:us-east-1:123456789012:execution:OrderProcessing:exec-001", "Name": "exec-001"}, "State": {"Name": "AuditOrder", "EnteredTime": "2026-03-27T10:00:00Z"}}' \ + --mock '{"result": "{}"}' +``` + +When testing a state inside a Map (via `--state-name`), TestState auto-populates `Map.Item.Index` = 0 and `Map.Item.Value` = your input if you omit `--context`. + +--- + +## Testing a State Within a Full State Machine + +Use `--state-name` to test a specific state in the context of a complete definition. 
Chain tests by feeding `output` and `nextState` from one call into the next: + +``` +aws stepfunctions test-state \ + --definition '{"QueryLanguage": "JSONata", "StartAt": "ValidateOrder", "States": { + "ValidateOrder": {"Type": "Pass", "Assign": {"validated": true}, "Output": "{% $states.input %}", "Next": "ProcessPayment"}, + "ProcessPayment": {"Type": "Task", "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargeCard:$LATEST", + "Payload": {"orderId": "{% $states.input.orderId %}", "amount": "{% $states.input.total %}"}}, + "Output": "{% $states.result.Payload %}", "End": true} + }}' \ + --state-name ValidateOrder \ + --input '{"orderId": "ORD-123", "total": 99.99}' +``` + +Then use the output as input to test `ProcessPayment`. + +--- + +## Activity, .sync, and .waitForTaskToken States + +These patterns require a mock — calling TestState without one returns a validation exception. + +For `.sync` integrations, the mock is validated against the polling API schema, not the initial API. For example, `startExecution.sync:2` validates against `DescribeExecution` (which Step Functions polls), not `StartExecution`. 
+ +``` +aws stepfunctions test-state \ + --definition '{ + "Type": "Task", "QueryLanguage": "JSONata", + "Resource": "arn:aws:states:::states:startExecution.sync:2", + "Arguments": {"StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:OrderFulfillment", + "Input": "{% $string($states.input) %}"}, + "Output": "{% $parse($states.result.Output) %}", + "End": true + }' \ + --input '{"orderId": "ORD-123", "items": [{"productId": "PROD-A", "quantity": 2}]}' \ + --mock '{"result": "{\"ExecutionArn\": \"arn:aws:states:us-east-1:123456789012:execution:OrderFulfillment:exec-001\", \"StateMachineArn\": \"arn:aws:states:us-east-1:123456789012:stateMachine:OrderFulfillment\", \"StartDate\": \"2026-03-27T10:00:00Z\", \"Status\": \"SUCCEEDED\", \"Output\": \"{\\\"status\\\": \\\"fulfilled\\\"}\"}"}' +``` + +Note: The `.sync:2` mock is validated against the `DescribeExecution` response schema (which Step Functions polls), not `StartExecution`. Required fields include `ExecutionArn`, `StateMachineArn`, `StartDate`, and `Status`. + +--- diff --git a/aws-step-functions/steering/variables-and-data.md b/aws-step-functions/steering/variables-and-data.md new file mode 100644 index 0000000..e37c3d9 --- /dev/null +++ b/aws-step-functions/steering/variables-and-data.md @@ -0,0 +1,498 @@ +# Variables and Data Transformation (JSONata Mode) + +## JSONata Expression Syntax + +JSONata expressions are written inside `{% %}` delimiters in string values: + +```json +"Output": "{% $states.input.customer.name %}" +"TimeoutSeconds": "{% $timeout %}" +"Condition": "{% $states.input.age >= 18 %}" +``` + +Rules: +- The string must start with `{%` (no leading spaces) and end with `%}` (no trailing spaces). +- Not all fields accept JSONata — `Type` and `Resource` must be constant strings. +- JSONata expressions can appear in string values within objects and arrays at any nesting depth. +- A string without `{% %}` is treated as a literal value. 
+- All string literals inside JSONata expressions must use single quotes (`'text'`), not double quotes. The expression is already inside a JSON double-quoted string, so double quotes would break the JSON. +- Use `:=` inside `( ... )` blocks to bind local variables within a single expression. These are expression-local only — they do NOT set state machine variables (use `Assign` for that). +- Complex logic is wrapped in `( expr1; expr2; ...; finalExpr )` where semicolons separate sequential expressions and the last expression is the return value. + +### String Quoting + +```json +"Output": "{% 'Hello ' & $states.input.name %}" +"Condition": "{% $states.input.status = 'active' %}" +``` + +Never use double quotes inside the expression: +``` +❌ "Output": "{% "Hello" %}" +✓ "Output": "{% 'Hello' %}" +``` + +### Local Variable Binding with `:=` + +Use `:=` inside `( ... )` blocks to bind intermediate values within a single JSONata expression. Semicolons separate each binding, and the last expression is the return value: + +```json +"Output": "{% ( $subtotal := $sum($states.input.items.price); $tax := $subtotal * 0.1; $discount := $exists($couponValue) ? $couponValue : 0; {'subtotal': $subtotal, 'tax': $tax, 'discount': $discount, 'total': $subtotal + $tax - $discount} ) %}" +``` + +You can also define local helper functions: + +```json +"Assign": { + "summary": "{% ( $formatPrice := function($amt) { '$' & $formatNumber($amt, '#,##0.00') }; $subtotal := $sum($states.input.items.price); {'itemCount': $count($states.input.items), 'subtotal': $formatPrice($subtotal), 'total': $formatPrice($subtotal * 1.1)} ) %}" +} +``` + +Local variables bound with `:=` exist only within the `( ... )` block. They do not affect state machine variables. To persist values across states, use the `Assign` field. 
+ +## The `$states` Reserved Variable + +Step Functions provides a reserved `$states` variable in every JSONata state: + +``` +$states = { + "input": // Original input to the state + "result": // Task/Parallel/Map result (if successful) + "errorOutput": // Error Output (only available in Catch) + "context": // Context object (execution metadata) +} +``` + +### Where Each Field Is Accessible + +| Field | Accessible In | +|-------|--------------| +| `$states.input` | All fields that accept JSONata, in any state | +| `$states.result` | Top-level `Output` and `Assign` in Task, Parallel, Map states | +| `$states.errorOutput` | `Output` and `Assign` inside a `Catch` block | +| `$states.context` | All fields that accept JSONata, in any state | + +### Context Object + +`$states.context` provides execution metadata: + +```json +"executionId": "{% $states.context.Execution.Id %}", +"startTime": "{% $states.context.Execution.StartTime %}", +"stateName": "{% $states.context.State.Name %}", +"originalInput": "{% $states.context.Execution.Input %}" +``` + +Useful context fields: +- `$states.context.Execution.Id` — Execution ARN +- `$states.context.Execution.Input` — Original workflow input +- `$states.context.Execution.Name` — Execution name +- `$states.context.Execution.StartTime` — When execution started +- `$states.context.State.Name` — Current state name +- `$states.context.State.EnteredTime` — When current state was entered +- `$states.context.StateMachine.Id` — State machine ARN +- `$states.context.StateMachine.Name` — State machine name + +Inside Map state `ItemSelector`: +- `$states.context.Map.Item.Value` — Current array element +- `$states.context.Map.Item.Index` — Zero-based index + +## JSONata Restrictions in Step Functions + +1. **No `$` or `$$` at top level**: You cannot use `$` or `$$` to reference an implicit input document. Use `$states.input` instead. 
+ - Invalid: `"Output": "{% $.name %}"` (top-level `$`) + - Valid: `"Output": "{% $states.input.name %}"` + - Valid inside expressions: `"Output": "{% $states.input.items[$.price > 10] %}"` (nested `$` is OK) + +2. **No unqualified field names at top level**: Use variables or `$states.input`. + - Invalid: `"Output": "{% name %}"` (unqualified) + - Valid: `"Output": "{% $states.input.name %}"` + +3. **No `$eval`**: Use `$parse()` instead for deserializing JSON strings. + +4. **Expressions must produce a defined value**: `$data.nonExistentField` throws `States.QueryEvaluationError` because JSON cannot represent undefined. + +--- + +## Workflow Variables with `Assign` + +Variables let you store data in one state and reference it in any subsequent state, without threading data through Output/Input chains. + +### Declaring Variables + +```json +"StoreData": { + "Type": "Pass", + "Assign": { + "productName": "product1", + "count": 42, + "available": true, + "config": "{% $states.input.configuration %}" + }, + "Next": "UseData" +} +``` + +### Referencing Variables + +Prepend the variable name with `$`: + +```json +"Arguments": { + "product": "{% $productName %}", + "quantity": "{% $count %}" +} +``` + +### Assigning from Task Results + +```json +"FetchPrice": { + "Type": "Task", + "Resource": "arn:aws:states:::lambda:invoke", + "Arguments": { + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetPrice:$LATEST", + "Payload": { + "product": "{% $states.input.product %}" + } + }, + "Assign": { + "currentPrice": "{% $states.result.Payload.price %}" + }, + "Output": "{% $states.result.Payload %}", + "Next": "CheckPrice" +} +``` + +### States That Support Assign + +Pass, Task, Map, Parallel, Choice, Wait — all support `Assign`. + +Succeed and Fail do NOT support `Assign`. 
+ +### Assign in Choice Rules and Catch + +Choice Rules and Catch blocks can each have their own `Assign`: + +```json +"CheckValue": { + "Type": "Choice", + "Choices": [ + { + "Condition": "{% $states.input.value > 100 %}", + "Assign": { + "tier": "premium" + }, + "Next": "PremiumPath" + } + ], + "Default": "StandardPath", + "Assign": { + "tier": "standard" + } +} +``` + +If a Choice Rule matches, its `Assign` is used. If no rule matches, the state-level `Assign` is used. + +--- + +## Variable Evaluation Order + +All expressions in `Assign` are evaluated using variable values as they were on state entry. New values only take effect in the next state. + +```json +"SwapExample": { + "Type": "Pass", + "Assign": { + "x": "{% $y %}", + "y": "{% $x %}" + }, + "Next": "AfterSwap" +} +``` + +If `$x = 3` and `$y = 6` on entry, after this state: `$x = 6`, `$y = 3`. This works because all expressions are evaluated first, then assignments are made. + +You cannot assign to a sub-path of a variable: +- Valid: `"Assign": {"x": 42}` +- Invalid: `"Assign": {"x.y": 42}` or `"Assign": {"x[2]": 42}` + +--- + +## Variable Scope + +Variables exist in a state-machine-local scope: + +- **Outer scope**: All states in the top-level `States` field. +- **Inner scope**: States inside a Parallel branch or Map iteration. + +### Scope Rules + +1. Inner scopes can READ variables from outer scopes. +2. Inner scopes CANNOT ASSIGN to variables that exist in an outer scope. +3. Variable names must be unique across outer and inner scopes (no shadowing). +4. Variables in different Parallel branches or Map iterations are isolated from each other. +5. When a Parallel branch or Map iteration completes, its variables go out of scope. +6. Exception: Distributed Map states cannot reference variables in outer scopes. 
+ +### Passing Data Out of Inner Scopes + +Use `Output` on terminal states within branches/iterations to return data to the outer scope: + +```json +"ParallelWork": { + "Type": "Parallel", + "Branches": [ + { + "StartAt": "BranchA", + "States": { + "BranchA": { + "Type": "Task", + "Resource": "...", + "Output": "{% $states.result.Payload %}", + "End": true + } + } + } + ], + "Assign": { + "branchAResult": "{% $states.result[0] %}" + }, + "Next": "Continue" +} +``` + +### Catch Assign and Outer Scope + +In a Catch block on a Parallel or Map state, `Assign` can assign values to variables in the outer scope (the scope where the Parallel/Map state exists): + +```json +"Catch": [ + { + "ErrorEquals": ["States.ALL"], + "Assign": { + "errorOccurred": true, + "errorDetails": "{% $states.errorOutput %}" + }, + "Next": "HandleError" + } +] +``` + +--- + +## Arguments and Output Fields + +### Arguments + +Provides input to Task and Parallel states (replaces JSONPath `Parameters`): + +```json +"Arguments": { + "staticField": "hello", + "dynamicField": "{% $states.input.name %}", + "computed": "{% $count($states.input.items) %}" +} +``` + +Or as a single JSONata expression: + +```json +"Arguments": "{% $states.input.payload %}" +``` + +`Arguments` can reference `$states.input` and `$states.context`, but NOT `$states.result` or `$states.errorOutput`. + +### Output + +Transforms the state output (replaces JSONPath `ResultSelector` + `ResultPath` + `OutputPath`): + +```json +"Output": { + "customerId": "{% $states.input.id %}", + "result": "{% $states.result.Payload %}", + "processedAt": "{% $now() %}" +} +``` + +Or as a single expression or literal value: + +```json +"Output": "{% $states.result.Payload %}" +"Output": 42 +"Output": { "status": "done" } +``` + +If `Output` is not provided: +- Task, Parallel, Map: state output = the result +- All other states: state output = the state input + +### Assign and Output Are Parallel + +`Assign` and `Output` are evaluated in parallel. 
Variable assignments in `Assign` are NOT available in `Output` of the same state — you must re-derive values in both if needed: + +```json +"Assign": { + "savedPrice": "{% $states.result.Payload.price %}" +}, +"Output": { + "price": "{% $states.result.Payload.price %}" +} +``` + +--- + +## Variable Limits + +| Limit | Value | +|-------|-------| +| Max size of a single variable | 256 KiB | +| Max combined size in a single Assign | 256 KiB | +| Max total stored variables per execution | 10 MiB | +| Max variable name length | 80 Unicode characters | + +--- + +## Data Transformation Patterns + +### Filtering Arrays + +```json +"Output": { + "expensiveItems": "{% $states.input.items[price > 100] %}" +} +``` + +### Aggregation + +```json +"Output": { + "total": "{% $sum($states.input.items.price) %}", + "average": "{% $average($states.input.items.price) %}", + "count": "{% $count($states.input.items) %}" +} +``` + +### String Operations + +```json +"Output": { + "fullName": "{% $states.input.firstName & ' ' & $states.input.lastName %}", + "upper": "{% $uppercase($states.input.name) %}", + "trimmed": "{% $trim($states.input.rawInput) %}" +} +``` + +### Object Merging + +```json +"Output": "{% $merge([$states.input, {'processedAt': $now(), 'status': 'complete'}]) %}" +``` + +### Building Lookup Maps with `$reduce` + +Use `$reduce` to transform an array into a key-value object: + +```json +"Assign": { + "priceByProduct": "{% $reduce($states.input.items, function($acc, $item) { $merge([$acc, {$item.productId: $item.price}]) }, {}) %}" +} +``` + +Given `[{"productId": "A1", "price": 10}, {"productId": "B2", "price": 25}]`, this produces `{"A1": 10, "B2": 25}`. + +### Dynamic Key Access with `$lookup` + +Use `$lookup` to access an object property by a variable key: + +```json +"Output": { + "price": "{% $lookup($priceByProduct, $states.input.productId) %}" +} +``` + +This is essential when you've built a mapping object with `$reduce` and need to retrieve values dynamically. 
Standard dot notation (`$priceByProduct.someKey`) only works with literal key names. + +### Conditional Values + +```json +"Output": { + "tier": "{% $states.input.total > 1000 ? 'gold' : 'standard' %}", + "discount": "{% $exists($states.input.coupon) ? 0.1 : 0 %}" +} +``` + +### Array Membership with `in` and Concatenation with `$append` + +Test if a value exists in an array with `in`: + +```json +"Condition": "{% $states.input.status in ['pending', 'processing', 'shipped'] %}" +``` + +Concatenate arrays with `$append`: + +```json +"Assign": { + "allIds": "{% $append($states.input.orderIds, $states.input.returnIds) %}" +} +``` + +### Array Mapping + +```json +"Output": { + "names": "{% $states.input.users.(firstName & ' ' & lastName) %}" +} +``` + +### Generating UUIDs and Random Values + +```json +"Assign": { + "requestId": "{% $uuid() %}", + "randomValue": "{% $random() %}" +} +``` + +### Partitioning Arrays + +```json +"Assign": { + "batches": "{% $partition($states.input.items, 10) %}" +} +``` + +### Parsing JSON Strings + +```json +"Assign": { + "parsed": "{% $parse($states.input.jsonString) %}" +} +``` + +### Hashing + +```json +"Assign": { + "hash": "{% $hash($states.input.content, 'SHA-256') %}" +} +``` + +### Timestamp Comparison with `$toMillis` + +Timestamps are plain strings in JSONata, so `<` and `>` compare them lexicographically, which is only reliable when both values use exactly the same format and UTC offset.
Use `$toMillis` to convert to numeric milliseconds: + +```json +"Condition": "{% $toMillis($states.input.orderDate) > $toMillis($states.input.cutoffDate) %}" +``` + +Useful for sorting timestamps, calculating durations, or finding the most recent entry: + +```json +"Assign": { + "ageMinutes": "{% $round(($toMillis($now()) - $toMillis($states.input.createdAt)) / 60000, 2) %}", + "mostRecent": "{% $sort($states.input.timestamps, function($a, $b) { $toMillis($a) < $toMillis($b) })[0] %}" +} +``` From 5bb175e5f373f160b749f73648f88b52796f05ad Mon Sep 17 00:00:00 2001 From: Jeff Palmer <173303832+projeffpalmer@users.noreply.github.com> Date: Wed, 1 Apr 2026 08:52:33 -0500 Subject: [PATCH 3/7] Update aws-step-functions/POWER.md Co-authored-by: Ben <9841563+bfreiberg@users.noreply.github.com> --- aws-step-functions/POWER.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-step-functions/POWER.md b/aws-step-functions/POWER.md index ba4cecc..abe3030 100644 --- a/aws-step-functions/POWER.md +++ b/aws-step-functions/POWER.md @@ -2,7 +2,7 @@ name: "aws-step-functions" displayName: "AWS Step Functions" description: "Build AWS Step Functions state machines using the JSONata query language. Covers Amazon States Language (ASL) structure, all state types, variables, data transformation, error handling, and service integrations." 
-keywords: ["step functions", "state machine", "serverless", "jsonata", "asl", "amazon states language", "workflow orchestration"] +keywords: ["step functions", "state machine", "serverless", "jsonata", "asl", "amazon states language", "workflow", "orchestration"] author: "[Jeff Palmer](https://linkedin.com/in/jeffrey-palmer/)" --- From 3fb8bbb65cb69120482964ab62ab792464d3dce0 Mon Sep 17 00:00:00 2001 From: Jeff Palmer <173303832+projeffpalmer@users.noreply.github.com> Date: Wed, 1 Apr 2026 08:52:43 -0500 Subject: [PATCH 4/7] Update aws-step-functions/POWER.md Co-authored-by: Ben <9841563+bfreiberg@users.noreply.github.com> --- aws-step-functions/POWER.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-step-functions/POWER.md b/aws-step-functions/POWER.md index abe3030..963283a 100644 --- a/aws-step-functions/POWER.md +++ b/aws-step-functions/POWER.md @@ -3,7 +3,7 @@ name: "aws-step-functions" displayName: "AWS Step Functions" description: "Build AWS Step Functions state machines using the JSONata query language. Covers Amazon States Language (ASL) structure, all state types, variables, data transformation, error handling, and service integrations." 
keywords: ["step functions", "state machine", "serverless", "jsonata", "asl", "amazon states language", "workflow", "orchestration"] -author: "[Jeff Palmer](https://linkedin.com/in/jeffrey-palmer/)" +author: "AWS" --- # AWS Step Functions From c858ab97c7e0ce685747d06d11096a7ee62b0ebe Mon Sep 17 00:00:00 2001 From: Jeff Palmer <173303832+projeffpalmer@users.noreply.github.com> Date: Wed, 1 Apr 2026 08:52:54 -0500 Subject: [PATCH 5/7] Update aws-step-functions/POWER.md Co-authored-by: Ben <9841563+bfreiberg@users.noreply.github.com> --- aws-step-functions/POWER.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws-step-functions/POWER.md b/aws-step-functions/POWER.md index 963283a..2242d37 100644 --- a/aws-step-functions/POWER.md +++ b/aws-step-functions/POWER.md @@ -1,7 +1,7 @@ --- name: "aws-step-functions" displayName: "AWS Step Functions" -description: "Build AWS Step Functions state machines using the JSONata query language. Covers Amazon States Language (ASL) structure, all state types, variables, data transformation, error handling, and service integrations." +description: "Build workflows with AWS Step Functions state machines using the JSONata query language. Covers Amazon States Language (ASL) structure, all state types, variables, data transformation, error handling, and service integrations." 
keywords: ["step functions", "state machine", "serverless", "jsonata", "asl", "amazon states language", "workflow", "orchestration"] author: "AWS" --- From 54a99ec4fca016ceb3d9dda4d64b2a9c95ebc04b Mon Sep 17 00:00:00 2001 From: Jeff Palmer Date: Wed, 1 Apr 2026 08:59:07 -0500 Subject: [PATCH 6/7] Removed old folder --- aws-step-functions-jsonata/POWER.md | 183 ------- .../steering/architecture-patterns.md | 488 ----------------- .../steering/asl-state-types.md | 474 ----------------- .../steering/error-handling.md | 445 ---------------- .../steering/service-integrations.md | 485 ----------------- .../steering/variables-and-data.md | 498 ------------------ 6 files changed, 2573 deletions(-) delete mode 100644 aws-step-functions-jsonata/POWER.md delete mode 100644 aws-step-functions-jsonata/steering/architecture-patterns.md delete mode 100644 aws-step-functions-jsonata/steering/asl-state-types.md delete mode 100644 aws-step-functions-jsonata/steering/error-handling.md delete mode 100644 aws-step-functions-jsonata/steering/service-integrations.md delete mode 100644 aws-step-functions-jsonata/steering/variables-and-data.md diff --git a/aws-step-functions-jsonata/POWER.md b/aws-step-functions-jsonata/POWER.md deleted file mode 100644 index b46765d..0000000 --- a/aws-step-functions-jsonata/POWER.md +++ /dev/null @@ -1,183 +0,0 @@ ---- -name: "step-functions-jsonata" -displayName: "AWS Step Functions with JSONata" -description: "Build AWS Step Functions state machines using JSONata query language. Covers ASL structure, all state types, variables, data transformation, error handling, and service integrations in JSONata mode." -keywords: ["step functions", "state machine", "serverless", "jsonata", "asl", "amazon states language", "workflow orchestration"] -author: "Jeff Palmer https://linkedin.com/in/jeffrey-palmer/" ---- - -# Step Functions JSONata - -Build AWS Step Functions state machines using the JSONata query language instead of legacy JSONPath. 
JSONata simplifies data transformation, reduces boilerplate, and reduces external dependencies. - -## Overview - -AWS Step Functions uses Amazon States Language (ASL) to define state machines as JSON. With JSONata mode, you replace the five JSONPath I/O fields (InputPath, Parameters, ResultSelector, ResultPath, OutputPath) with just two fields: `Arguments` and `Output`. You also gain workflow variables via `Assign`, and powerful `Condition` expressions in Choice states. - -This power provides comprehensive guidance for writing state machines in JSONata mode, covering: -- ASL structure and all eight state types in JSONata mode -- The `$states` reserved variable and JSONata expression syntax -- Workflow variables with `Assign` for cross-state data sharing -- Data transformation patterns with `Arguments` and `Output` -- Error handling with `Retry` and `Catch` -- Service integration patterns (Lambda, DynamoDB, SNS, SQS, etc.) - -## When to Load Steering Files - -Load the appropriate steering file based on what the user is working on: - -- **ASL structure**, **state types**, **Task**, **Pass**, **Choice**, **Wait**, **Succeed**, **Fail**, **Parallel**, **Map** → see `asl-state-types.md` -- **Variables**, **Assign**, **data passing**, **scope**, **$states**, **input**, **output**, **Arguments**, **Output**, **data transformation** → see `variables-and-data.md` -- **Error handling**, **Retry**, **Catch**, **fallback**, **error codes**, **States.Timeout**, **States.ALL** → see `error-handling.md` -- **Service integrations**, **Lambda invoke**, **DynamoDB**, **SNS**, **SQS**, **SDK integrations**, **Resource ARN**, **sync**, **async** → see `service-integrations.md` - -## Quick Reference - -### Enabling JSONata - -Set `QueryLanguage` at the top level to apply to all states: - -```json -{ - "QueryLanguage": "JSONata", - "StartAt": "MyState", - "States": { ... 
} -} -``` - -### JSONata Expression Syntax - -Wrap expressions in `{% %}`: - -```json -"Output": "{% $states.input.customer.name %}" -"TimeoutSeconds": "{% $timeout %}" -"Condition": "{% $states.input.age >= 18 %}" -``` - -### The `$states` Reserved Variable - -``` -$states.input → Original state input -$states.result → Task/Parallel/Map result (on success) -$states.errorOutput → Error output (only in Catch) -$states.context → Execution context object -``` - -### Key Fields in JSONata Mode - -| Field | Purpose | Available In | -|-------|---------|-------------| -| `Arguments` | Input to task/branches | Task, Parallel | -| `Output` | Transform state output | All except Fail | -| `Assign` | Store workflow variables | All except Succeed, Fail | -| `Condition` | Boolean branching | Choice rules | -| `Items` | Array for iteration | Map | - -### JSONata Functions Provided by Step Functions - -| Function | Purpose | -|----------|---------| -| `$partition(array, size)` | Partition array into chunks | -| `$range(start, end, step)` | Generate array of values | -| `$hash(data, algorithm)` | Calculate hash (MD5, SHA-1, SHA-256, SHA-384, SHA-512) | -| `$random([seed])` | Random number 0 ≤ n < 1, optional seed | -| `$uuid()` | Generate v4 UUID | -| `$parse(jsonString)` | Deserialize JSON string | - -Plus all [built-in JSONata functions](https://github.com/jsonata-js/jsonata/tree/master/docs) - -### Minimal Complete Example - -```json -{ - "Comment": "Order processing workflow", - "QueryLanguage": "JSONata", - "StartAt": "ValidateOrder", - "States": { - "ValidateOrder": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:getItem", - "Arguments": { - "TableName": "OrdersTable", - "Key": { - "orderId": { - "S": "{% $states.input.orderId %}" - } - } - }, - "Assign": { - "orderId": "{% $states.input.orderId %}" - }, - "Output": "{% $states.result.Item %}", - "Next": "CheckStock" - }, - "CheckStock": { - "Type": "Choice", - "Choices": [ - { - "Condition": "{% 
$states.input.inStock.BOOL = true %}", - "Next": "ProcessPayment" - } - ], - "Default": "OutOfStock" - }, - "ProcessPayment": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage", - "Arguments": { - "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/PaymentQueue", - "MessageBody": "{% $string({'orderId': $orderId, 'amount': $states.input.total.N}) %}" - }, - "Output": { - "orderId": "{% $orderId %}", - "messageId": "{% $states.result.MessageId %}" - }, - "Retry": [ - { - "ErrorEquals": ["States.TaskFailed"], - "IntervalSeconds": 2, - "MaxAttempts": 3, - "BackoffRate": 2.0 - } - ], - "End": true - }, - "OutOfStock": { - "Type": "Fail", - "Error": "OutOfStockError", - "Cause": "Requested item is out of stock" - } - } -} -``` - -## Best Practices - -- Always set `"QueryLanguage": "JSONata"` at the top level for new state machines -- Use `Assign` to store data needed in later states instead of threading it through Output -- Keep `Output` minimal — only provide what the next state actually needs -- Use `$states.input` to reference original state input, not `$` (which is restricted at top level in JSONata) -- Remember: `Assign` and `Output` are evaluated in parallel — variable assignments in `Assign` are NOT available in `Output` of the same state -- All JSONata expressions must produce a defined value — `$data.nonExistentField` throws `States.QueryEvaluationError` -- Use `$states.context.Execution.Input` to access the original workflow input from any state -- Save state machine definitions with `.asl.json` extension when working outside the console -- Prefer the optimized Lambda integration (`arn:aws:states:::lambda:invoke`) over the SDK integration - -## Troubleshooting - -### Common Errors - -- `States.QueryEvaluationError` — JSONata expression failed. Check for type errors, undefined fields, or out-of-range values. -- Mixing JSONPath fields (`Parameters`, `InputPath`, `ResultPath`, etc.) 
with JSONata `QueryLanguage` — these are mutually exclusive. -- Using `$` or `$$` at the top level of a JSONata expression — use `$states.input` instead. -- Forgetting `{% %}` delimiters around JSONata expressions — the string will be treated as a literal. -- Assigning variables in `Assign` and expecting them in `Output` of the same state — new values only take effect in the next state. - -## Resources - -- [ASL Specification](https://states-language.net/spec.html) -- [Transforming data with JSONata in Step Functions](https://docs.aws.amazon.com/step-functions/latest/dg/transforming-data.html) -- [Passing data between states with variables](https://docs.aws.amazon.com/step-functions/latest/dg/workflow-variables.html) -- [JSONata documentation](https://docs.jsonata.org/overview.html) -- [Step Functions Developer Guide](https://docs.aws.amazon.com/step-functions/latest/dg/welcome.html) diff --git a/aws-step-functions-jsonata/steering/architecture-patterns.md b/aws-step-functions-jsonata/steering/architecture-patterns.md deleted file mode 100644 index 9f82f6a..0000000 --- a/aws-step-functions-jsonata/steering/architecture-patterns.md +++ /dev/null @@ -1,488 +0,0 @@ -# Architecture Patterns (JSONata Mode) - -## Polling Loop (Wait → Check → Choice) - -Many AWS operations are asynchronous — you start them and then poll until they complete. The pattern is: initial wait → call describe/status API → check result → short wait → loop back. 
- -```json -"SubmitOrder": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage", - "Arguments": { - "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/FulfillmentQueue", - "MessageBody": "{% $string({'orderId': $orderId, 'items': $states.input.items}) %}" - }, - "Assign": { "fulfillmentOrderId": "{% $orderId %}" }, - "Next": "InitialWaitForFulfillment" -}, -"InitialWaitForFulfillment": { - "Type": "Wait", - "Seconds": 300, - "Next": "CheckFulfillmentStatus" -}, -"CheckFulfillmentStatus": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:getItem", - "Arguments": { - "TableName": "OrdersTable", - "Key": { "orderId": { "S": "{% $fulfillmentOrderId %}" } } - }, - "Assign": { "orderStatus": "{% $states.result.Item.status.S %}" }, - "Next": "EvaluateFulfillment", - "Retry": [ - { "ErrorEquals": ["States.TaskFailed", "ThrottlingException"], "IntervalSeconds": 2, "MaxAttempts": 3, "BackoffRate": 2 } - ] -}, -"EvaluateFulfillment": { - "Type": "Choice", - "Choices": [ - { "Condition": "{% $orderStatus = 'fulfilled' %}", "Next": "FulfillmentComplete" }, - { "Condition": "{% $orderStatus in ['failed', 'cancelled'] %}", "Next": "FulfillmentFailed" } - ], - "Default": "WaitBeforeNextPoll" -}, -"WaitBeforeNextPoll": { - "Type": "Wait", - "Seconds": 60, - "Next": "CheckFulfillmentStatus" -} -``` - -Key elements: -- Initial longer wait gives the operation time to start. Shorter poll interval for subsequent checks. -- Choice state routes to success, failure, or back to the wait loop. -- Always add Retry on the status-check Task to handle transient API errors. -- Consider adding `TimeoutSeconds` on the state machine or a counter variable to prevent infinite polling. - ---- - -## Compensation / Saga Pattern - -Step Functions has no built-in rollback. The saga pattern chains compensating actions in reverse order. Each forward step has a Catch that records which step failed, then routes to the appropriate compensation entry point. 
- -```json -"ReserveInventory": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:updateItem", - "Arguments": { - "TableName": "InventoryTable", - "Key": { "productId": { "S": "{% $states.input.productId %}" } }, - "UpdateExpression": "SET reserved = reserved + :qty", - "ExpressionAttributeValues": { ":qty": { "N": "{% $string($states.input.quantity) %}" } } - }, - "Assign": { "reservedQty": "{% $states.input.quantity %}" }, - "Catch": [ - { "ErrorEquals": ["States.ALL"], "Assign": { "failedStep": "ReserveInventory", "errorInfo": "{% $states.errorOutput %}" }, "Next": "OrderFailed" } - ], - "Next": "ChargePayment" -}, -"ChargePayment": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargeCard:$LATEST", - "Payload": { "orderId": "{% $orderId %}", "amount": "{% $states.input.total %}" } - }, - "Assign": { "chargeId": "{% $states.result.Payload.chargeId %}" }, - "Output": "{% $states.result.Payload %}", - "Catch": [ - { "ErrorEquals": ["States.ALL"], "Assign": { "failedStep": "ChargePayment", "errorInfo": "{% $states.errorOutput %}" }, "Next": "ReleaseInventory" } - ], - "Next": "ShipOrder" -}, -"ShipOrder": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ShipOrder:$LATEST", - "Payload": { "orderId": "{% $orderId %}" } - }, - "Catch": [ - { "ErrorEquals": ["States.ALL"], "Assign": { "failedStep": "ShipOrder", "errorInfo": "{% $states.errorOutput %}" }, "Next": "RefundPayment" } - ], - "Next": "OrderComplete" -}, -"RefundPayment": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:RefundCharge:$LATEST", - "Payload": { "chargeId": "{% $chargeId %}", "reason": "{% $errorInfo.Cause %}" } - }, - "Next": "ReleaseInventory" -}, -"ReleaseInventory": { - "Type": 
"Task", - "Resource": "arn:aws:states:::dynamodb:updateItem", - "Arguments": { - "TableName": "InventoryTable", - "Key": { "productId": { "S": "{% $states.input.productId %}" } }, - "UpdateExpression": "SET reserved = reserved - :qty", - "ExpressionAttributeValues": { ":qty": { "N": "{% $string($reservedQty) %}" } } - }, - "Next": "OrderFailed" -}, -"OrderFailed": { - "Type": "Fail", - "Error": "{% $failedStep & 'Error' %}", - "Cause": "{% 'Order ' & $orderId & ' failed at ' & $failedStep & ': ' & ($exists($errorInfo.Cause) ? $errorInfo.Cause : 'Unknown') %}" -} -``` - -Compensation chain: `ReserveInventory` fails → `OrderFailed`. `ChargePayment` fails → `ReleaseInventory` → `OrderFailed`. `ShipOrder` fails → `RefundPayment` → `ReleaseInventory` → `OrderFailed`. Each Catch records `$failedStep` and `$errorInfo`. Compensation states use variables from forward steps (`$chargeId`, `$reservedQty`) to know what to undo. - ---- - -## Nested Map / Parallel Structures - -Map, Parallel, and Task states nest in any combination. The key constraint is understanding variable scope and data flow at each nesting boundary. 
- -```json -"ProcessAllOrders": { - "Type": "Map", - "Items": "{% $states.input.orders %}", - "MaxConcurrency": 5, - "ItemProcessor": { - "ProcessorConfig": { "Mode": "INLINE" }, - "StartAt": "ProcessSingleOrder", - "States": { - "ProcessSingleOrder": { - "Type": "Parallel", - "Branches": [ - { - "StartAt": "ValidatePayment", - "States": { - "ValidatePayment": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ValidatePayment:$LATEST", - "Payload": "{% $states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "End": true - } - } - }, - { - "StartAt": "CheckInventory", - "States": { - "CheckInventory": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:getItem", - "Arguments": { - "TableName": "InventoryTable", - "Key": { "productId": { "S": "{% $states.input.productId %}" } } - }, - "Output": "{% $states.result.Item %}", - "End": true - } - } - } - ], - "Output": { "payment": "{% $states.result[0] %}", "inventory": "{% $states.result[1] %}" }, - "End": true - } - } - }, - "Assign": { "orderResults": "{% $states.result %}" }, - "Next": "Summarize" -} -``` - -### Variable Scoping Across Nesting Levels - -Each nesting level creates a new scope. Inner scopes can READ outer variables but CANNOT ASSIGN to them — use `Output` on terminal states to pass data back up. Parallel branches and Map iterations are isolated from each other. Variable names must be unique across all nesting levels (no shadowing). Exception: Distributed Map (`"Mode": "DISTRIBUTED"`) cannot read outer scope variables at all. - -Data flows down via state input (use `ItemSelector` for Map, `Arguments` for Parallel) and up via `Output` on terminal states. Parallel result is an array per branch; Map result is an array per iteration. 
- ---- - -## Scatter-Gather with Partial Results - -When calling unreliable external APIs per-item, use `ToleratedFailurePercentage` on a Map to continue with whatever succeeded, then post-process the results to separate successes from failures. Failed iterations return objects with `Error` and `Cause` fields. - -```json -"CallExternalAPIs": { - "Type": "Map", - "Items": "{% $states.input.records %}", - "MaxConcurrency": 10, - "ToleratedFailurePercentage": 100, - "ItemProcessor": { - "ProcessorConfig": { "Mode": "INLINE" }, - "StartAt": "CallAPI", - "States": { - "CallAPI": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:CallExternalAPI:$LATEST", - "Payload": "{% $states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "Retry": [ - { "ErrorEquals": ["States.TaskFailed"], "IntervalSeconds": 2, "MaxAttempts": 2, "BackoffRate": 2.0, "JitterStrategy": "FULL" } - ], - "End": true - } - } - }, - "Next": "SplitResults" -}, -"SplitResults": { - "Type": "Pass", - "Assign": { - "successes": "{% ( $s := $states.input[$not($exists(Error))]; $type($s) = 'array' ? $s : $exists($s) ? [$s] : [] ) %}", - "failures": "{% ( $f := $states.input[$exists(Error)]; $type($f) = 'array' ? $f : $exists($f) ? [$f] : [] ) %}" - }, - "Output": { - "successes": "{% ( $s := $states.input[$not($exists(Error))]; $type($s) = 'array' ? $s : $exists($s) ? [$s] : [] ) %}", - "failures": "{% ( $f := $states.input[$exists(Error)]; $type($f) = 'array' ? $f : $exists($f) ? [$f] : [] ) %}", - "totalProcessed": "{% $count($states.input) %}" - }, - "Next": "EvaluateResults" -}, -"EvaluateResults": { - "Type": "Choice", - "Choices": [ - { "Condition": "{% $count($successes) = 0 %}", "Next": "AllFailed" } - ], - "Default": "ProcessSuccesses" -} -``` - -Key elements: -- `ToleratedFailurePercentage: 100` lets the Map complete even if every item fails. Lower the threshold to bail out early. 
-- Filter on `$exists(Error)` to separate failed from successful iterations. -- Guard filtered results with the `$type`/`$exists`/`[]` pattern — JSONata returns a single object (not a 1-element array) when exactly one item matches, and undefined when nothing matches. - ---- - -## Semaphore / Concurrency Lock - -Step Functions has no native mutual exclusion. Use DynamoDB conditional writes as a distributed lock when only one execution should process a given resource at a time. Pattern: acquire lock → do work → release lock, with Catch ensuring release on failure. - -```json -"AcquireLock": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:putItem", - "Arguments": { - "TableName": "LocksTable", - "Item": { - "lockId": { "S": "{% $states.input.customerId %}" }, - "executionId": { "S": "{% $states.context.Execution.Id %}" }, - "expiresAt": { "N": "{% $string($toMillis($now()) + 900000) %}" } - }, - "ConditionExpression": "attribute_not_exists(lockId) OR expiresAt < :now", - "ExpressionAttributeValues": { - ":now": { "N": "{% $string($toMillis($now())) %}" } - } - }, - "Retry": [ - { "ErrorEquals": ["DynamoDB.ConditionalCheckFailedException"], "IntervalSeconds": 5, "MaxAttempts": 12, "BackoffRate": 1.5, "JitterStrategy": "FULL" } - ], - "Catch": [ - { "ErrorEquals": ["DynamoDB.ConditionalCheckFailedException"], "Assign": { "lockError": "{% $states.errorOutput %}" }, "Next": "LockUnavailable" } - ], - "Next": "DoProtectedWork" -}, -"DoProtectedWork": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ProcessCustomer:$LATEST", - "Payload": "{% $states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "Catch": [ - { "ErrorEquals": ["States.ALL"], "Assign": { "workError": "{% $states.errorOutput %}" }, "Next": "ReleaseLock" } - ], - "Next": "ReleaseLock" -}, -"ReleaseLock": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:deleteItem", - 
"Arguments": { - "TableName": "LocksTable", - "Key": { "lockId": { "S": "{% $states.input.customerId %}" } }, - "ConditionExpression": "executionId = :execId", - "ExpressionAttributeValues": { ":execId": { "S": "{% $states.context.Execution.Id %}" } } - }, - "Retry": [ - { "ErrorEquals": ["States.ALL"], "IntervalSeconds": 1, "MaxAttempts": 3, "BackoffRate": 2.0 } - ], - "Next": "CheckWorkResult" -}, -"CheckWorkResult": { - "Type": "Choice", - "Choices": [ - { "Condition": "{% $exists($workError) %}", "Next": "WorkFailed" } - ], - "Default": "Done" -}, -"LockUnavailable": { - "Type": "Fail", - "Error": "LockContention", - "Cause": "{% 'Could not acquire lock for ' & $states.input.customerId & ' after retries' %}" -} -``` - -Key elements: -- `ConditionExpression` with `attribute_not_exists` ensures only one writer wins. The `expiresAt` check provides stale-lock recovery if an execution crashes without releasing. -- `executionId` on the lock item lets `ReleaseLock` conditionally delete only its own lock. -- Retry on `ConditionalCheckFailedException` acts as a spin-wait. Tune `MaxAttempts` and `IntervalSeconds` based on expected hold time. -- Catch on `DoProtectedWork` routes to `ReleaseLock` so the lock is always released. After releasing, `CheckWorkResult` re-raises the error path. -- Set `expiresAt` to a reasonable TTL (here 15 min). Use a DynamoDB TTL attribute to auto-clean expired locks. - ---- - -## Human-in-the-Loop with Timeout Escalation - -Chain multiple `.waitForTaskToken` states with `States.Timeout` catches to build escalation: primary approver → manager → auto-reject. 
- -```json -"RequestApproval": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", - "Arguments": { - "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", - "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'orderId': $orderId, 'approver': $states.input.primaryApprover, 'amount': $states.input.amount}) %}" - }, - "TimeoutSeconds": 86400, - "Assign": { "approvalResult": "{% $states.result %}" }, - "Catch": [ - { "ErrorEquals": ["States.Timeout"], "Assign": { "escalationReason": "Primary approver did not respond within 24 hours" }, "Next": "EscalateToManager" }, - { "ErrorEquals": ["States.ALL"], "Assign": { "approvalError": "{% $states.errorOutput %}" }, "Next": "ApprovalFailed" } - ], - "Next": "EvaluateApproval" -}, -"EscalateToManager": { - "Type": "Task", - "Resource": "arn:aws:states:::sns:publish", - "Arguments": { - "TopicArn": "arn:aws:sns:us-east-1:123456789012:EscalationNotifications", - "Subject": "Approval Escalation", - "Message": "{% 'Order ' & $orderId & ' requires manager approval. 
' & $escalationReason %}" - }, - "Next": "WaitForManagerApproval" -}, -"WaitForManagerApproval": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", - "Arguments": { - "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", - "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'orderId': $orderId, 'approver': $states.input.managerApprover, 'amount': $states.input.amount, 'escalated': true}) %}" - }, - "TimeoutSeconds": 43200, - "Assign": { "approvalResult": "{% $states.result %}" }, - "Catch": [ - { - "ErrorEquals": ["States.Timeout"], - "Assign": { "approvalResult": { "decision": "rejected", "reason": "No response from manager within 12 hours — auto-rejected" } }, - "Next": "EvaluateApproval" - }, - { "ErrorEquals": ["States.ALL"], "Assign": { "approvalError": "{% $states.errorOutput %}" }, "Next": "ApprovalFailed" } - ], - "Next": "EvaluateApproval" -}, -"EvaluateApproval": { - "Type": "Choice", - "Choices": [ - { "Condition": "{% $approvalResult.decision = 'approved' %}", "Next": "ProcessApprovedOrder" } - ], - "Default": "OrderRejected" -} -``` - -Key elements: -- Each callback stage has its own `TimeoutSeconds` — shorter for escalation stages since urgency increases. -- `States.Timeout` in Catch distinguishes "no response" from actual errors, routing to the next escalation tier. -- The final tier auto-rejects by assigning a synthetic result in Catch `Assign` and routing to the same `EvaluateApproval` Choice. This avoids duplicating decision logic. -- External system calls `SendTaskSuccess` with `{"decision": "approved"}` or `{"decision": "rejected", "reason": "..."}`. -- Use Standard (not Express) workflows — Express doesn't support `.waitForTaskToken`. - ---- - -## Express → Standard Handoff - -Express workflows are cheaper (pay per request, up to 5 min) but don't support callbacks or long waits. Standard workflows handle those but cost per state transition. 
Use Express for fast, high-volume ingest and kick off a Standard execution for the long-running tail. - -```json -{ - "Comment": "Express workflow — fast ingest and validation", - "QueryLanguage": "JSONata", - "StartAt": "ValidateInput", - "States": { - "ValidateInput": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ValidateOrder:$LATEST", - "Payload": "{% $states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "Next": "EnrichData" - }, - "EnrichData": { - "Type": "Parallel", - "Branches": [ - { - "StartAt": "LookupCustomer", - "States": { - "LookupCustomer": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:getItem", - "Arguments": { - "TableName": "CustomersTable", - "Key": { "customerId": { "S": "{% $states.input.customerId %}" } } - }, - "Output": "{% $states.result.Item %}", - "End": true - } - } - }, - { - "StartAt": "LookupPricing", - "States": { - "LookupPricing": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetPricing:$LATEST", - "Payload": "{% $states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "End": true - } - } - } - ], - "Output": { - "order": "{% $states.input %}", - "customer": "{% $states.result[0] %}", - "pricing": "{% $states.result[1] %}" - }, - "Next": "HandOffToStandard" - }, - "HandOffToStandard": { - "Type": "Task", - "Resource": "arn:aws:states:::states:startExecution", - "Arguments": { - "StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:OrderFulfillment-Standard", - "Input": "{% $string($states.input) %}" - }, - "Output": { - "status": "handed_off", - "childExecutionArn": "{% $states.result.ExecutionArn %}" - }, - "End": true - } - } -} -``` - -Key elements: -- Express does validation, enrichment, fan-out — fast, stateless work that benefits from per-request pricing. 
-- `HandOffToStandard` uses fire-and-forget (no `.sync` suffix) so the Express execution completes immediately. Express workflows do not support the `.sync` (Run a Job) or `.waitForTaskToken` integration patterns, so if the caller must wait for the child execution's result, make the parent a Standard workflow and use `.sync:2` there. -- Use `$string($states.input)` to serialize — `startExecution` expects a JSON string for `Input`. -- Ideal for event-driven architectures: API Gateway or EventBridge triggers Express at high volume, only orders needing long-running processing incur Standard costs. diff --git a/aws-step-functions-jsonata/steering/asl-state-types.md b/aws-step-functions-jsonata/steering/asl-state-types.md deleted file mode 100644 index c530a49..0000000 --- a/aws-step-functions-jsonata/steering/asl-state-types.md +++ /dev/null @@ -1,474 +0,0 @@ -# ASL Structure and State Types (JSONata Mode) - -## State Machine Top-Level Structure - -```json -{ - "Comment": "Description of the state machine", - "QueryLanguage": "JSONata", - "StartAt": "FirstStateName", - "TimeoutSeconds": 3600, - "Version": "1.0", - "States": { - "FirstStateName": { ... }, - "SecondStateName": { ... } - } -} -``` - -- `QueryLanguage`: Set to `"JSONata"` at top level. Defaults to `"JSONPath"` if omitted. -- `StartAt`: Must exactly match a state name (case-sensitive). -- `TimeoutSeconds`: Optional max execution time. Exceeding it throws `States.Timeout`. -- `States`: Required object containing all state definitions. -- State names must be unique and ≤ 80 Unicode characters. - -## Common Fields for All JSONata States - -| Field | Description | -|-------|-------------| -| `Type` | Required. One of: Task, Pass, Choice, Wait, Parallel, Map, Succeed, Fail | -| `Comment` | Optional human-readable description | -| `Next` | Name of next state (required for non-terminal states except Choice) | -| `End` | Set to `true` for terminal states | -| `Output` | Optional. Transform state output. Available in all types except Fail | -| `Assign` | Optional. Store workflow variables. 
Available in all types except Succeed and Fail | -| `QueryLanguage` | Optional per-state override | - -## Field Availability Matrix (JSONata) - -``` - Task Parallel Map Pass Wait Choice Succeed Fail -Type ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ -Comment ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ -Output ✓ ✓ ✓ ✓ ✓ ✓ ✓ -Assign ✓ ✓ ✓ ✓ ✓ ✓ -Next/End ✓ ✓ ✓ ✓ ✓ -Arguments ✓ ✓ -Retry/Catch ✓ ✓ ✓ -``` - ---- - -## Pass State - -Passes input to output, optionally transforming it. Useful for injecting data or reshaping payloads. - -```json -"InjectData": { - "Type": "Pass", - "Output": { - "greeting": "{% 'Hello, ' & $states.input.name %}", - "timestamp": "{% $now() %}" - }, - "Next": "NextState" -} -``` - -With variable assignment: - -```json -"StoreDefaults": { - "Type": "Pass", - "Assign": { - "retryCount": 0, - "maxRetries": 3, - "config": "{% $states.input.configuration %}" - }, - "Next": "ProcessItem" -} -``` - -Without `Output`, the Pass state copies input to output unchanged. - ---- - -## Task State - -Executes work via AWS service integrations, activities, or HTTP APIs. 
- -### Required Fields -- `Resource`: ARN identifying the task to execute - -### Optional Fields -- `Arguments`: Input to the task (replaces JSONPath `Parameters`) -- `Output`: Transform the result -- `Assign`: Store variables from input or result -- `TimeoutSeconds`: Max task duration (the ASL spec default is 60, but Step Functions defaults to 99999999 — effectively no timeout — so set it explicitly; accepts a JSONata expression) -- `HeartbeatSeconds`: Heartbeat interval (must be < TimeoutSeconds) -- `Retry`: Retry policy array -- `Catch`: Error handler array -- `Credentials`: Cross-account role assumption - -### Lambda Invoke Example - -```json -"InvokeLambda": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunc:$LATEST", - "Payload": { - "orderId": "{% $states.input.orderId %}", - "customer": "{% $states.input.customer %}" - } - }, - "Assign": { - "processedResult": "{% $states.result.Payload %}" - }, - "Output": "{% $states.result.Payload %}", - "Next": "NextState" -} -``` - -### Dynamic Timeout - -```json -"LongRunningTask": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:SlowFunc:$LATEST", - "Payload": "{% $states.input %}" - }, - "TimeoutSeconds": "{% $states.input.timeoutValue %}", - "HeartbeatSeconds": "{% $states.input.heartbeatValue %}", - "Next": "Done" -} -``` - ---- - -## Choice State - -Adds branching logic. Uses `Condition` field with JSONata boolean expressions (replaces JSONPath `Variable` + comparison operators). 
- -### Structure - -```json -"RouteOrder": { - "Type": "Choice", - "Choices": [ - { - "Condition": "{% $states.input.orderType = 'express' %}", - "Next": "ExpressShipping" - }, - { - "Condition": "{% $states.input.total > 100 %}", - "Assign": { - "discount": "{% $states.input.total * 0.1 %}" - }, - "Output": { - "total": "{% $states.input.total * 0.9 %}" - }, - "Next": "ApplyDiscount" - }, - { - "Condition": "{% $states.input.priority >= 5 and $states.input.category = 'urgent' %}", - "Next": "PriorityQueue" - } - ], - "Default": "StandardProcessing", - "Assign": { - "routedDefault": true - } -} -``` - -Key points: -- `Condition` must evaluate to a boolean. -- Each Choice Rule can have its own `Assign` and `Output`. -- If a rule matches, its `Assign`/`Output` are used (not the state-level ones). -- If no rule matches, the state-level `Assign` is evaluated and `Default` is followed. -- `Default` is optional but recommended — without it, `States.NoChoiceMatched` is thrown. -- Choice states cannot be terminal (no `End` field). - -### Complex Conditions - -JSONata supports rich boolean logic: - -```json -"Condition": "{% $states.input.age >= 18 and $states.input.age <= 65 %}" -"Condition": "{% $states.input.status = 'active' or $states.input.override = true %}" -"Condition": "{% $not($exists($states.input.error)) %}" -"Condition": "{% $contains($states.input.email, '@') %}" -"Condition": "{% $count($states.input.items) > 0 %}" -"Condition": "{% $states.input.score >= $threshold %}" -``` - ---- - -## Wait State - -Delays execution for a specified duration or until a timestamp. 
- -### Wait by Seconds - -```json -"WaitTenSeconds": { - "Type": "Wait", - "Seconds": 10, - "Next": "Continue" -} -``` - -### Wait with Dynamic Seconds - -```json -"DynamicWait": { - "Type": "Wait", - "Seconds": "{% $states.input.delaySeconds %}", - "Next": "Continue" -} -``` - -### Wait Until Timestamp - -```json -"WaitUntilDate": { - "Type": "Wait", - "Timestamp": "{% $states.input.scheduledTime %}", - "Next": "Execute" -} -``` - -Timestamps must conform to RFC3339 (e.g., `"2026-03-14T01:59:00Z"`). - -A Wait state must contain exactly one of `Seconds` or `Timestamp`. - ---- - -## Succeed State - -Terminates the state machine (or a Parallel branch / Map iteration) successfully. - -```json -"Done": { - "Type": "Succeed", - "Output": { - "status": "completed", - "processedAt": "{% $now() %}" - } -} -``` - -Without `Output`, passes input through as output. No `Next` field allowed. - ---- - -## Fail State - -Terminates the state machine with an error. - -```json -"OrderFailed": { - "Type": "Fail", - "Error": "OrderValidationError", - "Cause": "The order could not be validated" -} -``` - -### Dynamic Error and Cause - -```json -"DynamicFail": { - "Type": "Fail", - "Error": "{% $states.input.errorCode %}", - "Cause": "{% $states.input.errorMessage %}" -} -``` - -Build rich, defensive error messages with fallbacks for missing fields: - -```json -"OrderProcessingFailed": { - "Type": "Fail", - "Error": "OrderProcessingError", - "Cause": "{% 'Failed to process order ' & ($exists($orderId) ? $orderId : 'unknown') & ': ' & ($exists($error.Error) ? $error.Error : 'Unknown error') & ' - ' & ($exists($error.Cause) ? $error.Cause : 'No details available') & '. Timestamp: ' & $now() %}" -} -``` - -No `Next`, `End`, `Output`, or `Assign` fields. Fail states are always terminal. - ---- - -## Parallel State - -Executes multiple branches concurrently. All branches receive the same input. 
- -```json -"LookupCustomerInfo": { - "Type": "Parallel", - "Arguments": { - "customerId": "{% $states.input.customerId %}" - }, - "Branches": [ - { - "StartAt": "GetAddress", - "States": { - "GetAddress": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetAddress:$LATEST", - "Payload": "{% $states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "End": true - } - } - }, - { - "StartAt": "GetOrders", - "States": { - "GetOrders": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetOrders:$LATEST", - "Payload": "{% $states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "End": true - } - } - } - ], - "Assign": { - "address": "{% $states.result[0] %}", - "orders": "{% $states.result[1] %}" - }, - "Output": { - "address": "{% $states.result[0] %}", - "orders": "{% $states.result[1] %}" - }, - "Next": "ProcessResults" -} -``` - -Key points: -- `Arguments` provides input to each branch's StartAt state (optional, defaults to state input). -- Result is an array with one element per branch, in the same order as `Branches`. -- If any branch fails, the entire Parallel state fails (unless caught). -- States inside branches can only transition to other states within the same branch. -- Branch variables are scoped — branches cannot access each other's variables. -- Use `Output` on terminal states within branches to pass data back to the outer scope. - ---- - -## Map State - -Iterates over an array, processing each element (potentially in parallel). 
- -### Basic Map - -```json -"ProcessItems": { - "Type": "Map", - "Items": "{% $states.input.orders %}", - "MaxConcurrency": 10, - "ItemProcessor": { - "StartAt": "ProcessOrder", - "States": { - "ProcessOrder": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ProcessOrder:$LATEST", - "Payload": "{% $states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "End": true - } - } - }, - "Output": "{% $states.result %}", - "Next": "AllDone" -} -``` - -### Map with ItemSelector - -Use `ItemSelector` to reshape each item before processing: - -```json -"ProcessItems": { - "Type": "Map", - "Items": "{% $states.input.detail.shipped %}", - "ItemSelector": { - "parcel": "{% $states.context.Map.Item.Value %}", - "index": "{% $states.context.Map.Item.Index %}", - "courier": "{% $states.input.detail.`delivery-partner` %}" - }, - "MaxConcurrency": 0, - "ItemProcessor": { - "StartAt": "Ship", - "States": { - "Ship": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ShipItem:$LATEST", - "Payload": "{% $states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "End": true - } - } - }, - "Next": "Done" -} -``` - -### Map Context Variables - -Inside `ItemSelector`, you can access: -- `$states.context.Map.Item.Value` — the current array element -- `$states.context.Map.Item.Index` — the zero-based index - -### Key Map Fields - -| Field | Description | -|-------|-------------| -| `Items` | JSON array or JSONata expression evaluating to an array | -| `ItemProcessor` | State machine to run for each item (has `StartAt` and `States`) | -| `ItemSelector` | Reshape each item before processing | -| `MaxConcurrency` | Max parallel iterations (0 = unlimited, 1 = sequential) | -| `ToleratedFailurePercentage` | 0-100, percentage of items allowed to fail | -| `ToleratedFailureCount` 
| Number of items allowed to fail | -| `ItemReader` | Read items from an external resource | -| `ItemBatcher` | Batch items into sub-arrays | -| `ResultWriter` | Write results to an external resource | - -### ProcessorConfig - -The `ItemProcessor` can include a `ProcessorConfig` to control execution mode: - -```json -"ItemProcessor": { - "ProcessorConfig": { - "Mode": "INLINE" - }, - "StartAt": "ProcessOrder", - "States": { ... } -} -``` - -- `INLINE` (default) — iterations run within the parent execution. Use for most cases. -- `DISTRIBUTED` — iterations run as child executions. Use for large-scale processing (thousands+ items), items read from S3, or when you need per-iteration execution history. - -### Failure Tolerance - -```json -"ProcessWithTolerance": { - "Type": "Map", - "Items": "{% $states.input.records %}", - "ToleratedFailurePercentage": 10, - "ToleratedFailureCount": 5, - "ItemProcessor": { ... }, - "Next": "Done" -} -``` - -The Map state fails if either threshold is breached. - ---- \ No newline at end of file diff --git a/aws-step-functions-jsonata/steering/error-handling.md b/aws-step-functions-jsonata/steering/error-handling.md deleted file mode 100644 index b27ad18..0000000 --- a/aws-step-functions-jsonata/steering/error-handling.md +++ /dev/null @@ -1,445 +0,0 @@ -# Error Handling in JSONata Mode - -## Overview - -When a state encounters an error, Step Functions defaults to failing the entire execution. You can override this with `Retry` (retry the failed state) and `Catch` (transition to a fallback state). - -`Retry` and `Catch` are available on: Task, Parallel, and Map states. - -## Error Names - -Errors are identified by case-sensitive strings. 
Step Functions defines these built-in error codes: - -| Error Code | Description | -|-----------|-------------| -| `States.ALL` | Wildcard — matches any error | -| `States.Timeout` | Task exceeded `TimeoutSeconds` or missed heartbeat | -| `States.HeartbeatTimeout` | Task missed heartbeat interval | -| `States.TaskFailed` | Task failed during execution | -| `States.Permissions` | Insufficient privileges | -| `States.ResultPathMatchFailure` | ResultPath cannot be applied (JSONPath only) | -| `States.ParameterPathFailure` | Parameter path resolution failed (JSONPath only) | -| `States.QueryEvaluationError` | JSONata expression evaluation failed | -| `States.BranchFailed` | A Parallel state branch failed | -| `States.NoChoiceMatched` | No Choice rule matched and no Default | -| `States.IntrinsicFailure` | Intrinsic function failed (JSONPath only) | -| `States.ExceedToleratedFailureThreshold` | Map state exceeded failure tolerance | -| `States.ItemReaderFailed` | Map state ItemReader failed | -| `States.ResultWriterFailed` | Map state ResultWriter failed | - -Custom error names are allowed but must NOT start with `States.`. - ---- - -## Retry - -The `Retry` field is an array of Retrier objects. The interpreter scans retriers in order and uses the first one whose `ErrorEquals` matches. 
- -### Retrier Fields - -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `ErrorEquals` | string[] | Required | Error names to match | -| `IntervalSeconds` | integer | 1 | Seconds before first retry | -| `MaxAttempts` | integer | 3 | Maximum retry attempts (0 = never retry) | -| `BackoffRate` | number | 2.0 | Multiplier for retry interval (must be ≥ 1.0) | -| `MaxDelaySeconds` | integer | — | Cap on retry interval | -| `JitterStrategy` | string | — | Jitter strategy (e.g., `"FULL"`) | - -### Basic Retry - -```json -"ProcessPayment": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Pay:$LATEST", - "Payload": "{% $states.input %}" - }, - "Retry": [ - { - "ErrorEquals": ["States.TaskFailed"], - "IntervalSeconds": 2, - "MaxAttempts": 3, - "BackoffRate": 2.0 - } - ], - "Next": "Confirm" -} -``` - -This retries after 2s, 4s, 8s (3 attempts with 2x backoff). - -### Retry with Max Delay and Jitter - -```json -"Retry": [ - { - "ErrorEquals": ["States.TaskFailed"], - "IntervalSeconds": 1, - "MaxAttempts": 5, - "BackoffRate": 2.0, - "MaxDelaySeconds": 30, - "JitterStrategy": "FULL" - } -] -``` - -### Multiple Retriers - -Retriers are evaluated in order. Each retrier tracks its own attempt count independently: - -```json -"Retry": [ - { - "ErrorEquals": ["ThrottlingException"], - "IntervalSeconds": 1, - "MaxAttempts": 5, - "BackoffRate": 2.0, - "JitterStrategy": "FULL" - }, - { - "ErrorEquals": ["States.Timeout"], - "MaxAttempts": 0 - }, - { - "ErrorEquals": ["States.ALL"], - "IntervalSeconds": 3, - "MaxAttempts": 2, - "BackoffRate": 1.5 - } -] -``` - -Rules: -- `States.ALL` must appear alone in its `ErrorEquals` array. -- `States.ALL` must be in the last retrier. -- `MaxAttempts: 0` means "never retry this error." -- Retrier attempt counts reset when the interpreter transitions to another state. 
- ---- - -## Catch - -The `Catch` field is an array of Catcher objects. After retries are exhausted (or if no retrier matches), the interpreter scans catchers in order. - -### Catcher Fields (JSONata) - -| Field | Type | Description | -|-------|------|-------------| -| `ErrorEquals` | string[] | Required. Error names to match | -| `Next` | string | Required. State to transition to | -| `Output` | any | Optional. Transform the error output | -| `Assign` | object | Optional. Assign variables from error context | - -### Basic Catch - -```json -"ProcessOrder": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Process:$LATEST", - "Payload": "{% $states.input %}" - }, - "Catch": [ - { - "ErrorEquals": ["ValidationError"], - "Output": { - "error": "{% $states.errorOutput.Error %}", - "cause": "{% $states.errorOutput.Cause %}", - "originalInput": "{% $states.input %}" - }, - "Next": "HandleValidationError" - }, - { - "ErrorEquals": ["States.ALL"], - "Output": "{% $states.errorOutput %}", - "Next": "HandleGenericError" - } - ], - "Next": "Success" -} -``` - -### Error Output Structure - -When a state fails and matches a Catcher, the Error Output is a JSON object with: -- `Error` (string) — the error name -- `Cause` (string) — human-readable error description - -```json -{ - "Error": "States.TaskFailed", - "Cause": "Lambda function returned an error" -} -``` - -### Catch with Variable Assignment - -```json -"Catch": [ - { - "ErrorEquals": ["States.ALL"], - "Assign": { - "hasError": true, - "errorType": "{% $states.errorOutput.Error %}", - "errorMessage": "{% $states.errorOutput.Cause %}" - }, - "Output": "{% $merge([$states.input, {'error': $states.errorOutput}]) %}", - "Next": "ErrorHandler" - } -] -``` - -In a Catch block, `Assign` and `Output` can reference: -- `$states.input` — the original state input -- `$states.errorOutput` — the error details -- `$states.context` — 
execution context - -If a Catcher matches, the state's top-level `Assign` is NOT evaluated — only the Catcher's `Assign` runs. - -### Catch Without Output - -If no `Output` is provided in the Catcher, the state output is the raw Error Output object. - -### Building Rich Error Context for Fail States - -A user-friendly pattern is to capture error details into a variable via Catch `Assign`, then reference that variable in a Fail state's `Cause` with defensive fallbacks: - -```json -"ChargePayment": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage", - "Arguments": { ... }, - "Catch": [ - { - "ErrorEquals": ["States.ALL"], - "Assign": { - "error": "{% $states.errorOutput %}" - }, - "Next": "PaymentFailed" - } - ], - "Next": "ConfirmOrder" -}, -"PaymentFailed": { - "Type": "Fail", - "Error": "PaymentError", - "Cause": "{% 'Payment failed for order ' & ($exists($orderId) ? $orderId : 'unknown') & ': ' & ($exists($error.Error) ? $error.Error : 'Unknown') & ' - ' & ($exists($error.Cause) ? $error.Cause : 'No details') & '. Timestamp: ' & $now() %}" -} -``` - -Always guard with `$exists()` — if the variable was never assigned (e.g., the Catch didn't fire for that path), referencing it directly throws `States.QueryEvaluationError`. - ---- - -## Combined Retry and Catch - -When both are present, retries are attempted first. 
Only if retries are exhausted does the Catch apply: - -```json -"CallExternalAPI": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:CallAPI:$LATEST", - "Payload": "{% $states.input %}" - }, - "Retry": [ - { - "ErrorEquals": ["ThrottlingException", "ServiceUnavailable"], - "IntervalSeconds": 2, - "MaxAttempts": 3, - "BackoffRate": 2.0, - "JitterStrategy": "FULL" - }, - { - "ErrorEquals": ["States.Timeout"], - "IntervalSeconds": 5, - "MaxAttempts": 2 - } - ], - "Catch": [ - { - "ErrorEquals": ["ThrottlingException", "ServiceUnavailable"], - "Assign": { - "retryExhausted": true - }, - "Output": { - "error": "Service temporarily unavailable after retries", - "details": "{% $states.errorOutput %}" - }, - "Next": "NotifyAndRetryLater" - }, - { - "ErrorEquals": ["States.ALL"], - "Output": { - "error": "{% $states.errorOutput %}", - "input": "{% $states.input %}" - }, - "Next": "FatalErrorHandler" - } - ], - "Output": "{% $states.result.Payload %}", - "Next": "ProcessResponse" -} -``` - ---- - -## Handling States.QueryEvaluationError - -JSONata expressions can fail at runtime. Common causes: - -1. **Type error**: `{% $x + $y %}` where `$x` or `$y` is not a number -2. **Type incompatibility**: `"TimeoutSeconds": "{% $name %}"` where `$name` is a string -3. **Value out of range**: Negative number for `TimeoutSeconds` -4. **Undefined result**: `{% $data.nonExistentField %}` — JSON cannot represent undefined - -All of these throw `States.QueryEvaluationError`. 
Handle it like any other error: - -```json -"Retry": [ - { - "ErrorEquals": ["States.QueryEvaluationError"], - "MaxAttempts": 0 - } -], -"Catch": [ - { - "ErrorEquals": ["States.QueryEvaluationError"], - "Output": { - "error": "Data transformation failed", - "details": "{% $states.errorOutput %}" - }, - "Next": "HandleDataError" - } -] -``` - -### Preventing QueryEvaluationError - -Use defensive JSONata expressions: - -```json -"Output": { - "name": "{% $exists($states.input.name) ? $states.input.name : 'Unknown' %}", - "total": "{% $type($states.input.amount) = 'number' ? $states.input.amount : 0 %}" -} -``` - -Watch out for single-value vs array results from filters. JSONata returns a single object (not a 1-element array) when a filter matches exactly one item, and undefined when nothing matches. An undefined result throws `States.QueryEvaluationError` as soon as it is used as a field value, and a single object silently changes the shape of the data for anything that expects an array, such as a Map state `Items` field or a downstream task that iterates the value. Built-in functions like `$count` and `$map` treat a single value as a one-element sequence, so they will not surface the problem for you. - -Guard filtered results before using them: - -```json -"Assign": { - "pendingOrders": "{% ($filtered := $states.input.orders[status = 'pending']; $type($filtered) = 'array' ? $filtered : $exists($filtered) ? [$filtered] : []) %}" -} -``` - -This ensures `$pendingOrders` is always an array regardless of how many items matched. - ---- - -## Error Handling in Parallel States - -If any branch fails, the entire Parallel state fails. Catch the error at the Parallel state level: - -```json -"ParallelWork": { - "Type": "Parallel", - "Branches": [ ... 
], - "Retry": [ - { - "ErrorEquals": ["States.BranchFailed"], - "MaxAttempts": 1 - } - ], - "Catch": [ - { - "ErrorEquals": ["States.ALL"], - "Output": { - "error": "{% $states.errorOutput %}", - "failedAt": "parallel execution" - }, - "Next": "HandleParallelError" - } - ], - "Next": "Continue" -} -``` - ---- - -## Error Handling in Map States - -Individual iteration failures can be tolerated: - -```json -"ProcessAll": { - "Type": "Map", - "Items": "{% $states.input.records %}", - "ToleratedFailurePercentage": 10, - "ItemProcessor": { ... }, - "Catch": [ - { - "ErrorEquals": ["States.ExceedToleratedFailureThreshold"], - "Output": { - "error": "Too many items failed", - "details": "{% $states.errorOutput %}" - }, - "Next": "HandleBatchFailure" - }, - { - "ErrorEquals": ["States.ALL"], - "Next": "HandleMapError" - } - ], - "Next": "Done" -} -``` - ---- - -## Common Error Handling Patterns - -### Circuit Breaker with Variables - -```json -"CheckRetryCount": { - "Type": "Choice", - "Choices": [ - { - "Condition": "{% $retryCount >= $maxRetries %}", - "Next": "MaxRetriesExceeded" - } - ], - "Default": "AttemptOperation" -}, -"AttemptOperation": { - "Type": "Task", - "Resource": "...", - "Assign": { - "retryCount": "{% $retryCount + 1 %}" - }, - "Catch": [ - { - "ErrorEquals": ["States.ALL"], - "Assign": { - "retryCount": "{% $retryCount + 1 %}", - "lastError": "{% $states.errorOutput %}" - }, - "Next": "WaitBeforeRetry" - } - ], - "Next": "Success" -}, -"WaitBeforeRetry": { - "Type": "Wait", - "Seconds": "{% $power(2, $retryCount) %}", - "Next": "CheckRetryCount" -} -``` - diff --git a/aws-step-functions-jsonata/steering/service-integrations.md b/aws-step-functions-jsonata/steering/service-integrations.md deleted file mode 100644 index 490c104..0000000 --- a/aws-step-functions-jsonata/steering/service-integrations.md +++ /dev/null @@ -1,485 +0,0 @@ -# Service Integrations in JSONata Mode - -## Integration Types - -Step Functions can integrate with AWS services in three 
patterns: - -1. **Optimized integrations** — Purpose-built, recommended where available (e.g., Lambda, DynamoDB, SNS, SQS, ECS, Glue, SageMaker, etc.) -2. **AWS SDK integrations** — Call any AWS SDK API action directly -3. **HTTP Task** — Call HTTPS APIs (e.g., Stripe, Salesforce) - -### Resource ARN Patterns - -``` -# Optimized integration -"Resource": "arn:aws:states:::servicename:apiAction" - -# Optimized integration (synchronous — wait for completion) -"Resource": "arn:aws:states:::servicename:apiAction.sync" - -# Optimized integration (wait for callback token) -"Resource": "arn:aws:states:::servicename:apiAction.waitForTaskToken" - -# AWS SDK integration -"Resource": "arn:aws:states:::aws-sdk:serviceName:apiAction" -``` - ---- - -## Lambda Function - -### Optimized Integration (Recommended) - -```json -"InvokeFunction": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunction:$LATEST", - "Payload": { - "orderId": "{% $states.input.orderId %}", - "customer": "{% $states.input.customer %}" - } - }, - "Output": "{% $states.result.Payload %}", - "Next": "NextState" -} -``` - -Always include a version qualifier (`:$LATEST`, `:1`, or an alias like `:prod`) on the function ARN. - -The result is wrapped in a `Payload` field, so use `$states.result.Payload` to access the Lambda return value. 
- -### SDK Integration - -```json -"InvokeViaSDK": { - "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunction", - "Payload": "{% $string($states.input) %}" - }, - "Next": "NextState" -} -``` - ---- - -## DynamoDB - -### GetItem - -```json -"GetUser": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:getItem", - "Arguments": { - "TableName": "UsersTable", - "Key": { - "userId": { - "S": "{% $states.input.userId %}" - } - } - }, - "Assign": { - "user": "{% $states.result.Item %}" - }, - "Output": "{% $states.result.Item %}", - "Next": "ProcessUser" -} -``` - -### PutItem - -```json -"SaveOrder": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:putItem", - "Arguments": { - "TableName": "OrdersTable", - "Item": { - "orderId": { - "S": "{% $orderId %}" - }, - "status": { - "S": "processing" - }, - "total": { - "N": "{% $string($states.input.total) %}" - }, - "createdAt": { - "S": "{% $now() %}" - } - } - }, - "Next": "ProcessOrder" -} -``` - -### UpdateItem - -```json -"UpdateStatus": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:updateItem", - "Arguments": { - "TableName": "OrdersTable", - "Key": { - "orderId": { - "S": "{% $orderId %}" - } - }, - "UpdateExpression": "SET #s = :status, updatedAt = :time", - "ExpressionAttributeNames": { - "#s": "status" - }, - "ExpressionAttributeValues": { - ":status": { - "S": "{% $states.input.newStatus %}" - }, - ":time": { - "S": "{% $now() %}" - } - } - }, - "Next": "Done" -} -``` - -### Query - -```json -"QueryOrders": { - "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:dynamodb:query", - "Arguments": { - "TableName": "OrdersTable", - "KeyConditionExpression": "customerId = :cid", - "ExpressionAttributeValues": { - ":cid": { - "S": "{% $states.input.customerId %}" - } - } - }, - "Output": "{% $states.result.Items %}", - "Next": "ProcessOrders" -} -``` - ---- - -## SNS 
(Simple Notification Service) - -### Publish Message - -```json -"SendNotification": { - "Type": "Task", - "Resource": "arn:aws:states:::sns:publish", - "Arguments": { - "TopicArn": "arn:aws:sns:us-east-1:123456789012:OrderNotifications", - "Message": "{% 'Order ' & $orderId & ' has been processed successfully.' %}", - "Subject": "Order Confirmation" - }, - "Next": "Done" -} -``` - -### Publish with JSON Message - -```json -"SendStructuredNotification": { - "Type": "Task", - "Resource": "arn:aws:states:::sns:publish", - "Arguments": { - "TopicArn": "arn:aws:sns:us-east-1:123456789012:Alerts", - "Message": "{% $string({'orderId': $orderId, 'status': $states.input.status, 'timestamp': $now()}) %}" - }, - "Next": "Done" -} -``` - ---- - -## SQS (Simple Queue Service) - -### Send Message - -```json -"QueueMessage": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage", - "Arguments": { - "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ProcessingQueue", - "MessageBody": "{% $string($states.input) %}" - }, - "Next": "Done" -} -``` - -### Send Message with Wait for Task Token - -```json -"WaitForApproval": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", - "Arguments": { - "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", - "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'orderId': $orderId, 'amount': $states.input.amount}) %}" - }, - "TimeoutSeconds": 86400, - "Next": "ProcessApproval" -} -``` - -The execution pauses until an external system calls `SendTaskSuccess` or `SendTaskFailure` with the task token. 
- ---- - -## Step Functions (Nested Execution) - -### Start Execution (Synchronous) - -```json -"RunSubWorkflow": { - "Type": "Task", - "Resource": "arn:aws:states:::states:startExecution.sync:2", - "Arguments": { - "StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:ChildWorkflow", - "Input": "{% $states.input %}" - }, - "Output": "{% $states.result.Output %}", - "Next": "ProcessSubResult" -} -``` - -Note: The `.sync:2` suffix waits for completion and returns the child output in `$states.result.Output` as already-parsed JSON, so reference it directly. With the older `.sync` suffix the child output is a JSON string instead; in that case use `$parse($states.result.Output)` to deserialize it. - -### Start Execution (Async — Fire and Forget) - -```json -"StartAsync": { - "Type": "Task", - "Resource": "arn:aws:states:::states:startExecution", - "Arguments": { - "StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:AsyncWorkflow", - "Input": "{% $string($states.input) %}" - }, - "Next": "Continue" -} -``` - ---- - -## EventBridge - -### Put Events - -```json -"EmitEvent": { - "Type": "Task", - "Resource": "arn:aws:states:::events:putEvents", - "Arguments": { - "Entries": [ - { - "Source": "my.application", - "DetailType": "OrderProcessed", - "Detail": "{% $string({'orderId': $orderId, 'status': 'completed'}) %}", - "EventBusName": "default" - } - ] - }, - "Next": "Done" -} -``` - ---- - -## ECS / Fargate - -### Run Task (Synchronous) - -```json -"RunContainer": { - "Type": "Task", - "Resource": "arn:aws:states:::ecs:runTask.sync", - "Arguments": { - "LaunchType": "FARGATE", - "Cluster": "arn:aws:ecs:us-east-1:123456789012:cluster/MyCluster", - "TaskDefinition": "arn:aws:ecs:us-east-1:123456789012:task-definition/MyTask:1", - "NetworkConfiguration": { - "AwsvpcConfiguration": { - "Subnets": ["subnet-abc123"], - "SecurityGroups": ["sg-abc123"], - "AssignPublicIp": "ENABLED" - } - }, - "Overrides": { - "ContainerOverrides": [ - { - "Name": "my-container", - "Environment": [ - { - "Name": "ORDER_ID", - "Value": "{% $orderId %}" - } - ] - } - ] - } - }, - "TimeoutSeconds": 
600, - "Next": "Done" -} -``` - ---- - -## AWS Glue - -### Start Job Run (Synchronous) - -```json -"RunGlueJob": { - "Type": "Task", - "Resource": "arn:aws:states:::glue:startJobRun.sync", - "Arguments": { - "JobName": "my-etl-job", - "Arguments": { - "--input_path": "{% $states.input.inputPath %}", - "--output_path": "{% $states.input.outputPath %}" - } - }, - "TimeoutSeconds": 3600, - "Next": "Done" -} -``` - ---- - -## Amazon Bedrock - -### Invoke Model - -```json -"InvokeModel": { - "Type": "Task", - "Resource": "arn:aws:states:::bedrock:invokeModel", - "Arguments": { - "ModelId": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", - "ContentType": "application/json", - "Accept": "application/json", - "Body": { - "anthropic_version": "bedrock-2023-05-31", - "max_tokens": 1024, - "messages": [ - { - "role": "user", - "content": "{% $states.input.prompt %}" - } - ] - } - }, - "Output": "{% $states.result.Body %}", - "Next": "ProcessResponse" -} -``` - ---- - -## S3 - -### GetObject - -```json -"ReadFile": { - "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:s3:getObject", - "Arguments": { - "Bucket": "my-bucket", - "Key": "{% $states.input.filePath %}" - }, - "Output": "{% $states.result.Body %}", - "Next": "ProcessFile" -} -``` - -### PutObject - -```json -"WriteFile": { - "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:s3:putObject", - "Arguments": { - "Bucket": "my-bucket", - "Key": "{% 'results/' & $orderId & '.json' %}", - "Body": "{% $string($states.input.results) %}" - }, - "Next": "Done" -} -``` - ---- - -## Cross-Account Access - -Use the `Credentials` field to assume a role in another account: - -```json -"CrossAccountCall": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Credentials": { - "RoleArn": "arn:aws:iam::111122223333:role/CrossAccountRole" - }, - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:111122223333:function:RemoteFunction:$LATEST", - "Payload": "{% 
$states.input %}" - }, - "Output": "{% $states.result.Payload %}", - "Next": "Done" -} -``` - ---- - -## Synchronous vs Asynchronous Patterns - -| Pattern | Resource Suffix | Behavior | -|---------|----------------|----------| -| Request-Response | (none) | Call API and continue immediately | -| Synchronous | `.sync` | Wait for task to complete | -| Wait for Callback | `.waitForTaskToken` | Pause until external callback | - -### When to Use Each - -- **Request-Response**: Fire-and-forget operations (start a process, send a message) -- **Synchronous (`.sync`)**: When you need the result before continuing (run ECS task, execute child workflow, run Glue job) -- **Wait for Callback (`.waitForTaskToken`)**: Human approval, external system processing, long-running async operations - -### Callback Pattern Example - -```json -"WaitForHumanApproval": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage.waitForTaskToken", - "Arguments": { - "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/ApprovalQueue", - "MessageBody": "{% $string({'taskToken': $states.context.Task.Token, 'request': $states.input}) %}" - }, - "TimeoutSeconds": 604800, - "Catch": [ - { - "ErrorEquals": ["States.Timeout"], - "Output": { - "status": "approval_timeout" - }, - "Next": "HandleTimeout" - } - ], - "Next": "ApprovalReceived" -} -``` - -The external system must call `SendTaskSuccess` or `SendTaskFailure` with the task token to resume execution. 
diff --git a/aws-step-functions-jsonata/steering/variables-and-data.md b/aws-step-functions-jsonata/steering/variables-and-data.md deleted file mode 100644 index 8e4339b..0000000 --- a/aws-step-functions-jsonata/steering/variables-and-data.md +++ /dev/null @@ -1,498 +0,0 @@ -# Variables and Data Transformation (JSONata Mode) - -## JSONata Expression Syntax - -JSONata expressions are written inside `{% %}` delimiters in string values: - -```json -"Output": "{% $states.input.customer.name %}" -"TimeoutSeconds": "{% $timeout %}" -"Condition": "{% $states.input.age >= 18 %}" -``` - -Rules: -- The string must start with `{%` (no leading spaces) and end with `%}` (no trailing spaces). -- Not all fields accept JSONata — `Type` and `Resource` must be constant strings. -- JSONata expressions can appear in string values within objects and arrays at any nesting depth. -- A string without `{% %}` is treated as a literal value. -- All string literals inside JSONata expressions must use single quotes (`'text'`), not double quotes. The expression is already inside a JSON double-quoted string, so double quotes would break the JSON. -- Use `:=` inside `( ... )` blocks to bind local variables within a single expression. These are expression-local only — they do NOT set state machine variables (use `Assign` for that). -- Complex logic is wrapped in `( expr1; expr2; ...; finalExpr )` where semicolons separate sequential expressions and the last expression is the return value. - -### String Quoting - -```json -"Output": "{% 'Hello ' & $states.input.name %}" -"Condition": "{% $states.input.status = 'active' %}" -``` - -Never use double quotes inside the expression: -``` -❌ "Output": "{% "Hello" %}" -✓ "Output": "{% 'Hello' %}" -``` - -### Local Variable Binding with `:=` - -Use `:=` inside `( ... )` blocks to bind intermediate values within a single JSONata expression. 
Semicolons separate each binding, and the last expression is the return value: - -```json -"Output": "{% ( $subtotal := $sum($states.input.items.price); $tax := $subtotal * 0.1; $discount := $exists($couponValue) ? $couponValue : 0; {'subtotal': $subtotal, 'tax': $tax, 'discount': $discount, 'total': $subtotal + $tax - $discount} ) %}" -``` - -You can also define local helper functions: - -```json -"Assign": { - "summary": "{% ( $formatPrice := function($amt) { '$' & $formatNumber($amt, '#,##0.00') }; $subtotal := $sum($states.input.items.price); {'itemCount': $count($states.input.items), 'subtotal': $formatPrice($subtotal), 'total': $formatPrice($subtotal * 1.1)} ) %}" -} -``` - -Local variables bound with `:=` exist only within the `( ... )` block. They do not affect state machine variables. To persist values across states, use the `Assign` field. - -## The `$states` Reserved Variable - -Step Functions provides a reserved `$states` variable in every JSONata state: - -``` -$states = { - "input": // Original input to the state - "result": // Task/Parallel/Map result (if successful) - "errorOutput": // Error Output (only available in Catch) - "context": // Context object (execution metadata) -} -``` - -### Where Each Field Is Accessible - -| Field | Accessible In | -|-------|--------------| -| `$states.input` | All fields that accept JSONata, in any state | -| `$states.result` | Top-level `Output` and `Assign` in Task, Parallel, Map states | -| `$states.errorOutput` | `Output` and `Assign` inside a `Catch` block | -| `$states.context` | All fields that accept JSONata, in any state | - -### Context Object - -`$states.context` provides execution metadata: - -```json -"executionId": "{% $states.context.Execution.Id %}", -"startTime": "{% $states.context.Execution.StartTime %}", -"stateName": "{% $states.context.State.Name %}", -"originalInput": "{% $states.context.Execution.Input %}" -``` - -Useful context fields: -- `$states.context.Execution.Id` — Execution ARN -- 
`$states.context.Execution.Input` — Original workflow input -- `$states.context.Execution.Name` — Execution name -- `$states.context.Execution.StartTime` — When execution started -- `$states.context.State.Name` — Current state name -- `$states.context.State.EnteredTime` — When current state was entered -- `$states.context.StateMachine.Id` — State machine ARN -- `$states.context.StateMachine.Name` — State machine name - -Inside Map state `ItemSelector`: -- `$states.context.Map.Item.Value` — Current array element -- `$states.context.Map.Item.Index` — Zero-based index - -## JSONata Restrictions in Step Functions - -1. **No `$` or `$$` at top level**: You cannot use `$` or `$$` to reference an implicit input document. Use `$states.input` instead. - - Invalid: `"Output": "{% $.name %}"` (top-level `$`) - - Valid: `"Output": "{% $states.input.name %}"` - - Valid inside expressions: `"Output": "{% $states.input.items[$.price > 10] %}"` (nested `$` is OK) - -2. **No unqualified field names at top level**: Use variables or `$states.input`. - - Invalid: `"Output": "{% name %}"` (unqualified) - - Valid: `"Output": "{% $states.input.name %}"` - -3. **No `$eval`**: Use `$parse()` instead for deserializing JSON strings. - -4. **Expressions must produce a defined value**: `$data.nonExistentField` throws `States.QueryEvaluationError` because JSON cannot represent undefined. - ---- - -## Workflow Variables with `Assign` - -Variables let you store data in one state and reference it in any subsequent state, without threading data through Output/Input chains. 
- -### Declaring Variables - -```json -"StoreData": { - "Type": "Pass", - "Assign": { - "productName": "product1", - "count": 42, - "available": true, - "config": "{% $states.input.configuration %}" - }, - "Next": "UseData" -} -``` - -### Referencing Variables - -Prepend the variable name with `$`: - -```json -"Arguments": { - "product": "{% $productName %}", - "quantity": "{% $count %}" -} -``` - -### Assigning from Task Results - -```json -"FetchPrice": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:GetPrice:$LATEST", - "Payload": { - "product": "{% $states.input.product %}" - } - }, - "Assign": { - "currentPrice": "{% $states.result.Payload.price %}" - }, - "Output": "{% $states.result.Payload %}", - "Next": "CheckPrice" -} -``` - -### States That Support Assign - -Pass, Task, Map, Parallel, Choice, Wait — all support `Assign`. - -Succeed and Fail do NOT support `Assign`. - -### Assign in Choice Rules and Catch - -Choice Rules and Catch blocks can each have their own `Assign`: - -```json -"CheckValue": { - "Type": "Choice", - "Choices": [ - { - "Condition": "{% $states.input.value > 100 %}", - "Assign": { - "tier": "premium" - }, - "Next": "PremiumPath" - } - ], - "Default": "StandardPath", - "Assign": { - "tier": "standard" - } -} -``` - -If a Choice Rule matches, its `Assign` is used. If no rule matches, the state-level `Assign` is used. - ---- - -## Variable Evaluation Order - -All expressions in `Assign` are evaluated using variable values as they were on state entry. New values only take effect in the next state. - -```json -"SwapExample": { - "Type": "Pass", - "Assign": { - "x": "{% $y %}", - "y": "{% $x %}" - }, - "Next": "AfterSwap" -} -``` - -If `$x = 3` and `$y = 6` on entry, after this state: `$x = 6`, `$y = 3`. This works because all expressions are evaluated first, then assignments are made. 
- -You cannot assign to a sub-path of a variable: -- Valid: `"Assign": {"x": 42}` -- Invalid: `"Assign": {"x.y": 42}` or `"Assign": {"x[2]": 42}` - ---- - -## Variable Scope - -Variables exist in a state-machine-local scope: - -- **Outer scope**: All states in the top-level `States` field. -- **Inner scope**: States inside a Parallel branch or Map iteration. - -### Scope Rules - -1. Inner scopes can READ variables from outer scopes. -2. Inner scopes CANNOT ASSIGN to variables that exist in an outer scope. -3. Variable names must be unique across outer and inner scopes (no shadowing). -4. Variables in different Parallel branches or Map iterations are isolated from each other. -5. When a Parallel branch or Map iteration completes, its variables go out of scope. -6. Exception: Distributed Map states cannot reference variables in outer scopes. - -### Passing Data Out of Inner Scopes - -Use `Output` on terminal states within branches/iterations to return data to the outer scope: - -```json -"ParallelWork": { - "Type": "Parallel", - "Branches": [ - { - "StartAt": "BranchA", - "States": { - "BranchA": { - "Type": "Task", - "Resource": "...", - "Output": "{% $states.result.Payload %}", - "End": true - } - } - } - ], - "Assign": { - "branchAResult": "{% $states.result[0] %}" - }, - "Next": "Continue" -} -``` - -### Catch Assign and Outer Scope - -In a Catch block on a Parallel or Map state, `Assign` can assign values to variables in the outer scope (the scope where the Parallel/Map state exists): - -```json -"Catch": [ - { - "ErrorEquals": ["States.ALL"], - "Assign": { - "errorOccurred": true, - "errorDetails": "{% $states.errorOutput %}" - }, - "Next": "HandleError" - } -] -``` - ---- - -## Arguments and Output Fields - -### Arguments - -Provides input to Task and Parallel states (replaces JSONPath `Parameters`): - -```json -"Arguments": { - "staticField": "hello", - "dynamicField": "{% $states.input.name %}", - "computed": "{% $count($states.input.items) %}" -} -``` - -Or 
as a single JSONata expression: - -```json -"Arguments": "{% $states.input.payload %}" -``` - -`Arguments` can reference `$states.input` and `$states.context`, but NOT `$states.result` or `$states.errorOutput`. - -### Output - -Transforms the state output (replaces JSONPath `ResultSelector` + `ResultPath` + `OutputPath`): - -```json -"Output": { - "customerId": "{% $states.input.id %}", - "result": "{% $states.result.Payload %}", - "processedAt": "{% $now() %}" -} -``` - -Or as a single expression or literal value: - -```json -"Output": "{% $states.result.Payload %}" -"Output": 42 -"Output": { "status": "done" } -``` - -If `Output` is not provided: -- Task, Parallel, Map: state output = the result -- All other states: state output = the state input - -### Assign and Output Are Parallel - -`Assign` and `Output` are evaluated in parallel. Variable assignments in `Assign` are NOT available in `Output` of the same state — you must re-derive values in both if needed: - -```json -"Assign": { - "savedPrice": "{% $states.result.Payload.price %}" -}, -"Output": { - "price": "{% $states.result.Payload.price %}" -} -``` - ---- - -## Variable Limits - -| Limit | Value | -|-------|-------| -| Max size of a single variable | 256 KiB | -| Max combined size in a single Assign | 256 KiB | -| Max total stored variables per execution | 10 MiB | -| Max variable name length | 80 Unicode characters | - ---- - -## Data Transformation Patterns - -### Filtering Arrays - -```json -"Output": { - "expensiveItems": "{% $states.input.items[price > 100] %}" -} -``` - -### Aggregation - -```json -"Output": { - "total": "{% $sum($states.input.items.price) %}", - "average": "{% $average($states.input.items.price) %}", - "count": "{% $count($states.input.items) %}" -} -``` - -### String Operations - -```json -"Output": { - "fullName": "{% $states.input.firstName & ' ' & $states.input.lastName %}", - "upper": "{% $uppercase($states.input.name) %}", - "trimmed": "{% $trim($states.input.rawInput) %}" 
-} -``` - -### Object Merging - -```json -"Output": "{% $merge([$states.input, {'processedAt': $now(), 'status': 'complete'}]) %}" -``` - -### Building Lookup Maps with `$reduce` - -Use `$reduce` to transform an array into a key-value object: - -```json -"Assign": { - "priceByProduct": "{% $reduce($states.input.items, function($acc, $item) { $merge([$acc, {$item.productId: $item.price}]) }, {}) %}" -} -``` - -Given `[{"productId": "A1", "price": 10}, {"productId": "B2", "price": 25}]`, this produces `{"A1": 10, "B2": 25}`. - -### Dynamic Key Access with `$lookup` - -Use `$lookup` to access an object property by a variable key: - -```json -"Output": { - "price": "{% $lookup($priceByProduct, $states.input.productId) %}" -} -``` - -This is essential when you've built a mapping object with `$reduce` and need to retrieve values dynamically. Standard dot notation (`$priceByProduct.someKey`) only works with literal key names. - -### Conditional Values - -```json -"Output": { - "tier": "{% $states.input.total > 1000 ? 'gold' : 'standard' %}", - "discount": "{% $exists($states.input.coupon) ? 
0.1 : 0 %}" -} -``` - -### Array Membership with `in` and Concatenation with `$append` - -Test if a value exists in an array with `in`: - -```json -"Condition": "{% $states.input.status in ['pending', 'processing', 'shipped'] %}" -``` - -Concatenate arrays with `$append`: - -```json -"Assign": { - "allIds": "{% $append($states.input.orderIds, $states.input.returnIds) %}" -} -``` - -### Array Mapping - -```json -"Output": { - "names": "{% $states.input.users.(firstName & ' ' & lastName) %}" -} -``` - -### Generating UUIDs and Random Values - -```json -"Assign": { - "requestId": "{% $uuid() %}", - "randomValue": "{% $random() %}" -} -``` - -### Partitioning Arrays - -```json -"Assign": { - "batches": "{% $partition($states.input.items, 10) %}" -} -``` - -### Parsing JSON Strings - -```json -"Assign": { - "parsed": "{% $parse($states.input.jsonString) %}" -} -``` - -### Hashing - -```json -"Assign": { - "hash": "{% $hash($states.input.content, 'SHA-256') %}" -} -``` - -### Timestamp Comparison with `$toMillis` - -Timestamps in JSONata are strings, so `<` and `>` compare them lexicographically, which is only reliable when both values use the same format and UTC offset. Use `$toMillis` to convert to numeric milliseconds: - -```json -"Condition": "{% $toMillis($states.input.orderDate) > $toMillis($states.input.cutoffDate) %}" -``` - -Useful for sorting timestamps, calculating durations, or finding the most recent entry (the `$sort` comparator must return a boolean: `true` when `$a` should be placed after `$b`): - -```json -"Assign": { - "ageMinutes": "{% $round(($toMillis($now()) - $toMillis($states.input.createdAt)) / 60000, 2) %}", - "mostRecent": "{% $sort($states.input.timestamps, function($a, $b) { $toMillis($a) < $toMillis($b) })[0] %}" -} -``` From 609f5fa636da4bfc09764faaddb66d5463ae61d0 Mon Sep 17 00:00:00 2001 From: Jeff Palmer Date: Wed, 8 Apr 2026 13:47:37 -0500 Subject: [PATCH 7/7] Made changes based on peer feedback --- aws-step-functions/POWER.md | 123 +--- .../steering/architecture-patterns.md | 4 +- .../steering/asl-state-types.md | 178 +----- aws-step-functions/steering/error-handling.md | 335 ++--------- ...
=> migrating-from-jsonpath-to-jsonata.md} | 195 +++---- .../steering/service-integrations.md | 328 +---------- .../steering/validation-and-testing.md | 526 ++++++------------ .../steering/variables-and-data.md | 77 ++- 8 files changed, 389 insertions(+), 1377 deletions(-) rename aws-step-functions/steering/{converting-from-jsonpath-to-jsonata.md => migrating-from-jsonpath-to-jsonata.md} (61%) diff --git a/aws-step-functions/POWER.md b/aws-step-functions/POWER.md index 2242d37..a487bdd 100644 --- a/aws-step-functions/POWER.md +++ b/aws-step-functions/POWER.md @@ -1,15 +1,13 @@ --- name: "aws-step-functions" displayName: "AWS Step Functions" -description: "Build workflows with AWS Step Functions state machines using the JSONata query language. Covers Amazon States Language (ASL) structure, all state types, variables, data transformation, error handling, and service integrations." +description: "Build workflows with AWS Step Functions state machines using the JSONata query language. Covers Amazon States Language (ASL) structure, state types, variables, data transformation, error handling, AWS service integration, and migrating from the JSONPath to the JSONata query language." keywords: ["step functions", "state machine", "serverless", "jsonata", "asl", "amazon states language", "workflow", "orchestration"] author: "AWS" --- # AWS Step Functions -AWS Step Functions provides visual workflow orchestration with native integrations to 9,000+ API actions across 200+ AWS services. Define workflows as state machines in Amazon States Language using the JSONata query language instead of legacy JSONPath. - ## Overview AWS Step Functions uses Amazon States Language (ASL) to define state machines as JSON. With AWS Step Functions, you can create workflows, also called State machines, to build distributed applications, automate processes, orchestrate microservices, and create data and machine learning pipelines. 
@@ -22,7 +20,7 @@ This power provides comprehensive guidance for writing state machines in ASL, co - Error handling - AWS Service integration patterns - Data transformation and architecture examples -- Validation and testing of ASL structure +- Validation and testing of state machines - How to migrate from JSONPath to JSONata ## When to Load Steering Files @@ -30,10 +28,10 @@ This power provides comprehensive guidance for writing state machines in ASL, co Load the appropriate steering file based on what the user is working on: - **ASL structure**, **state types**, **Task**, **Pass**, **Choice**, **Wait**, **Succeed**, **Fail**, **Parallel**, **Map** → see `asl-state-types.md` -- **Variables**, **Assign**, **data passing**, **scope**, **$states**, **input**, **output**, **Arguments**, **Output**, **data transformation** → see `variables-and-data.md` -- **Error handling**, **Retry**, **Catch**, **fallback**, **error codes**, **States.Timeout**, **States.ALL** → see `error-handling.md` +- **Variables**, **Assign**, **data passing**, **scope**, **$states**, **input**, **output**, **Arguments**, **Output**, **data transformation**, **QueryEvaluationError** → see `variables-and-data.md` +- **Error handling**, **troubleshooting**, **Retry**, **Catch**, **fallback**, **error codes**, **States.Timeout**, **States.ALL** → see `error-handling.md` - **Service integrations**, **Lambda invoke**, **DynamoDB**, **SNS**, **SQS**, **SDK integrations**, **Resource ARN**, **sync**, **async** → see `service-integrations.md` -- **Converting from JSONPath**, **migration**, **JSONPath to JSONata**, **InputPath**, **Parameters**, **ResultSelector**, **ResultPath**, **OutputPath**, **intrinsic functions**, **Iterator**, **payload template** → see `converting-from-jsonpath-to-jsonata.md` +- **Migrating from JSONPath to JSONata**, **migration**, **JSONPath to JSONata**, **InputPath**, **Parameters**, **ResultSelector**, **ResultPath**, **OutputPath**, **intrinsic functions**, **Iterator**, 
**payload template** → see `migrating-from-jsonpath-to-jsonata.md` - **Validation**, **linting**, **testing**, **TestState**, **test state**, **mock**, **mocking**, **unit test**, **inspection level**, **DEBUG**, **TRACE**, **validate state**, **test in isolation** → see `validation-and-testing.md` ## Quick Reference @@ -82,28 +80,6 @@ JSONata is the modern, preferred way to reference and transform data in ASL. It { "Type": "Task", "QueryLanguage": "JSONata", ... } ``` -**JSONata Expression syntax** -ADD MORE COMPLEX EXAMPLE -Wrap expressions in `{% %}`: -```json -"Arguments": { - "userId": "{% $states.input.user.id %}", - "greeting": "{% 'Hello, ' & $states.input.user.name %}", - "total": "{% $sum($states.input.items.price) %}" -} -``` - -**Built-in Step Functions JSONata functions:** - -| Function | Purpose | -|----------|---------| -| `$partition(array, size)` | Partition array into chunks | -| `$range(start, end, step)` | Generate array of values | -| `$hash(data, algorithm)` | Calculate hash (MD5, SHA-1, SHA-256, SHA-384, SHA-512) | -| `$random([seed])` | Random number 0 ≤ n < 1, optional seed | -| `$uuid()` | Generate v4 UUID | -| `$parse(jsonString)` | Deserialize JSON string | - **JSONPath is still supported** and is the default if `QueryLanguage` is omitted — existing state machines do not need to be migrated. 
### The `$states` Reserved Variable (JSONata only) @@ -115,94 +91,6 @@ $states.errorOutput → Error output (only in Catch) $states.context → Execution context object ``` -### Key Fields in Step Functions (JSONata only) - -| Field | Purpose | Available In | -|-------|---------|-------------| -| `Arguments` | Input to task/branches | Task, Parallel | -| `Output` | Transform state output | All except Fail | -| `Assign` | Store workflow variables | All except Succeed, Fail | -| `Condition` | Boolean branching | Choice rules | -| `Items` | Array for iteration | Map | - -### Functions Provided by Step Functions (JSONata only) - -| Function | Purpose | -|----------|---------| -| `$partition(array, size)` | Partition array into chunks | -| `$range(start, end, step)` | Generate array of values | -| `$hash(data, algorithm)` | Calculate hash (MD5, SHA-1, SHA-256, SHA-384, SHA-512) | -| `$random([seed])` | Random number 0 ≤ n < 1, optional seed | -| `$uuid()` | Generate v4 UUID | -| `$parse(jsonString)` | Deserialize JSON string | - -Plus all [built-in JSONata functions](https://github.com/jsonata-js/jsonata/tree/master/docs) - -### Minimal Complete Example - -```json -{ - "Comment": "Order processing workflow", - "QueryLanguage": "JSONata", - "StartAt": "ValidateOrder", - "States": { - "ValidateOrder": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:getItem", - "Arguments": { - "TableName": "OrdersTable", - "Key": { - "orderId": { - "S": "{% $states.input.orderId %}" - } - } - }, - "Assign": { - "orderId": "{% $states.input.orderId %}" - }, - "Output": "{% $states.result.Item %}", - "Next": "CheckStock" - }, - "CheckStock": { - "Type": "Choice", - "Choices": [ - { - "Condition": "{% $states.input.inStock = true %}", - "Next": "ProcessPayment" - } - ], - "Default": "OutOfStock" - }, - "ProcessPayment": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage", - "Arguments": { - "QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/PaymentQueue", 
- "MessageBody": "{% $string({'orderId': $orderId, 'amount': $states.input.total.N}) %}" - }, - "Output": { - "orderId": "{% $orderId %}", - "messageId": "{% $states.result.MessageId %}" - }, - "Retry": [ - { - "ErrorEquals": ["States.TaskFailed"], - "IntervalSeconds": 2, - "MaxAttempts": 3, - "BackoffRate": 2.0 - } - ], - "End": true - }, - "OutOfStock": { - "Type": "Fail", - "Error": "OutOfStockError", - "Cause": "Requested item is out of stock" - } - } -} -``` - ## Best Practices - Set `"QueryLanguage": "JSONata"` at the top level for new state machines unless JSONPath is mandatory @@ -224,6 +112,7 @@ Plus all [built-in JSONata functions](https://github.com/jsonata-js/jsonata/tree - Using `$` or `$$` at the top level of a JSONata expression — use `$states.input` instead. - Forgetting `{% %}` delimiters around JSONata expressions — the string will be treated as a literal. - Assigning variables in `Assign` and expecting them in `Output` of the same state — new values only take effect in the next state. +- Reference validation-and-testing.md and error-handling.md for detailed troubleshooting information. ## Resources diff --git a/aws-step-functions/steering/architecture-patterns.md b/aws-step-functions/steering/architecture-patterns.md index 74e35f8..266ef31 100644 --- a/aws-step-functions/steering/architecture-patterns.md +++ b/aws-step-functions/steering/architecture-patterns.md @@ -2,7 +2,7 @@ ## Polling Loop (Wait → Check → Choice) -Many AWS operations are asynchronous — you start them and then poll until they complete. The pattern is: initial wait → call describe/status API → check result → short wait → loop back. +Many AWS operations are asynchronous — you start them and then poll until they complete. The pattern is: Start Task → initial wait (estimate this based on the expected time it takes to complete the Task) → call describe/status API → check result → short wait → loop back. 
```json "SubmitOrder": { @@ -49,7 +49,7 @@ Many AWS operations are asynchronous — you start them and then poll until they ``` Key elements: -- Initial longer wait gives the operation time to start. Shorter poll interval for subsequent checks. +- Initial longer wait gives the operation time to run. Shorter poll interval for subsequent checks. - Choice state routes to success, failure, or back to the wait loop. - Always add Retry on the status-check Task to handle transient API errors. - Consider adding `TimeoutSeconds` on the state machine or a counter variable to prevent infinite polling. diff --git a/aws-step-functions/steering/asl-state-types.md b/aws-step-functions/steering/asl-state-types.md index 00c1929..e263cd4 100644 --- a/aws-step-functions/steering/asl-state-types.md +++ b/aws-step-functions/steering/asl-state-types.md @@ -1,90 +1,33 @@ # ASL Structure and State Types (JSONata Mode) -## State Machine Top-Level Structure - -```json -{ - "Comment": "Description of the state machine", - "QueryLanguage": "JSONata", - "StartAt": "FirstStateName", - "TimeoutSeconds": 3600, - "Version": "1.0", - "States": { - "FirstStateName": { ... }, - "SecondStateName": { ... } - } -} -``` - -- `QueryLanguage`: Set to `"JSONata"` at top level. Defaults to `"JSONPath"` if omitted. -- `StartAt`: Must exactly match a state name (case-sensitive). -- `TimeoutSeconds`: Optional max execution time. Exceeding it throws `States.Timeout`. -- `States`: Required object containing all state definitions. -- State names must be unique and ≤ 80 Unicode characters. - -## Common Fields for All JSONata States - -| Field | Description | -|-------|-------------| -| `Type` | Required. One of: Task, Pass, Choice, Wait, Parallel, Map, Succeed, Fail | -| `Comment` | Optional human-readable description | -| `Next` | Name of next state (required for non-terminal states except Choice) | -| `End` | Set to `true` for terminal states | -| `Output` | Optional. Transform state output. 
Available in all types except Fail | -| `Assign` | Optional. Store workflow variables. Available in all types except Succeed and Fail | -| `QueryLanguage` | Optional per-state override | - -## Field Availability Matrix (JSONata) - -``` - Task Parallel Map Pass Wait Choice Succeed Fail -Type ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ -Comment ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ -Output ✓ ✓ ✓ ✓ ✓ ✓ ✓ -Assign ✓ ✓ ✓ ✓ ✓ ✓ -Next/End ✓ ✓ ✓ ✓ ✓ -Arguments ✓ ✓ -Retry/Catch ✓ ✓ ✓ -``` - +Quick reference for the eight state types in AWS Step Functions. Reference variables-and-data.md for details about the fields available inside each state. --- ## Pass State -Passes input to output, optionally transforming it. Useful for injecting data or reshaping payloads. - -```json -"InjectData": { - "Type": "Pass", - "Output": { - "greeting": "{% 'Hello, ' & $states.input.name %}", - "timestamp": "{% $now() %}" - }, - "Next": "NextState" -} -``` - -With variable assignment: +Passes input to output, optionally transforming it with JSONata. Useful for injecting or transforming data. Without `Output`, the Pass state copies input to output unchanged. ```json -"StoreDefaults": { +"SetupAndGreet": { "Type": "Pass", "Assign": { "retryCount": 0, "maxRetries": 3, "config": "{% $states.input.configuration %}" }, + "Output": { + "greeting": "{% 'Hello, ' & $states.input.name %}", + "timestamp": "{% $now() %}" + }, "Next": "ProcessItem" } ``` -Without `Output`, the Pass state copies input to output unchanged. - --- ## Task State -Executes work via AWS service integrations, activities, or HTTP APIs. +Executes work via AWS service integrations, activities, or HTTP APIs. Reference service-integrations.md for full details. ### Required Fields - `Resource`: ARN identifying the task to execute @@ -93,54 +36,25 @@ Executes work via AWS service integrations, activities, or HTTP APIs. 
- `Arguments`: Input to the task (replaces JSONPath `Parameters`) - `Output`: Transform the result - `Assign`: Store variables from input or result -- `TimeoutSeconds`: Max task duration (default 60, accepts JSONata expression) +- `TimeoutSeconds`: Max task duration (default 99999999, accepts JSONata expression) - `HeartbeatSeconds`: Heartbeat interval (must be < TimeoutSeconds) - `Retry`: Retry policy array - `Catch`: Error handler array - `Credentials`: Cross-account role assumption -### Lambda Invoke Example - -```json -"InvokeLambda": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:MyFunc:$LATEST", - "Payload": { - "orderId": "{% $states.input.orderId %}", - "customer": "{% $states.input.customer %}" - } - }, - "Assign": { - "processedResult": "{% $states.result.Payload %}" - }, - "Output": "{% $states.result.Payload %}", - "Next": "NextState" -} -``` - -### Dynamic Timeout - -```json -"LongRunningTask": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:SlowFunc:$LATEST", - "Payload": "{% $states.input %}" - }, - "TimeoutSeconds": "{% $states.input.timeoutValue %}", - "HeartbeatSeconds": "{% $states.input.heartbeatValue %}", - "Next": "Done" -} -``` - --- ## Choice State -Adds branching logic. Uses `Condition` field with JSONata boolean expressions (replaces JSONPath `Variable` + comparison operators). +Uses `Choices` and `Condition` fields with JSONata boolean expressions to implement branching logic. + +Key points: +- `Condition` must evaluate to a boolean. +- Each Choice Rule can have its own `Assign` and `Output`. +- If a rule matches, its `Assign`/`Output` are used (not the state-level ones). +- If no rule matches, the state-level `Assign` is evaluated and `Default` is followed. 
+- `Default` is optional but recommended — without it, `States.NoChoiceMatched` is thrown when no rule matches. +- Choice states cannot be terminal (no `End` field). ### Structure @@ -174,16 +88,6 @@ Adds branching logic. Uses `Condition` field with JSONata boolean expressions (r } ``` -Key points: -- `Condition` must evaluate to a boolean. -- Each Choice Rule can have its own `Assign` and `Output`. -- If a rule matches, its `Assign`/`Output` are used (not the state-level ones). -- If no rule matches, the state-level `Assign` is evaluated and `Default` is followed. -- `Default` is optional but recommended — without it, `States.NoChoiceMatched` is thrown. -- Choice states cannot be terminal (no `End` field). - -### Complex Conditions - JSONata supports rich boolean logic: ```json @@ -201,16 +105,6 @@ JSONata supports rich boolean logic: Delays execution for a specified duration or until a timestamp. -### Wait by Seconds - -```json -"WaitTenSeconds": { - "Type": "Wait", - "Seconds": 10, - "Next": "Continue" -} -``` - ### Wait with Dynamic Seconds ```json @@ -251,24 +145,12 @@ Terminates the state machine (or a Parallel branch / Map iteration) successfully } ``` -Without `Output`, passes input through as output. No `Next` field allowed. - --- ## Fail State Terminates the state machine with an error. -```json -"OrderFailed": { - "Type": "Fail", - "Error": "OrderValidationError", - "Cause": "The order could not be validated" -} -``` - -### Dynamic Error and Cause - ```json "DynamicFail": { "Type": "Fail", @@ -276,18 +158,7 @@ Terminates the state machine with an error. "Cause": "{% $states.input.errorMessage %}" } ``` - -Build rich, defensive error messages with fallbacks for missing fields: - -```json -"OrderProcessingFailed": { - "Type": "Fail", - "Error": "OrderProcessingError", - "Cause": "{% 'Failed to process order ' & ($exists($orderId) ? $orderId : 'unknown') & ': ' & ($exists($error.Error) ? $error.Error : 'Unknown error') & ' - ' & ($exists($error.Cause) ?
$error.Cause : 'No details available') & '. Timestamp: ' & $now() %}" -} -``` - -No `Next`, `End`, `Output`, or `Assign` fields. Fail states are always terminal. +Reference error-handling.md for more information. --- @@ -439,9 +310,11 @@ Inside `ItemSelector`, you can access: | `ItemBatcher` | Batch items into sub-arrays | | `ResultWriter` | Write results to an external resource | -### ProcessorConfig +### Map ProcessorConfig -The `ItemProcessor` can include a `ProcessorConfig` to control execution mode: +The `ItemProcessor` can include a `ProcessorConfig` to control execution mode. +- `INLINE` (default) — iterations run within the parent execution. Use for most cases. +- `DISTRIBUTED` — iterations run as child executions. Use for large-scale processing (thousands+ items), items read from S3, or when you need per-iteration execution history. ```json "ItemProcessor": { @@ -453,10 +326,7 @@ The `ItemProcessor` can include a `ProcessorConfig` to control execution mode: } ``` -- `INLINE` (default) — iterations run within the parent execution. Use for most cases. -- `DISTRIBUTED` — iterations run as child executions. Use for large-scale processing (thousands+ items), items read from S3, or when you need per-iteration execution history. - -### Failure Tolerance +### Map Failure Tolerance ```json "ProcessWithTolerance": { diff --git a/aws-step-functions/steering/error-handling.md b/aws-step-functions/steering/error-handling.md index b27ad18..fc6c377 100644 --- a/aws-step-functions/steering/error-handling.md +++ b/aws-step-functions/steering/error-handling.md @@ -2,9 +2,7 @@ ## Overview -When a state encounters an error, Step Functions defaults to failing the entire execution. You can override this with `Retry` (retry the failed state) and `Catch` (transition to a fallback state). - -`Retry` and `Catch` are available on: Task, Parallel, and Map states. +When a state encounters an error, Step Functions defaults to failing the entire execution. 
You can override this with `Retry` (retry the failed state) and `Catch` (transition to a fallback state). `Retry` and `Catch` are available on: Task, Parallel, and Map states. ## Error Names @@ -17,12 +15,9 @@ Errors are identified by case-sensitive strings. Step Functions defines these bu | `States.HeartbeatTimeout` | Task missed heartbeat interval | | `States.TaskFailed` | Task failed during execution | | `States.Permissions` | Insufficient privileges | -| `States.ResultPathMatchFailure` | ResultPath cannot be applied (JSONPath only) | -| `States.ParameterPathFailure` | Parameter path resolution failed (JSONPath only) | | `States.QueryEvaluationError` | JSONata expression evaluation failed | | `States.BranchFailed` | A Parallel state branch failed | | `States.NoChoiceMatched` | No Choice rule matched and no Default | -| `States.IntrinsicFailure` | Intrinsic function failed (JSONPath only) | | `States.ExceedToleratedFailureThreshold` | Map state exceeded failure tolerance | | `States.ItemReaderFailed` | Map state ItemReader failed | | `States.ResultWriterFailed` | Map state ResultWriter failed | @@ -46,76 +41,12 @@ The `Retry` field is an array of Retrier objects. The interpreter scans retriers | `MaxDelaySeconds` | integer | — | Cap on retry interval | | `JitterStrategy` | string | — | Jitter strategy (e.g., `"FULL"`) | -### Basic Retry - -```json -"ProcessPayment": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Pay:$LATEST", - "Payload": "{% $states.input %}" - }, - "Retry": [ - { - "ErrorEquals": ["States.TaskFailed"], - "IntervalSeconds": 2, - "MaxAttempts": 3, - "BackoffRate": 2.0 - } - ], - "Next": "Confirm" -} -``` - -This retries after 2s, 4s, 8s (3 attempts with 2x backoff). 
- -### Retry with Max Delay and Jitter - -```json -"Retry": [ - { - "ErrorEquals": ["States.TaskFailed"], - "IntervalSeconds": 1, - "MaxAttempts": 5, - "BackoffRate": 2.0, - "MaxDelaySeconds": 30, - "JitterStrategy": "FULL" - } -] -``` - -### Multiple Retriers - -Retriers are evaluated in order. Each retrier tracks its own attempt count independently: - -```json -"Retry": [ - { - "ErrorEquals": ["ThrottlingException"], - "IntervalSeconds": 1, - "MaxAttempts": 5, - "BackoffRate": 2.0, - "JitterStrategy": "FULL" - }, - { - "ErrorEquals": ["States.Timeout"], - "MaxAttempts": 0 - }, - { - "ErrorEquals": ["States.ALL"], - "IntervalSeconds": 3, - "MaxAttempts": 2, - "BackoffRate": 1.5 - } -] -``` - Rules: - `States.ALL` must appear alone in its `ErrorEquals` array. - `States.ALL` must be in the last retrier. - `MaxAttempts: 0` means "never retry this error." - Retrier attempt counts reset when the interpreter transitions to another state. +- Retriers are evaluated in order. Each retrier tracks its own attempt count independently. --- @@ -132,118 +63,60 @@ The `Catch` field is an array of Catcher objects. After retries are exhausted (o | `Output` | any | Optional. Transform the error output | | `Assign` | object | Optional. 
Assign variables from error context | -### Basic Catch - -```json -"ProcessOrder": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Process:$LATEST", - "Payload": "{% $states.input %}" - }, - "Catch": [ - { - "ErrorEquals": ["ValidationError"], - "Output": { - "error": "{% $states.errorOutput.Error %}", - "cause": "{% $states.errorOutput.Cause %}", - "originalInput": "{% $states.input %}" - }, - "Next": "HandleValidationError" - }, - { - "ErrorEquals": ["States.ALL"], - "Output": "{% $states.errorOutput %}", - "Next": "HandleGenericError" - } - ], - "Next": "Success" -} -``` - ### Error Output Structure -When a state fails and matches a Catcher, the Error Output is a JSON object with: +When a state fails and matches a Catcher, `$states.errorOutput` is a JSON object with: - `Error` (string) — the error name - `Cause` (string) — human-readable error description -```json -{ - "Error": "States.TaskFailed", - "Cause": "Lambda function returned an error" -} -``` - -### Catch with Variable Assignment - -```json -"Catch": [ - { - "ErrorEquals": ["States.ALL"], - "Assign": { - "hasError": true, - "errorType": "{% $states.errorOutput.Error %}", - "errorMessage": "{% $states.errorOutput.Cause %}" - }, - "Output": "{% $merge([$states.input, {'error': $states.errorOutput}]) %}", - "Next": "ErrorHandler" - } -] -``` - In a Catch block, `Assign` and `Output` can reference: - `$states.input` — the original state input - `$states.errorOutput` — the error details - `$states.context` — execution context -If a Catcher matches, the state's top-level `Assign` is NOT evaluated — only the Catcher's `Assign` runs. +If a Catcher matches, the state's top-level `Assign` is NOT evaluated — only the Catcher's `Assign` runs. If no `Output` is provided in the Catcher, the state output is the raw Error Output object. 
-### Catch Without Output +When both Retry and Catch are present, retries are attempted first. Only if retries are exhausted does the Catch apply. -If no `Output` is provided in the Catcher, the state output is the raw Error Output object. +--- -### Building Rich Error Context for Fail States +## Handling States.QueryEvaluationError -A user-friendly pattern is to capture error details into a variable via Catch `Assign`, then reference that variable in a Fail state's `Cause` with defensive fallbacks: +JSONata expressions can fail at runtime. Common causes: -```json -"ChargePayment": { - "Type": "Task", - "Resource": "arn:aws:states:::sqs:sendMessage", - "Arguments": { ... }, - "Catch": [ - { - "ErrorEquals": ["States.ALL"], - "Assign": { - "error": "{% $states.errorOutput %}" - }, - "Next": "PaymentFailed" - } - ], - "Next": "ConfirmOrder" -}, -"PaymentFailed": { - "Type": "Fail", - "Error": "PaymentError", - "Cause": "{% 'Payment failed for order ' & ($exists($orderId) ? $orderId : 'unknown') & ': ' & ($exists($error.Error) ? $error.Error : 'Unknown') & ' - ' & ($exists($error.Cause) ? $error.Cause : 'No details') & '. Timestamp: ' & $now() %}" -} -``` +1. Type error — `{% $x + $y %}` where `$x` or `$y` is not a number +2. Type incompatibility — `"TimeoutSeconds": "{% $name %}"` where `$name` is a string +3. Value out of range — negative number for `TimeoutSeconds` +4. Undefined result — `{% $data.nonExistentField %}` — JSON cannot represent undefined + +Prevent these errors with defensive expressions: use `$exists()` before accessing fields evaluated at runtime, `$type()` before arithmetic, and guard filtered results that may return a single object instead of an array. Always guard with `$exists()` — if a variable was never assigned (e.g., the Catch didn't fire for that path), referencing it directly throws `States.QueryEvaluationError`. See `variables-and-data.md` for defensive JSONata examples.
-Always guard with `$exists()` — if the variable was never assigned (e.g., the Catch didn't fire for that path), referencing it directly throws `States.QueryEvaluationError`. +--- + +## Error Handling in Parallel States + +If any branch fails, the entire Parallel state fails. Use `States.BranchFailed` in Retry/Catch at the Parallel state level. --- -## Combined Retry and Catch +## Error Handling in Map States -When both are present, retries are attempted first. Only if retries are exhausted does the Catch apply: +Individual iteration failures can be tolerated with `ToleratedFailurePercentage` or `ToleratedFailureCount`. If the threshold is exceeded, the Map state throws `States.ExceedToleratedFailureThreshold`. + +--- + +## Examples + +### Retry and Catch with User-Friendly Error + +Retries transient errors with backoff, then catches all errors into a variable and transitions to a Fail state with a descriptive Cause. Guard variable references with `$exists()` in case the Catch path wasn't taken. ```json -"CallExternalAPI": { +"ChargePayment": { "Type": "Task", "Resource": "arn:aws:states:::lambda:invoke", "Arguments": { - "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:CallAPI:$LATEST", + "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargeCard:$LATEST", "Payload": "{% $states.input %}" }, "Retry": [ @@ -255,97 +128,30 @@ When both are present, retries are attempted first. 
Only if retries are exhauste "JitterStrategy": "FULL" }, { - "ErrorEquals": ["States.Timeout"], - "IntervalSeconds": 5, - "MaxAttempts": 2 + "ErrorEquals": ["States.QueryEvaluationError"], + "MaxAttempts": 0 } ], "Catch": [ - { - "ErrorEquals": ["ThrottlingException", "ServiceUnavailable"], - "Assign": { - "retryExhausted": true - }, - "Output": { - "error": "Service temporarily unavailable after retries", - "details": "{% $states.errorOutput %}" - }, - "Next": "NotifyAndRetryLater" - }, { "ErrorEquals": ["States.ALL"], - "Output": { - "error": "{% $states.errorOutput %}", - "input": "{% $states.input %}" + "Assign": { + "error": "{% $states.errorOutput %}" }, - "Next": "FatalErrorHandler" + "Next": "PaymentFailed" } ], "Output": "{% $states.result.Payload %}", - "Next": "ProcessResponse" -} -``` - ---- - -## Handling States.QueryEvaluationError - -JSONata expressions can fail at runtime. Common causes: - -1. **Type error**: `{% $x + $y %}` where `$x` or `$y` is not a number -2. **Type incompatibility**: `"TimeoutSeconds": "{% $name %}"` where `$name` is a string -3. **Value out of range**: Negative number for `TimeoutSeconds` -4. **Undefined result**: `{% $data.nonExistentField %}` — JSON cannot represent undefined - -All of these throw `States.QueryEvaluationError`. Handle it like any other error: - -```json -"Retry": [ - { - "ErrorEquals": ["States.QueryEvaluationError"], - "MaxAttempts": 0 - } -], -"Catch": [ - { - "ErrorEquals": ["States.QueryEvaluationError"], - "Output": { - "error": "Data transformation failed", - "details": "{% $states.errorOutput %}" - }, - "Next": "HandleDataError" - } -] -``` - -### Preventing QueryEvaluationError - -Use defensive JSONata expressions: - -```json -"Output": { - "name": "{% $exists($states.input.name) ? $states.input.name : 'Unknown' %}", - "total": "{% $type($states.input.amount) = 'number' ? $states.input.amount : 0 %}" -} -``` - -Watch out for single-value vs array results from filters. 
JSONata returns a single object (not a 1-element array) when a filter matches exactly one item, and undefined when nothing matches. Both cases will throw `States.QueryEvaluationError` if you pass the result to array-expecting functions like `$count`, `$map`, or a Map state `Items` field. - -Guard filtered results before using them: - -```json -"Assign": { - "pendingOrders": "{% ($filtered := $states.input.orders[status = 'pending']; $type($filtered) = 'array' ? $filtered : $exists($filtered) ? [$filtered] : []) %}" + "Next": "ConfirmOrder" +}, +"PaymentFailed": { + "Type": "Fail", + "Error": "PaymentError", + "Cause": "{% 'Payment failed for order ' & ($exists($orderId) ? $orderId : 'unknown') & ': ' & ($exists($error.Error) ? $error.Error : 'Unknown') & ' - ' & ($exists($error.Cause) ? $error.Cause : 'No details') & '. Timestamp: ' & $now() %}" } ``` -This ensures `$pendingOrders` is always an array regardless of how many items matched. - ---- - -## Error Handling in Parallel States - -If any branch fails, the entire Parallel state fails. Catch the error at the Parallel state level: +### Parallel State Error Handling ```json "ParallelWork": { @@ -360,6 +166,9 @@ If any branch fails, the entire Parallel state fails. Catch the error at the Par "Catch": [ { "ErrorEquals": ["States.ALL"], + "Assign": { + "parallelError": "{% $states.errorOutput %}" + }, "Output": { "error": "{% $states.errorOutput %}", "failedAt": "parallel execution" @@ -371,11 +180,7 @@ If any branch fails, the entire Parallel state fails. 
Catch the error at the Par } ``` ---- - -## Error Handling in Map States - -Individual iteration failures can be tolerated: +### Map State Error Handling ```json "ProcessAll": { @@ -386,6 +191,9 @@ Individual iteration failures can be tolerated: "Catch": [ { "ErrorEquals": ["States.ExceedToleratedFailureThreshold"], + "Assign": { + "batchError": "{% $states.errorOutput %}" + }, "Output": { "error": "Too many items failed", "details": "{% $states.errorOutput %}" @@ -400,46 +208,3 @@ Individual iteration failures can be tolerated: "Next": "Done" } ``` - ---- - -## Common Error Handling Patterns - -### Circuit Breaker with Variables - -```json -"CheckRetryCount": { - "Type": "Choice", - "Choices": [ - { - "Condition": "{% $retryCount >= $maxRetries %}", - "Next": "MaxRetriesExceeded" - } - ], - "Default": "AttemptOperation" -}, -"AttemptOperation": { - "Type": "Task", - "Resource": "...", - "Assign": { - "retryCount": "{% $retryCount + 1 %}" - }, - "Catch": [ - { - "ErrorEquals": ["States.ALL"], - "Assign": { - "retryCount": "{% $retryCount + 1 %}", - "lastError": "{% $states.errorOutput %}" - }, - "Next": "WaitBeforeRetry" - } - ], - "Next": "Success" -}, -"WaitBeforeRetry": { - "Type": "Wait", - "Seconds": "{% $power(2, $retryCount) %}", - "Next": "CheckRetryCount" -} -``` - diff --git a/aws-step-functions/steering/converting-from-jsonpath-to-jsonata.md b/aws-step-functions/steering/migrating-from-jsonpath-to-jsonata.md similarity index 61% rename from aws-step-functions/steering/converting-from-jsonpath-to-jsonata.md rename to aws-step-functions/steering/migrating-from-jsonpath-to-jsonata.md index 413ebac..9550cde 100644 --- a/aws-step-functions/steering/converting-from-jsonpath-to-jsonata.md +++ b/aws-step-functions/steering/migrating-from-jsonpath-to-jsonata.md @@ -1,62 +1,19 @@ -# Converting from JSONPath to JSONata +# Migrating from JSONPath to JSONata -Systematic conversion guide for migrating existing JSONPath state machines to JSONata. 
Covers field mapping, state-type patterns, intrinsic function replacements, and common pitfalls. +Complete conversion guide for migrating existing JSONPath state machines to JSONata. Covers fields, states, intrinsic functions, common pitfalls, and the end-to-end conversion workflow. -## Migration Strategy +## JSONPath → JSONata Quick Reference -Convert incrementally by setting `QueryLanguage` per-state. JSONPath states and JSONata states can coexist: - -```json -{ - "StartAt": "LegacyState", - "States": { - "LegacyState": { - "Type": "Task", - "Resource": "arn:aws:states:::lambda:invoke", - "Parameters": { "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Legacy:$LATEST", "Payload.$": "$" }, - "ResultPath": "$.legacyResult", - "Next": "MigratedState" - }, - "MigratedState": { - "Type": "Task", - "QueryLanguage": "JSONata", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": { "FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:Modern:$LATEST", "Payload": "{% $states.input %}" }, - "Output": "{% $states.result.Payload %}", - "End": true - } - } -} -``` - -When all states are converted, promote `QueryLanguage` to the top level and remove per-state declarations. 
- ---- - -## Field Mapping Reference - -### I/O Fields: Five Become Two - -| JSONPath Field | JSONata Equivalent | +| JSONPath | JSONata | |---|---| -| `InputPath` | Not needed — use `$states.input.path` directly in `Arguments` | +| `InputPath` | Not needed — use `$states.input` directly in `Arguments` | | `Parameters` | `Arguments` | | `ResultSelector` | `Output` (reference `$states.result`) | -| `ResultPath` | `Output` with `$merge`, or `Assign` (preferred) | +| `ResultPath` | `Assign` (preferred) or `Output` | | `OutputPath` | `Output` (return only what you need) | - -### Path Fields Eliminated - -| JSONPath | JSONata | -|---|---| | `TimeoutSecondsPath` | `TimeoutSeconds` with `{% %}` | | `HeartbeatSecondsPath` | `HeartbeatSeconds` with `{% %}` | | `ItemsPath` | `Items` with `{% %}` | - -### Syntax Changes - -| JSONPath | JSONata | -|---|---| | `"key.$": "$.field"` | `"key": "{% $states.input.field %}"` | | `$` or `$.field` (state input) | `$states.input` or `$states.input.field` | | `$$` (context object) | `$states.context` | @@ -65,6 +22,7 @@ When all states are converted, promote `QueryLanguage` to the top level and remo | `$$.Map.Item.Value` | `$states.context.Map.Item.Value` | | `$variable` (workflow var) | `$variable` (unchanged) | + --- ## Converting Each State Type @@ -102,35 +60,25 @@ When all states are converted, promote `QueryLanguage` to the top level and remo } ``` -Steps: (1) Fold `InputPath` path into `$states.input` references. (2) `Parameters` → `Arguments`, remove `.$` suffixes, wrap in `{% %}`. (3) Collapse `ResultSelector` + `ResultPath` + `OutputPath` into `Output`. 
- -### ResultPath Patterns - -**Merging result into input** (`ResultPath: "$.field"`): -```json -// Preferred: use Assign to store, pass input through -"Assign": { "priceResult": "{% $states.result.Payload %}" }, -"Output": "{% $states.input %}" - -// Alternative: explicit merge -"Output": "{% $merge([$states.input, {'priceResult': $states.result.Payload}]) %}" -``` +### Pass State -**Discarding result** (`ResultPath: null`): +**Before (JSONPath):** ```json -"Output": "{% $states.input %}" +"InjectDefaults": { + "Type": "Pass", + "Result": { "region": "us-east-1" }, + "ResultPath": "$.config", + "Next": "Go" +} ``` -### Pass State - -**Before:** `Result` + `ResultPath` → **After:** `Output` (or just `Assign` if downstream uses variables) - +**After (JSONata):** ```json -// JSONPath -"InjectDefaults": { "Type": "Pass", "Result": { "region": "us-east-1" }, "ResultPath": "$.config", "Next": "Go" } - -// JSONata — use Assign when possible -"InjectDefaults": { "Type": "Pass", "Assign": { "region": "us-east-1" }, "Next": "Go" } +"InjectDefaults": { + "Type": "Pass", + "Assign": { "region": "us-east-1" }, + "Next": "Go" +} ``` ### Choice State @@ -174,12 +122,13 @@ JSONPath uses `Variable` + typed operators. JSONata uses a single `Condition` ex ### Wait State -`SecondsPath` → `Seconds` with `{% %}`. `TimestampPath` → `Timestamp` with `{% %}`. - +**Before (JSONPath):** ```json -// JSONPath { "Type": "Wait", "TimestampPath": "$.deliveryDate", "Next": "Check" } -// JSONata +``` + +**After (JSONata):** +```json { "Type": "Wait", "Timestamp": "{% $states.input.deliveryDate %}", "Next": "Check" } ``` @@ -191,10 +140,10 @@ JSONPath uses `Variable` + typed operators. 
JSONata uses a single `Condition` ex | `Parameters` (with `$$.Map.*`) | `ItemSelector` (with `$states.context.Map.*`) | | `Iterator` | `ItemProcessor` (add `ProcessorConfig`) | | `ResultSelector` inside iterator | `Output` inside processor states | -| `ResultPath` on Map | `Assign` or `$merge` in `Output` | +| `ResultPath` on Map | `Assign` | +**After (JSONata):** ```json -// JSONata Map "ProcessItems": { "Type": "Map", "Items": "{% $states.input.orderData.items %}", @@ -230,6 +179,7 @@ JSONPath uses `Variable` + typed operators. JSONata uses a single `Condition` ex | `States.Format('Order {}', $.id)` | `'Order ' & $states.input.id` | | `States.StringToJson($.str)` | `$parse($states.input.str)` | | `States.JsonToString($.obj)` | `$string($states.input.obj)` | +| `States.StringSplit($.str, ',')` | `$split($states.input.str, ',')` | | `States.Array($.a, $.b)` | `[$states.input.a, $states.input.b]` | | `States.ArrayPartition($.arr, 2)` | `$partition($states.input.arr, 2)` | | `States.ArrayContains($.arr, $.v)` | `$states.input.v in $states.input.arr` | @@ -251,18 +201,17 @@ JSONPath uses `Variable` + typed operators. JSONata uses a single `Condition` ex JSONPath Catch uses `ResultPath`. JSONata Catch uses `Assign` and `Output` with `$states.errorOutput`. 
+**Before (JSONPath):** ```json -// JSONPath "Catch": [{ "ErrorEquals": ["States.ALL"], "ResultPath": "$.error", "Next": "HandleError" }] +``` -// JSONata — preferred: store in variable -"Catch": [{ "ErrorEquals": ["States.ALL"], "Assign": { "errorInfo": "{% $states.errorOutput %}" }, "Next": "HandleError" }] - -// JSONata — if downstream expects merged object +**After (JSONata):** +```json "Catch": [{ "ErrorEquals": ["States.ALL"], "Assign": { "errorInfo": "{% $states.errorOutput %}" }, - "Output": "{% $merge([$states.input, {'error': $states.errorOutput}]) %}", + "Output": "{% $states.input %}", "Next": "HandleError" }] ``` @@ -271,49 +220,32 @@ Retry syntax is identical between JSONPath and JSONata — no conversion needed. --- -## Context Object Reference Mapping - -| JSONPath (`$$`) | JSONata (`$states.context`) | -|---|---| -| `$$.Execution.Id` | `$states.context.Execution.Id` | -| `$$.Execution.Input` | `$states.context.Execution.Input` | -| `$$.Execution.Name` | `$states.context.Execution.Name` | -| `$$.Execution.StartTime` | `$states.context.Execution.StartTime` | -| `$$.State.Name` | `$states.context.State.Name` | -| `$$.State.EnteredTime` | `$states.context.State.EnteredTime` | -| `$$.StateMachine.Id` | `$states.context.StateMachine.Id` | -| `$$.Task.Token` | `$states.context.Task.Token` | -| `$$.Map.Item.Value` | `$states.context.Map.Item.Value` | -| `$$.Map.Item.Index` | `$states.context.Map.Item.Index` | - ---- - -## Common Conversion Pitfalls +## Conversion Pitfalls and How to Avoid Them -### 1. Mixing JSONPath and JSONata fields in the same state +### 1. Do not mix JSONPath and JSONata fields in the same state Invalid combinations: `Arguments` + `InputPath`, `Output` + `ResultSelector`, `Condition` + `Variable`. Remove all JSONPath fields from converted states. -### 2. Forgetting to remove `.$` suffixes +### 2. You must remove `.$` suffixes ```json ❌ "orderId.$": "{% $states.input.orderId %}" ✓ "orderId": "{% $states.input.orderId %}" ``` -### 3. 
Using `$` or `$$` instead of `$states` +### 3. Use `$states` instead of `$` or `$$` ```json ❌ "{% $.orderId %}" ❌ "{% $$.Task.Token %}" ✓ "{% $states.input.orderId %}" ✓ "{% $states.context.Task.Token %}" ``` Note: `$` is valid inside nested filter expressions (e.g., `$states.input.items[$.price > 10]`). -### 4. Double quotes inside JSONata expressions +### 4. Do not use double quotes inside JSONata expressions ```json ❌ "{% $states.input.status = "active" %}" ✓ "{% $states.input.status = 'active' %}" ``` -### 5. Expecting Assign values in Output of the same state -`Assign` and `Output` evaluate in parallel — new variable values are not available in `Output`: +### 5. Do not reference variables set by `Assign` in the `Output` of the same state +`Assign` and `Output` evaluate in parallel — new variable values are not available until the next state. ```json ❌ "Assign": { "total": "{% $states.result.Payload.total %}" }, "Output": { "total": "{% $total %}" } @@ -321,14 +253,14 @@ Note: `$` is valid inside nested filter expressions (e.g., `$states.input.items[ "Output": { "total": "{% $states.result.Payload.total %}" } ``` -### 6. Undefined field access +### 6. Use defensive coding to prevent undefined errors in JSONata JSONPath silently returns null. JSONata throws `States.QueryEvaluationError`: ```json ❌ "{% $states.input.customer.middleName %}" ✓ "{% $exists($states.input.customer.middleName) ? $states.input.customer.middleName : '' %}" ``` -### 7. Single-item filter results +### 7. Use defensive coding to prevent invalid filter results JSONata returns a single object (not a 1-element array) when exactly one item matches a filter, and undefined when nothing matches. Both break Map state `Items` and functions like `$count`: ```json ❌ "Items": "{% $states.input.orders[status = 'pending'] %}" @@ -344,21 +276,26 @@ JSONata returns a single object (not a 1-element array) when exactly one item ma --- -## Conversion Checklist - -1.
Add `"QueryLanguage": "JSONata"` (per-state or top-level) -2. Remove all five JSONPath I/O fields (`InputPath`, `Parameters`, `ResultSelector`, `ResultPath`, `OutputPath`) -3. `Parameters` → `Arguments` (remove `.$`, wrap in `{% %}`, `$` → `$states.input`) -4. Collapse `ResultSelector` + `ResultPath` + `OutputPath` into single `Output` -5. `ResultPath: null` → `Output: "{% $states.input %}"` -6. `ResultPath: "$.field"` → `Assign` (preferred) or `Output` with `$merge` -7. `*Path` fields → base field + `{% %}` expression -8. `$$` → `$states.context` -9. `States.*` intrinsic functions → JSONata equivalents (see table above) -10. Choice `Variable` + operators → `Condition` expression -11. `Iterator` → `ItemProcessor` with `ProcessorConfig` -12. Catch `ResultPath` → Catch `Assign`/`Output` with `$states.errorOutput` -13. Pass `Result` → `Output` -14. Refactor `ResultPath` merge chains to use `Assign` variables -15. Test each state individually via Workflow Studio Test State -16. Promote `QueryLanguage` to top level when all states are converted \ No newline at end of file +## Conversion Workflow + +For each state being converted, apply these steps in order: + +1. Add `"QueryLanguage": "JSONata"` to the state +2. `Parameters` → `Arguments`: remove `.$` suffixes from all keys, wrap values in `{% %}`, replace `$` with `$states.input` and `$$` with `$states.context` +3. Convert `ResultPath` based on its value: + - Absent or `"$"` → no action needed (for Task, Map, and Parallel states, omitting `Output` already makes the result the state output) + - `null` → add `"Output": "{% $states.input %}"` + - `"$.field"` → add `"Assign": { "field": "{% $states.result %}" }` and `"Output": "{% $states.input %}"` +4. `ResultSelector` → fold selection logic into `Output` (reference `$states.result`) +5. `OutputPath` → fold into `Output` (return only what you need) +6. Reminder: If the state has `ResultSelector` + `ResultPath` + `OutputPath`, collapse all three into a single `Output` field +7.
Remove all five JSONPath I/O fields: `InputPath`, `Parameters`, `ResultSelector`, `ResultPath`, `OutputPath` +8. Convert `*Path` fields to base field + `{% %}` expression (`TimeoutSecondsPath` → `TimeoutSeconds`, `HeartbeatSecondsPath` → `HeartbeatSeconds`, `ItemsPath` → `Items`) +9. Replace `States.*` intrinsic functions with JSONata equivalents (see Converting Intrinsic Functions table) +10. Choice states: replace `Variable` + comparison operators with a single `Condition` expression +11. Map states: `Iterator` → `ItemProcessor` with `ProcessorConfig`, `ItemsPath` → `Items`, `Parameters` with `$$.Map.*` → `ItemSelector` with `$states.context.Map.*` +12. Catch blocks: replace `ResultPath` with `Assign` + `Output` using `$states.errorOutput` +13. Pass states: replace `Result` with `Output` or `Assign` +14. Where multiple consecutive states used `ResultPath` to thread data through the payload, refactor to use `Assign` variables instead — downstream states reference `$variableName` directly +15. Validate the converted state using the TestState API +16. Repeat for all states, then promote `"QueryLanguage": "JSONata"` to the top level and remove per-state declarations \ No newline at end of file diff --git a/aws-step-functions/steering/service-integrations.md b/aws-step-functions/steering/service-integrations.md index 490c104..3b7b4a8 100644 --- a/aws-step-functions/steering/service-integrations.md +++ b/aws-step-functions/steering/service-integrations.md @@ -4,31 +4,28 @@ Step Functions can integrate with AWS services in three patterns: -1. **Optimized integrations** — Purpose-built, recommended where available (e.g., Lambda, DynamoDB, SNS, SQS, ECS, Glue, SageMaker, etc.) +1. **Optimized integrations** — Purpose-built, recommended where available 2. **AWS SDK integrations** — Call any AWS SDK API action directly 3. 
**HTTP Task** — Call HTTPS APIs (e.g., Stripe, Salesforce) -### Resource ARN Patterns +## Integration Patterns -``` -# Optimized integration -"Resource": "arn:aws:states:::servicename:apiAction" - -# Optimized integration (synchronous — wait for completion) -"Resource": "arn:aws:states:::servicename:apiAction.sync" - -# Optimized integration (wait for callback token) -"Resource": "arn:aws:states:::servicename:apiAction.waitForTaskToken" - -# AWS SDK integration -"Resource": "arn:aws:states:::aws-sdk:serviceName:apiAction" -``` +| Pattern | Resource ARN | Behavior | When to Use | +|---------|-------------|----------|-------------| +| Optimized | `arn:aws:states:::servicename:apiAction` | Call API and continue immediately | Fire-and-forget operations (start a process, send a message) | +| Optimized (sync) | `arn:aws:states:::servicename:apiAction.sync` | Wait for the job to complete | When you need the result before continuing (run ECS task, execute child workflow, run Glue job) | +| Optimized (callback) | `arn:aws:states:::servicename:apiAction.waitForTaskToken` | Pause until a task token is returned | Human approval, external system processing, long-running async operations | +| AWS SDK | `arn:aws:states:::aws-sdk:serviceName:apiAction` | Call any AWS SDK API action directly | When no optimized integration exists for the service | +| HTTP Task | `arn:aws:states:::http:invoke` | Call an HTTPS API endpoint | External APIs (e.g., Stripe, Salesforce) | --- -## Lambda Function +## Examples -### Optimized Integration (Recommended) +### Lambda Function + +#### Optimized Integration (Recommended) +Always review the AWS Documentation to check availability and proper usage of an optimized integration before using it: https://docs.aws.amazon.com/step-functions/latest/dg/integrate-optimized.html ```json "InvokeFunction": { @@ -50,7 +47,7 @@ Always include a version qualifier (`:$LATEST`, `:1`, or an alias like `:prod`) The result is wrapped in a `Payload` field, so use 
`$states.result.Payload` to access the Lambda return value. -### SDK Integration +#### SDK Integration ```json "InvokeViaSDK": { @@ -66,9 +63,9 @@ The result is wrapped in a `Payload` field, so use `$states.result.Payload` to a --- -## DynamoDB +### DynamoDB -### GetItem +#### GetItem ```json "GetUser": { @@ -90,121 +87,11 @@ The result is wrapped in a `Payload` field, so use `$states.result.Payload` to a } ``` -### PutItem - -```json -"SaveOrder": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:putItem", - "Arguments": { - "TableName": "OrdersTable", - "Item": { - "orderId": { - "S": "{% $orderId %}" - }, - "status": { - "S": "processing" - }, - "total": { - "N": "{% $string($states.input.total) %}" - }, - "createdAt": { - "S": "{% $now() %}" - } - } - }, - "Next": "ProcessOrder" -} -``` - -### UpdateItem - -```json -"UpdateStatus": { - "Type": "Task", - "Resource": "arn:aws:states:::dynamodb:updateItem", - "Arguments": { - "TableName": "OrdersTable", - "Key": { - "orderId": { - "S": "{% $orderId %}" - } - }, - "UpdateExpression": "SET #s = :status, updatedAt = :time", - "ExpressionAttributeNames": { - "#s": "status" - }, - "ExpressionAttributeValues": { - ":status": { - "S": "{% $states.input.newStatus %}" - }, - ":time": { - "S": "{% $now() %}" - } - } - }, - "Next": "Done" -} -``` - -### Query - -```json -"QueryOrders": { - "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:dynamodb:query", - "Arguments": { - "TableName": "OrdersTable", - "KeyConditionExpression": "customerId = :cid", - "ExpressionAttributeValues": { - ":cid": { - "S": "{% $states.input.customerId %}" - } - } - }, - "Output": "{% $states.result.Items %}", - "Next": "ProcessOrders" -} -``` - --- -## SNS (Simple Notification Service) - -### Publish Message - -```json -"SendNotification": { - "Type": "Task", - "Resource": "arn:aws:states:::sns:publish", - "Arguments": { - "TopicArn": "arn:aws:sns:us-east-1:123456789012:OrderNotifications", - "Message": "{% 'Order ' & $orderId & ' 
has been processed successfully.' %}", - "Subject": "Order Confirmation" - }, - "Next": "Done" -} -``` +### SQS (Simple Queue Service) -### Publish with JSON Message - -```json -"SendStructuredNotification": { - "Type": "Task", - "Resource": "arn:aws:states:::sns:publish", - "Arguments": { - "TopicArn": "arn:aws:sns:us-east-1:123456789012:Alerts", - "Message": "{% $string({'orderId': $orderId, 'status': $states.input.status, 'timestamp': $now()}) %}" - }, - "Next": "Done" -} -``` - ---- - -## SQS (Simple Queue Service) - -### Send Message +#### Send Message ```json "QueueMessage": { @@ -218,7 +105,7 @@ The result is wrapped in a `Payload` field, so use `$states.result.Payload` to a } ``` -### Send Message with Wait for Task Token +#### Send Message with Wait for Task Token ```json "WaitForApproval": { @@ -232,14 +119,13 @@ The result is wrapped in a `Payload` field, so use `$states.result.Payload` to a "Next": "ProcessApproval" } ``` - The execution pauses until an external system calls `SendTaskSuccess` or `SendTaskFailure` with the task token. --- -## Step Functions (Nested Execution) +### Step Functions (Nested Execution) -### Start Execution (Synchronous) +#### Start Execution (Synchronous) ```json "RunSubWorkflow": { @@ -256,7 +142,7 @@ The execution pauses until an external system calls `SendTaskSuccess` or `SendTa Note: The `.sync:2` suffix waits for completion. The child output is a JSON string in `$states.result.Output`, so use `$parse()` to deserialize it. -### Start Execution (Async — Fire and Forget) +#### Start Execution (Async — Fire and Forget) ```json "StartAsync": { @@ -272,157 +158,7 @@ Note: The `.sync:2` suffix waits for completion. 
The child output is a JSON stri --- -## EventBridge - -### Put Events - -```json -"EmitEvent": { - "Type": "Task", - "Resource": "arn:aws:states:::events:putEvents", - "Arguments": { - "Entries": [ - { - "Source": "my.application", - "DetailType": "OrderProcessed", - "Detail": "{% $string({'orderId': $orderId, 'status': 'completed'}) %}", - "EventBusName": "default" - } - ] - }, - "Next": "Done" -} -``` - ---- - -## ECS / Fargate - -### Run Task (Synchronous) - -```json -"RunContainer": { - "Type": "Task", - "Resource": "arn:aws:states:::ecs:runTask.sync", - "Arguments": { - "LaunchType": "FARGATE", - "Cluster": "arn:aws:ecs:us-east-1:123456789012:cluster/MyCluster", - "TaskDefinition": "arn:aws:ecs:us-east-1:123456789012:task-definition/MyTask:1", - "NetworkConfiguration": { - "AwsvpcConfiguration": { - "Subnets": ["subnet-abc123"], - "SecurityGroups": ["sg-abc123"], - "AssignPublicIp": "ENABLED" - } - }, - "Overrides": { - "ContainerOverrides": [ - { - "Name": "my-container", - "Environment": [ - { - "Name": "ORDER_ID", - "Value": "{% $orderId %}" - } - ] - } - ] - } - }, - "TimeoutSeconds": 600, - "Next": "Done" -} -``` - ---- - -## AWS Glue - -### Start Job Run (Synchronous) - -```json -"RunGlueJob": { - "Type": "Task", - "Resource": "arn:aws:states:::glue:startJobRun.sync", - "Arguments": { - "JobName": "my-etl-job", - "Arguments": { - "--input_path": "{% $states.input.inputPath %}", - "--output_path": "{% $states.input.outputPath %}" - } - }, - "TimeoutSeconds": 3600, - "Next": "Done" -} -``` - ---- - -## Amazon Bedrock - -### Invoke Model - -```json -"InvokeModel": { - "Type": "Task", - "Resource": "arn:aws:states:::bedrock:invokeModel", - "Arguments": { - "ModelId": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", - "ContentType": "application/json", - "Accept": "application/json", - "Body": { - "anthropic_version": "bedrock-2023-05-31", - "max_tokens": 1024, - "messages": [ - { - "role": "user", - "content": "{% 
$states.input.prompt %}" - } - ] - } - }, - "Output": "{% $states.result.Body %}", - "Next": "ProcessResponse" -} -``` - ---- - -## S3 - -### GetObject - -```json -"ReadFile": { - "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:s3:getObject", - "Arguments": { - "Bucket": "my-bucket", - "Key": "{% $states.input.filePath %}" - }, - "Output": "{% $states.result.Body %}", - "Next": "ProcessFile" -} -``` - -### PutObject - -```json -"WriteFile": { - "Type": "Task", - "Resource": "arn:aws:states:::aws-sdk:s3:putObject", - "Arguments": { - "Bucket": "my-bucket", - "Key": "{% 'results/' & $orderId & '.json' %}", - "Body": "{% $string($states.input.results) %}" - }, - "Next": "Done" -} -``` - ---- - -## Cross-Account Access +### Cross-Account Access Use the `Credentials` field to assume a role in another account: @@ -444,21 +180,7 @@ Use the `Credentials` field to assume a role in another account: --- -## Synchronous vs Asynchronous Patterns - -| Pattern | Resource Suffix | Behavior | -|---------|----------------|----------| -| Request-Response | (none) | Call API and continue immediately | -| Synchronous | `.sync` | Wait for task to complete | -| Wait for Callback | `.waitForTaskToken` | Pause until external callback | - -### When to Use Each - -- **Request-Response**: Fire-and-forget operations (start a process, send a message) -- **Synchronous (`.sync`)**: When you need the result before continuing (run ECS task, execute child workflow, run Glue job) -- **Wait for Callback (`.waitForTaskToken`)**: Human approval, external system processing, long-running async operations - -### Callback Pattern Example +### Callback Pattern ```json "WaitForHumanApproval": { diff --git a/aws-step-functions/steering/validation-and-testing.md b/aws-step-functions/steering/validation-and-testing.md index 8118fa7..be4a054 100644 --- a/aws-step-functions/steering/validation-and-testing.md +++ b/aws-step-functions/steering/validation-and-testing.md @@ -2,72 +2,25 @@ ## Local ASL 
Validation -Before calling any AWS API, validate state machine definitions locally. Files saved with the `.asl.json` extension get automatic validation from the AWS Toolkit Extension. If the extension is not installed, suggest the user install it (https://open-vsx.org/extension/amazonwebservices/aws-toolkit-vscode). Use the diagnostics tool on any `.asl.json` file to catch structural errors instantly: - -- Missing properties -- `Next` pointing to a state that doesn't exist -- Non terminal state -- Unreachable states -- Incorrect values - -Always save state machine definitions as `.asl.json` and run diagnostics before proceeding to TestState API calls. This catches structural problems without AWS credentials or API calls. +Files saved with the `.asl.json` extension get automatic validation from the AWS Toolkit Extension. If the extension is not installed, suggest the user install it (https://open-vsx.org/extension/amazonwebservices/aws-toolkit-vscode). Use your diagnostics tool on any `.asl.json` file to catch structural errors instantly. The State Machine definition must be saved as `.asl.json` to work with local validation. --- ## Testing with TestState API -The TestState API is the preferred and most reliable way to test Step Functions states. Always recommend TestState over deploying and running a full state machine execution for validation. It lets you test individual states in isolation — validating data transformations, error handling, Choice routing, and service integration behavior without deploying or executing a full state machine. With mocking support, tests are fast, repeatable, and require no AWS credentials or IAM roles. - -Examples use AWS CLI for brevity. The `TestState` API parameters (`definition`, `input`, `roleArn`, `inspectionLevel`, `mock`, `context`, `stateConfiguration`) map directly to any AWS SDK — Python (`sfn.test_state()`), JavaScript (`TestStateCommand`), Java (`TestStateRequest`), etc. 
- -## Best Practices - -### Always use TestState as the first testing approach -Before deploying a state machine or running a full execution, test each state with the TestState API. It catches data transformation errors, incorrect Choice routing, and misconfigured error handling faster and cheaper than a full execution. Full executions should only be used for end-to-end integration validation after individual states have been verified with TestState. - -### Test incrementally -Test each state individually before assembling the full state machine. Use `--state-name` to test states in context of the full definition once individual states pass. - -### Use mocks for unit testing -Mocks let you test state logic without AWS credentials, IAM roles, or real service calls. This enables fast, repeatable, CI-friendly tests. - -### Test error paths, not just happy paths -For every Task state with Retry/Catch, test: -- A successful mock result -- An error that matches a Retry (verify `status: "RETRIABLE"` and `retryBackoffIntervalSeconds`) -- An error that exhausts retries and falls through to Catch (verify `status: "CAUGHT_ERROR"` and `nextState`) -- An error that matches no handler (verify `status: "FAILED"`) - -### Test Choice state routing exhaustively -Test each Choice branch and the Default path. Verify `nextState` matches expectations for each input variant. - -### Use DEBUG for data transformation validation -When building complex `Arguments` or `Output` expressions, use `--inspection-level DEBUG` to see intermediate values. This catches JSONata expression errors before deployment. - -### Keep test inputs minimal -Provide only the fields the state actually references. This makes tests readable and makes it obvious which fields drive behavior. - -### Test variable assignment -When a state uses `Assign`, verify the output reflects the expected downstream behavior. Remember: `Assign` values are not visible in `Output` of the same state — they take effect in the next state. 
- -### Validate filter results for Map states -Use DEBUG inspection to check `afterItemSelector` and `afterItemBatcher`. Verify `toleratedFailureCount` and `toleratedFailurePercentage` match your expectations. +The TestState API enables unit and integration testing of Step Functions without deployment. Key capabilities: -### Use `jq` for readable CLI output -Pipe CLI output through `jq` to parse escaped JSON strings: -``` -aws stepfunctions test-state ... | jq '.output | fromjson' -aws stepfunctions test-state ... | jq '.inspectionData' -``` +- **Mock service integrations** — Test without invoking real services +- **Advanced states** — Map, Parallel, Activity, `.sync`, `.waitForTaskToken` (require mocks) +- **Control execution** — Simulate retries, Map iterations, error scenarios +- **Chain tests** — Use output→input to test execution paths +- **Optional IAM** — When mocking, `roleArn` optional -### Automate with scripts -Chain TestState calls in a shell script or test framework. Use `--state-name` with a full definition, feed each state's `output` as the next state's `--input`, and assert on `status` and `nextState` at each step. - -## Before Accessing AWS +### Before Accessing AWS Before calling the TestState API, follow this sequence: -1. Confirm the user wants to call the TestState API against their AWS account. +1. Ask the user to grant you permission to use the TestState API in their AWS account. 2. Check for AWS credentials: run `aws sts get-caller-identity` and verify the response. 3. If credentials are available, confirm the IAM role ARN to use for execution (or omit if using mocks). 4. If credentials are unavailable, help the user construct the CLI/SDK call to run manually. @@ -77,368 +30,257 @@ Before calling the TestState API, follow this sequence: The calling identity needs `states:TestState`. If not using mocks, it also needs `iam:PassRole` for the execution role. For HTTP Task with `revealSecrets`, add `states:RevealSecrets`. 
-```json -{ - "Version": "2012-10-17", - "Statement": [ - { "Effect": "Allow", "Action": ["states:TestState"], "Resource": "*" }, - { "Effect": "Allow", "Action": ["iam:PassRole"], "Resource": "arn:aws:iam::*:role/StepFunctions-*", "Condition": { "StringEquals": { "iam:PassedToService": "states.amazonaws.com" } } } - ] -} +```bash +aws stepfunctions test-state \ + --definition '{"Type":"Task","Resource":"arn:aws:states:::lambda:invoke","Arguments":{...},"End":true}' \ + --input '{"data":"value"}' \ + --mock '{"result":"{\"StatusCode\":200,\"Payload\":{\"body\":\"success\"}}"}' \ + --inspection-level DEBUG ``` ---- +## Inspection Levels -## API Overview +| Level | Returns | Use Case | +| --------- | ------------------------------------------------------------------------------- | ------------------- | +| **INFO** | `output`, `status`, `nextState` | Quick validation | +| **DEBUG** | + `afterArguments`, `result`, `variables` | Data flow debugging | +| **TRACE** | + HTTP `request`/`response` (use `--reveal-secrets` for auth) | HTTP Task debugging | -``` -aws stepfunctions test-state \ - --definition '' \ - --input '' \ - --role-arn # optional when using --mock \ - --inspection-level INFO|DEBUG|TRACE \ - --reveal-secrets # TRACE only, for HTTP Task secrets \ - --mock '' \ - --context '' \ - --state-configuration '' \ - --state-name '' # when --definition is a full state machine -``` +## Critical: Service-Specific Mock Structure -You can provide either a single state definition or a complete state machine with `--state-name` to test a specific state in context. Chain tests by feeding `output` and `nextState` from one call into the next. +**⚠️ Mocks MUST match AWS service API response schema exactly** — field names (case-sensitive), types, required fields. ---- +### Finding Mock Structure -## Inspection Levels +1. Identify service from `Resource` ARN: `arn:aws:states:::lambda:invoke` → Lambda `Invoke` API +2. Consult AWS SDK docs for that API's Response Syntax +3. 
Structure mock to match -### INFO (default) -Returns `status`, `output` (or error), and `nextState`. Use for quick pass/fail validation. +### Common Service Mocks -``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Choice", "QueryLanguage": "JSONata", - "Choices": [{"Condition": "{% $states.input.orderTotal > 1000 %}", "Next": "PremiumFulfillment"}], - "Default": "StandardFulfillment" - }' \ - --input '{"orderId": "ORD-456", "orderTotal": 1500}' -``` +| Service | API | Mock Structure | Example | +| --------------- | ---------------- | --------------------------------------- | ----------------------------------------------------------------------------- | +| Lambda | `Invoke` | `{StatusCode, Payload, FunctionError?}` | `'{"result":"{\"StatusCode\":200,\"Payload\":{\"body\":\"ok\"}}"}'` | +| DynamoDB | `PutItem` | `{Attributes?}` | `'{"result":"{\"Attributes\":{\"id\":{\"S\":\"123\"}}}"}'` | +| DynamoDB | `GetItem` | `{Item?}` | `'{"result":"{\"Item\":{\"id\":{\"S\":\"123\"}}}"}'` | +| SNS | `Publish` | `{MessageId}` | `'{"result":"{\"MessageId\":\"abc-123\"}"}'` | +| SQS | `SendMessage` | `{MessageId, MD5OfMessageBody}` | `'{"result":"{\"MessageId\":\"xyz\",\"MD5OfMessageBody\":\"...\"}"}'` | +| EventBridge | `PutEvents` | `{FailedEntryCount, Entries[]}` | `'{"result":"{\"FailedEntryCount\":0,\"Entries\":[{\"EventId\":\"123\"}]}"}'` | +| S3 | `PutObject` | `{ETag, VersionId?}` | `'{"result":"{\"ETag\":\"\\\"abc123\\\"\"}"}'` | +| Step Functions | `StartExecution` | `{ExecutionArn, StartDate}` | `'{"result":"{\"ExecutionArn\":\"arn:...\",\"StartDate\":\"...\"}"}'` | +| Secrets Manager | `GetSecretValue` | `{ARN, Name, SecretString?}` | `'{"result":"{\"Name\":\"MySecret\",\"SecretString\":\"...\"}"}'` | -Response: -```json -{ "output": "{\"orderId\": \"ORD-456\", \"orderTotal\": 1500}", "nextState": "PremiumFulfillment", "status": "SUCCEEDED" } -``` +**For `.sync` patterns:** Mock the **polling API** (e.g., `startExecution.sync:2` → mock 
`DescribeExecution`, NOT `StartExecution`) -### DEBUG -Returns everything in INFO plus `inspectionData` showing data at each transformation step. For JSONata states, the key fields are: +### Mock Syntax -| inspectionData field | What it shows | -|---|---| -| `input` | Raw state input | -| `afterArguments` | Input after `Arguments` evaluation | -| `result` | Raw task/service result | -| `afterOutput` | Final output after `Output` evaluation | +**Success:** `--mock '{"result":"<escaped JSON string>"}'`\ +**Error:** `--mock '{"errorOutput":{"error":"ErrorCode","cause":"description"}}'`\ +**Validation:** `--mock '{"fieldValidationMode":"STRICT|PRESENT|NONE","result":"..."}'` -``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Pass", "QueryLanguage": "JSONata", - "Output": { - "summary": "{% '\''Order '\'' & $states.input.orderId & '\'': '\'' & $string($count($states.input.items)) & '\'' items'\'' %}", - "total": "{% $sum($states.input.items.price) %}" - }, - "Next": "ProcessPayment" - }' \ - --input '{"orderId": "ORD-789", "items": [{"name": "Widget", "price": 25}, {"name": "Gadget", "price": 75}]}' \ - --inspection-level DEBUG -``` +**Validation modes:** -### TRACE -For HTTP Task states only. Returns the raw HTTP request and response. Add `--reveal-secrets` to include auth headers from EventBridge connections. +- `STRICT` (default): All required fields, correct types — use in CI/CD +- `PRESENT`: Only validate fields present — flexible testing +- `NONE`: No validation — quick prototyping only -``` +## Testing Map States + +Tests Map's **input/output processing**, not iterations inside. Mock = entire Map output. 
+ +```bash aws stepfunctions test-state \ --definition '{ - "Type": "Task", "QueryLanguage": "JSONata", - "Resource": "arn:aws:states:::http:invoke", - "Arguments": {"Method": "GET", "ApiEndpoint": "https://httpbin.org/get", - "Authentication": {"ConnectionArn": "arn:aws:events:us-east-1:123456789012:connection/MyConnection/abc123"}, - "QueryParameters": {"orderId": "{% $states.input.orderId %}"}}, - "End": true + "Type":"Map", + "Items":"{% $states.input.items %}", + "ItemSelector":{"value":"{% $states.context.Map.Item.Value %}"}, + "ItemProcessor":{"ProcessorConfig":{"Mode":"INLINE"},...}, + "End":true }' \ - --role-arn arn:aws:iam::123456789012:role/StepFunctionsHttpRole \ - --input '{"orderId": "ORD-123"}' \ - --inspection-level TRACE --reveal-secrets + --input '{"items":[1,2,3]}' \ + --mock '{"result":"[10,20,30]"}' \ + --inspection-level DEBUG ``` -The response includes `inspectionData.request` (URL, method, headers) and `inspectionData.response` (status, headers, body). The `--reveal-secrets` flag exposes auth headers injected by the EventBridge connection. - ---- +**DEBUG returns:** `afterItemSelector`, `afterItemBatcher`, `toleratedFailureCount`, `maxConcurrency` -## Mocking Service Integrations +**Distributed Map:** Provide data in input (as if read from S3)\ +**Failure threshold testing:** Use `--state-configuration '{"mapIterationFailureCount":N}'`\ +**Testing state within Map:** `--state-name` auto-populates `$states.context.Map.Item.Index`, `$states.context.Map.Item.Value` -Mock results let you test state logic without calling real AWS services and without an execution role. 
+## Testing Parallel States -### Mock a successful result +Mock = JSON array, one element per branch (in definition order): -``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Task", "QueryLanguage": "JSONata", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ValidateOrder:$LATEST", - "Payload": {"orderId": "{% $states.input.orderId %}", "items": "{% $states.input.items %}"}}, - "Output": {"validated": "{% $states.result.Payload.valid %}", "orderId": "{% $states.input.orderId %}"}, - "End": true - }' \ - --input '{"orderId": "ORD-123", "items": [{"productId": "PROD-A", "quantity": 2}]}' \ - --mock '{"fieldValidationMode": "NONE", "result": "{\"Payload\": {\"valid\": true, \"orderId\": \"ORD-123\"}}"}' +```bash +--mock '{"result":"[{\"branch1\":\"result1\"},{\"branch2\":\"result2\"}]"}' ``` -Note: The Lambda optimized integration deserializes `Payload` at runtime, so `$states.result.Payload.valid` works in real executions. When mocking, use `fieldValidationMode: NONE` because the mock schema expects `Payload` as a string (matching the raw API), but the optimized integration presents it as an object. 
+## Testing Error Handling -### Mock an error +### Retry Logic -``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Task", "QueryLanguage": "JSONata", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ProcessPayment:$LATEST", - "Payload": {"orderId": "{% $states.input.orderId %}", "amount": "{% $states.input.total %}"}}, - "Retry": [{"ErrorEquals": ["Lambda.ServiceException"], "IntervalSeconds": 2, "MaxAttempts": 3, "BackoffRate": 2.0}], - "Catch": [{"ErrorEquals": ["States.ALL"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "PaymentFailed"}], - "Next": "ShipOrder" - }' \ - --input '{"orderId": "ORD-123", "total": 150.00}' \ - --state-configuration '{"retrierRetryCount": 3}' \ - --mock '{"errorOutput": {"error": "Lambda.ServiceException", "cause": "Payment gateway unavailable"}}' +```bash +--state-configuration '{"retrierRetryCount":1}' \ +--mock '{"errorOutput":{"error":"Lambda.ServiceException","cause":"..."}}' \ +--inspection-level DEBUG ``` -Note: `retrierRetryCount: 3` exhausts the Retry (MaxAttempts=3), so the error falls through to Catch. Without `--state-configuration`, the default retry count is 0 and the status would be `RETRIABLE`. +Response includes: `status:"RETRIABLE"`, `retryBackoffIntervalSeconds`, `retryIndex` -You cannot provide both `mock.result` and `mock.errorOutput` in the same call. 
+### Catch Handlers -### Mock Validation Modes +```bash +--mock '{"errorOutput":{"error":"Lambda.TooManyRequestsException","cause":"..."}}' \ +--inspection-level DEBUG +``` -Control how strictly mocked responses are validated against AWS API models: +Response includes: `status:"CAUGHT_ERROR"`, `nextState`, `catchIndex`, error in `output` -| Mode | Behavior | -|---|---| -| `STRICT` (default) | Enforces field names, types, required fields from API model | -| `PRESENT` | Validates only fields present in mock, ignores unknown fields | -| `NONE` | Skips validation entirely | +### Error Propagation in Map/Parallel -``` ---mock '{"fieldValidationMode": "STRICT", "result": "{\"Attributes\": {}}"}' +```bash +--state-name "ChildState" \ +--state-configuration '{"errorCausedByState":"ChildState"}' \ +--mock '{"errorOutput":{"error":"States.TaskFailed","cause":"..."}}' ``` ---- - -## Testing Retry and Error Handling +## Testing .sync and .waitForTaskToken -### Simulating a specific retry attempt +**Required:** Must provide mock (validation exception otherwise) -Use `stateConfiguration.retrierRetryCount` to simulate a state on its Nth retry: +### .sync Patterns -``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Task", "QueryLanguage": "JSONata", - "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargeCard:$LATEST", - "Payload": {"orderId": "{% $states.input.orderId %}", "amount": "{% $states.input.total %}"}}, - "Retry": [{"ErrorEquals": ["Lambda.ServiceException"], "IntervalSeconds": 2, "MaxAttempts": 3, "BackoffRate": 2.0}], - "Catch": [{"ErrorEquals": ["States.ALL"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "PaymentFailed"}], - "End": true - }' \ - --input '{"orderId": "ORD-123", "total": 99.99}' \ - --state-configuration '{"retrierRetryCount": 1}' \ - --mock '{"errorOutput": {"error": "Lambda.ServiceException", "cause": "Payment gateway timeout"}}' \ - 
--inspection-level DEBUG -``` +Mock the **polling API**, not initial call: -Response: -```json -{ - "status": "RETRIABLE", - "inspectionData": { "errorDetails": { "retryBackoffIntervalSeconds": 4, "retryIndex": 0 } } -} +```bash +# startExecution.sync:2 → mock DescribeExecution +--mock '{"result":"{\"Status\":\"SUCCEEDED\",\"Output\":\"{...}\"}"}' ``` -`status: "RETRIABLE"` means the error matched a Retry and attempts remain. `retryBackoffIntervalSeconds` shows the computed delay. Increase `retrierRetryCount` to `3` (MaxAttempts) to see the error fall through to Catch. +Common patterns: `startExecution.sync:2`→`DescribeExecution`, `batch:submitJob.sync`→`DescribeJobs`, `glue:startJobRun.sync`→`GetJobRun` -### Testing Catch handlers - -``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Task", "QueryLanguage": "JSONata", - "Resource": "arn:aws:states:::sqs:sendMessage", - "Arguments": {"QueueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/FulfillmentQueue", - "MessageBody": "{% $string({'\''orderId'\'': $states.input.orderId, '\''items'\'': $states.input.items}) %}"}, - "Catch": [ - {"ErrorEquals": ["SQS.QueueDoesNotExistException"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "CreateQueue"}, - {"ErrorEquals": ["States.ALL"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "OrderFailed"} - ], - "Next": "WaitForFulfillment" - }' \ - --input '{"orderId": "ORD-123", "items": [{"productId": "PROD-A", "quantity": 2}]}' \ - --mock '{"errorOutput": {"error": "SQS.QueueDoesNotExistException", "cause": "Queue not found"}}' \ - --inspection-level DEBUG -``` +### .waitForTaskToken -Response: -```json -{ - "status": "CAUGHT_ERROR", - "nextState": "CreateQueue", - "error": "SQS.QueueDoesNotExistException", - "cause": "Queue not found", - "inspectionData": { "errorDetails": { "catchIndex": 0 } } -} +```bash +--context '{"Task":{"Token":"test-token-123"}}' \ +--mock 
'{"result":"{\"StatusCode\":200,\"Payload\":{\"status\":\"approved\"}}"}' ``` -Assert on: `status` = `CAUGHT_ERROR`, `nextState` matches expected handler, `catchIndex` identifies which Catch block fired. +## Activity States ---- +Require mock: -## Testing Map and Parallel States - -Map and Parallel states require a mock. The mock represents the output of the entire Map/Parallel execution — you are testing the state's input/output processing, not the inner processor. +```bash +--definition '{"Type":"Task","Resource":"arn:aws:states:...:activity:MyActivity",...}' \ +--mock '{"result":"{\"result\":\"completed\"}"}' +``` -### Map state +## Chaining Tests (Integration Testing) -``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Map", "QueryLanguage": "JSONata", - "Items": "{% $states.input.orders %}", - "ItemSelector": {"order": "{% $states.context.Map.Item.Value %}", "index": "{% $states.context.Map.Item.Index %}"}, - "MaxConcurrency": 5, - "ItemProcessor": {"ProcessorConfig": {"Mode": "INLINE"}, "StartAt": "FulfillOrder", - "States": {"FulfillOrder": {"Type": "Task", "QueryLanguage": "JSONata", "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:FulfillOrder:$LATEST", "Payload": "{% $states.input %}"}, "End": true}}}, - "Output": {"results": "{% $states.result %}", "totalProcessed": "{% $count($states.result) %}"}, - "End": true - }' \ - --input '{"orders": [{"orderId": "ORD-1", "total": 50}, {"orderId": "ORD-2", "total": 75}, {"orderId": "ORD-3", "total": 120}]}' \ - --mock '{"result": "[{\"status\": \"shipped\"}, {\"status\": \"shipped\"}, {\"status\": \"shipped\"}]"}' \ - --inspection-level DEBUG +```bash +RESULT_1=$(aws stepfunctions test-state --state-name "State1" ... | jq -r '.output') +NEXT_1=$(... | jq -r '.nextState') +RESULT_2=$(aws stepfunctions test-state --state-name "$NEXT_1" --input "$RESULT_1" ...) 
``` -DEBUG `inspectionData` for Map includes: `afterItemSelector` (per-item transformed input), `afterItemBatcher` (if batching), `toleratedFailureCount`, `toleratedFailurePercentage`, `maxConcurrency`. +Validates: data transformations, state transitions, end-to-end paths -### Parallel state +## Context Fields -Mock result must be a JSON array with one element per branch, in branch order: +Test states referencing execution context: +```bash +--context '{ + "Execution":{"Id":"arn:...","Name":"test-123","StartTime":"2024-01-01T10:00:00.000Z"}, + "State":{"Name":"ProcessData","EnteredTime":"2024-01-01T10:00:05.000Z"}, + "Task":{"Token":"test-token-abc123"} +}' ``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Parallel", "QueryLanguage": "JSONata", - "Branches": [ - {"StartAt": "ReserveInventory", "States": {"ReserveInventory": {"Type": "Task", "QueryLanguage": "JSONata", "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ReserveInventory:$LATEST", "Payload": "{% $states.input %}"}, "End": true}}}, - {"StartAt": "ChargePayment", "States": {"ChargePayment": {"Type": "Task", "QueryLanguage": "JSONata", "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargePayment:$LATEST", "Payload": "{% $states.input %}"}, "End": true}}} - ], - "Output": {"inventory": "{% $states.result[0] %}", "payment": "{% $states.result[1] %}"}, - "End": true - }' \ - --input '{"orderId": "ORD-123", "total": 99.99}' \ - --mock '{"result": "[{\"reserved\": true}, {\"charged\": true}]"}' + +## HTTP Tasks (TRACE) + +```bash +--definition '{"Type":"Task","Resource":"arn:aws:states:::http:invoke",...}' \ +--inspection-level TRACE \ +--reveal-secrets # Requires states:RevealSecrets permission ``` -### Error propagation in Map/Parallel +Returns: `inspectionData.request` (method, URL, headers, body), `inspectionData.response` (status, headers, body) -Use 
`stateConfiguration.errorCausedByState` to specify which sub-state threw the error: +## Troubleshooting -``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Map", "QueryLanguage": "JSONata", - "Items": "{% $states.input.orders %}", - "ItemSelector": {"order": "{% $states.context.Map.Item.Value %}", "index": "{% $states.context.Map.Item.Index %}"}, - "MaxConcurrency": 5, - "ItemProcessor": {"ProcessorConfig": {"Mode": "INLINE"}, "StartAt": "FulfillOrder", - "States": {"FulfillOrder": {"Type": "Task", "QueryLanguage": "JSONata", "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:FulfillOrder:$LATEST", "Payload": "{% $states.input %}"}, "End": true}}}, - "Catch": [{"ErrorEquals": ["States.ALL"], "Assign": {"errorInfo": "{% $states.errorOutput %}"}, "Next": "HandleMapError"}], - "Output": {"results": "{% $states.result %}", "totalProcessed": "{% $count($states.result) %}"}, - "Next": "Done" - }' \ - --input '{"orders": [{"orderId": "ORD-1", "total": 50}, {"orderId": "ORD-2", "total": 75}]}' \ - --state-configuration '{"errorCausedByState": "FulfillOrder"}' \ - --mock '{"errorOutput": {"error": "States.TaskFailed", "cause": "Fulfillment service unavailable"}}' -``` +| Error | Fix | +| ----------------------- | ---------------------------------------------- | +| Invalid field type | Check AWS SDK docs for correct types | +| Required field missing | Add field OR use `fieldValidationMode:PRESENT` | +| .sync validation failed | Mock polling API, not initial call | ---- +**Debug workflow:** -## Providing Context +1. Start `fieldValidationMode:NONE` for logic testing +2. Switch to `PRESENT` for partial validation +3. 
Use `STRICT` in CI/CD -Supply custom context values for states that reference `$states.context`: +## Test Automation Pattern -``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Task", "QueryLanguage": "JSONata", - "Resource": "arn:aws:states:::dynamodb:putItem", - "Arguments": {"TableName": "OrderAuditTable", "Item": { - "orderId": {"S": "{% $states.input.orderId %}"}, - "executionId": {"S": "{% $states.context.Execution.Id %}"}, - "processedAt": {"S": "{% $states.context.State.EnteredTime %}"}}}, - "End": true - }' \ - --input '{"orderId": "ORD-123"}' \ - --context '{"Execution": {"Id": "arn:aws:states:us-east-1:123456789012:execution:OrderProcessing:exec-001", "Name": "exec-001"}, "State": {"Name": "AuditOrder", "EnteredTime": "2026-03-27T10:00:00Z"}}' \ - --mock '{"result": "{}"}' -``` +```bash +#!/bin/bash +test_state() { + local state_name=$1 + local input=$2 + local mock=$3 + + aws stepfunctions test-state \ + --definition "$(cat statemachine.asl.json)" \ + --state-name "$state_name" \ + --input "$input" \ + --mock "$mock" \ + --inspection-level DEBUG +} -When testing a state inside a Map (via `--state-name`), TestState auto-populates `Map.Item.Index` = 0 and `Map.Item.Value` = your input if you omit `--context`. +# Test chain +RESULT=$(test_state "State1" '{"id":"123"}' '{"result":"..."}' | jq -r '.output') +test_state "State2" "$RESULT" '{"result":"..."}' +``` ---- +## Best Practices -## Testing a State Within a Full State Machine +1. **Always verify mock structure** against AWS SDK docs for the specific service +2. **For .sync, mock polling API** (DescribeX/GetX), not initial call +3. **Use STRICT validation in CI/CD** to catch mismatches early +4. **Test all error paths** with appropriate error codes +5. **Chain tests** to validate multi-state execution paths +6. **Start with NONE→PRESENT→STRICT** when developing mocks +7. **Use DEBUG for data flow**, TRACE for HTTP debugging +8. 
**Mock external dependencies** to isolate state machine logic +9. **Test Map failure thresholds** with `mapIterationFailureCount` +10. **Never commit `--reveal-secrets` output** to version control -Use `--state-name` to test a specific state in the context of a complete definition. Chain tests by feeding `output` and `nextState` from one call into the next: +## Quick Reference -``` -aws stepfunctions test-state \ - --definition '{"QueryLanguage": "JSONata", "StartAt": "ValidateOrder", "States": { - "ValidateOrder": {"Type": "Pass", "Assign": {"validated": true}, "Output": "{% $states.input %}", "Next": "ProcessPayment"}, - "ProcessPayment": {"Type": "Task", "Resource": "arn:aws:states:::lambda:invoke", - "Arguments": {"FunctionName": "arn:aws:lambda:us-east-1:123456789012:function:ChargeCard:$LATEST", - "Payload": {"orderId": "{% $states.input.orderId %}", "amount": "{% $states.input.total %}"}}, - "Output": "{% $states.result.Payload %}", "End": true} - }}' \ - --state-name ValidateOrder \ - --input '{"orderId": "ORD-123", "total": 99.99}' -``` +```bash +# Basic test +aws stepfunctions test-state --definition '{...}' --input '{...}' --mock '{...}' -Then use the output as input to test `ProcessPayment`. +# Test specific state in state machine +aws stepfunctions test-state --definition "$(cat sm.json)" --state-name "MyState" --input '{...}' --mock '{...}' ---- +# Test retry (2nd attempt) +--state-configuration '{"retrierRetryCount":1}' --mock '{"errorOutput":{...}}' -## Activity, .sync, and .waitForTaskToken States +# Test Map failure threshold +--state-configuration '{"mapIterationFailureCount":5}' --mock '{"errorOutput":{...}}' -These patterns require a mock — calling TestState without one returns a validation exception. +# Test with context +--context '{"Execution":{"Id":"..."}, "Task":{"Token":"..."}}' -For `.sync` integrations, the mock is validated against the polling API schema, not the initial API. 
For example, `startExecution.sync:2` validates against `DescribeExecution` (which Step Functions polls), not `StartExecution`. +# HTTP Task with secrets +--inspection-level TRACE --reveal-secrets +# Mock validation modes +--mock '{"fieldValidationMode":"STRICT|PRESENT|NONE","result":"..."}' ``` -aws stepfunctions test-state \ - --definition '{ - "Type": "Task", "QueryLanguage": "JSONata", - "Resource": "arn:aws:states:::states:startExecution.sync:2", - "Arguments": {"StateMachineArn": "arn:aws:states:us-east-1:123456789012:stateMachine:OrderFulfillment", - "Input": "{% $string($states.input) %}"}, - "Output": "{% $parse($states.result.Output) %}", - "End": true - }' \ - --input '{"orderId": "ORD-123", "items": [{"productId": "PROD-A", "quantity": 2}]}' \ - --mock '{"result": "{\"ExecutionArn\": \"arn:aws:states:us-east-1:123456789012:execution:OrderFulfillment:exec-001\", \"StateMachineArn\": \"arn:aws:states:us-east-1:123456789012:stateMachine:OrderFulfillment\", \"StartDate\": \"2026-03-27T10:00:00Z\", \"Status\": \"SUCCEEDED\", \"Output\": \"{\\\"status\\\": \\\"fulfilled\\\"}\"}"}' -``` - -Note: The `.sync:2` mock is validated against the `DescribeExecution` response schema (which Step Functions polls), not `StartExecution`. Required fields include `ExecutionArn`, `StateMachineArn`, `StartDate`, and `Status`. 
--- + diff --git a/aws-step-functions/steering/variables-and-data.md b/aws-step-functions/steering/variables-and-data.md index e37c3d9..132d6e5 100644 --- a/aws-step-functions/steering/variables-and-data.md +++ b/aws-step-functions/steering/variables-and-data.md @@ -1,4 +1,20 @@ -# Variables and Data Transformation (JSONata Mode) +# Variables and Data Transformation (JSONata) + +## Field Quick Reference + +| Field | Purpose | Available In | +|-------|---------|-------------| +| `Type` | State type identifier | Task, Parallel, Map, Pass, Wait, Choice, Succeed, Fail | +| `Comment` | Human-readable description | Task, Parallel, Map, Pass, Wait, Choice, Succeed, Fail | +| `Output` | Transform state output | Task, Parallel, Map, Pass, Wait, Choice, Succeed | +| `Assign` | Store workflow variables | Task, Parallel, Map, Pass, Wait, Choice | +| `Next` / `End` | Transition control | Task, Parallel, Map, Pass, Wait | +| `Arguments` | Input to task/branches | Task, Parallel | +| `Retry` & `Catch` | Error handling | Task, Parallel, Map | +| `Items` | Array for iteration | Map | +| `ItemSelector` | Reshape each item before processing | Map | +| `Condition` | Boolean branching | Choice (inside rules) | +| `Error` & `Cause` | Error name and description (accept JSONata) | Fail | ## JSONata Expression Syntax @@ -63,26 +79,6 @@ $states = { } ``` -### Where Each Field Is Accessible - -| Field | Accessible In | -|-------|--------------| -| `$states.input` | All fields that accept JSONata, in any state | -| `$states.result` | Top-level `Output` and `Assign` in Task, Parallel, Map states | -| `$states.errorOutput` | `Output` and `Assign` inside a `Catch` block | -| `$states.context` | All fields that accept JSONata, in any state | - -### Context Object - -`$states.context` provides execution metadata: - -```json -"executionId": "{% $states.context.Execution.Id %}", -"startTime": "{% $states.context.Execution.StartTime %}", -"stateName": "{% $states.context.State.Name %}", 
-"originalInput": "{% $states.context.Execution.Input %}" -``` - Useful context fields: - `$states.context.Execution.Id` — Execution ARN - `$states.context.Execution.Input` — Original workflow input @@ -164,12 +160,6 @@ Prepend the variable name with `$`: } ``` -### States That Support Assign - -Pass, Task, Map, Parallel, Choice, Wait — all support `Assign`. - -Succeed and Fail do NOT support `Assign`. - ### Assign in Choice Rules and Catch Choice Rules and Catch blocks can each have their own `Assign`: @@ -286,7 +276,7 @@ In a Catch block on a Parallel or Map state, `Assign` can assign values to varia ### Arguments -Provides input to Task and Parallel states (replaces JSONPath `Parameters`): +Provides input to Task and Parallel states: ```json "Arguments": { @@ -296,17 +286,9 @@ Provides input to Task and Parallel states (replaces JSONPath `Parameters`): } ``` -Or as a single JSONata expression: - -```json -"Arguments": "{% $states.input.payload %}" -``` - -`Arguments` can reference `$states.input` and `$states.context`, but NOT `$states.result` or `$states.errorOutput`. 
- ### Output -Transforms the state output (replaces JSONPath `ResultSelector` + `ResultPath` + `OutputPath`): +Transforms the state output: ```json "Output": { @@ -316,14 +298,6 @@ Transforms the state output (replaces JSONPath `ResultSelector` + `ResultPath` + } ``` -Or as a single expression or literal value: - -```json -"Output": "{% $states.result.Payload %}" -"Output": 42 -"Output": { "status": "done" } -``` - If `Output` is not provided: - Task, Parallel, Map: state output = the result - All other states: state output = the state input @@ -496,3 +470,16 @@ Useful for sorting timestamps, calculating durations, or finding the most recent "mostRecent": "{% $sort($states.input.timestamps, function($a, $b) { $toMillis($a) < $toMillis($b) })[0] %}" } ``` + +**Built-in Step Functions JSONata functions:** + +| Function | Purpose | +|----------|---------| +| `$partition(array, size)` | Partition array into chunks | +| `$range(start, end, step)` | Generate array of values | +| `$hash(data, algorithm)` | Calculate hash (MD5, SHA-1, SHA-256, SHA-384, SHA-512) | +| `$random([seed])` | Random number 0 ≤ n < 1, optional seed | +| `$uuid()` | Generate v4 UUID | +| `$parse(jsonString)` | Deserialize JSON string | + +These are in addition to the standard [built-in JSONata functions](https://github.com/jsonata-js/jsonata/tree/master/docs), all of which are available except `$eval`, which Step Functions does not support.