diff --git a/automation-api/lib/pilot/generate_eval_prompts.py b/automation-api/lib/pilot/generate_eval_prompts.py
index ea916aa..ff6c2c1 100644
--- a/automation-api/lib/pilot/generate_eval_prompts.py
+++ b/automation-api/lib/pilot/generate_eval_prompts.py
@@ -207,7 +207,12 @@ def generate_eval_prompts(
                         option_c_correctness=question_row["option_c_correctness"],
                     )
 
-                    custom_id = f"{prompt_id}-eval-{metric_id}"
+                    # FIXME: Anthropic expects custom_id to be at most 64 chars.
+                    # We should update generate_prompt.py to produce a shorter
+                    # custom_id so that it does not need shortening here.
+                    custom_id = f"{prompt_id}-{metric_id}".replace("-question-", "-q-")
+                    if len(custom_id) > 64:
+                        raise ValueError("custom_id too long")
                     prompt_id_mapping.append((custom_id, eval_prompt))
 
                     if format == JsonlFormat.OPENAI: