Commit 1f054c7
Author: Sokserey Sun
Parent: 044db0e

Cleaned up the toml file and added logic to use the toml

6 files changed: +202 -271 lines

scripts/generate_and_eval_single_sample.py
Lines changed: 2 additions & 2 deletions

@@ -10,7 +10,7 @@
 from src.dataset import construct_kernelbench_dataset
 from src.eval import eval_kernel_against_ref
 from src.prompt_constructor import prompt_generate_custom_cuda_from_prompt_template
-from src.prompt_constructor_multilang import get_prompt_for_backend
+from src.prompt_constructor_multilang import get_prompt_for_language
 from src.utils import (
     create_inference_server_from_presets,
     extract_first_code,
@@ -147,7 +147,7 @@ def main(config: EvalConfig):
 
     # Use appropriate prompt constructor based on backend
     if config.backend in ["cuda", "triton", "cute"]:
-        custom_prompt = get_prompt_for_backend(ref_arch_src, config.backend)
+        custom_prompt = get_prompt_for_language(ref_arch_src, language=config.backend, option="few_shot")
     else:
         raise ValueError(
             f"Unsupported backend: {config.backend}. Must be 'cuda', 'triton', or 'cute'."

scripts/generate_and_eval_single_sample_modal.py
Lines changed: 2 additions & 2 deletions

@@ -16,7 +16,7 @@
 #from src.dataset import construct_kernelbench_dataset
 from src.eval import eval_kernel_against_ref
 from src.prompt_constructor import prompt_generate_custom_cuda_from_prompt_template
-from src.prompt_constructor_multilang import get_prompt_for_backend
+from src.prompt_constructor_multilang import get_prompt_for_language
 from src.utils import extract_first_code, query_server, set_gpu_arch, read_file, create_inference_server_from_presets
 
 app = modal.App("eval_single_sample")
@@ -193,7 +193,7 @@ def main(config: EvalConfig):
 
     # Use appropriate prompt constructor based on backend
     if config.backend in ["cuda", "triton", "cute"]:
-        custom_prompt = get_prompt_for_backend(ref_arch_src, config.backend)
+        custom_prompt = get_prompt_for_language(ref_arch_src, language=config.backend, option="few_shot")
     else:
         raise ValueError(f"Unsupported backend: {config.backend}. Must be 'cuda', 'triton', or 'cute'.")

scripts/generate_samples.py
Lines changed: 2 additions & 2 deletions

@@ -11,7 +11,7 @@
 from src.dataset import construct_kernelbench_dataset
 from src.eval import eval_kernel_against_ref
 from src.prompt_constructor import prompt_generate_custom_cuda_from_prompt_template
-from src.prompt_constructor_multilang import get_prompt_for_backend
+from src.prompt_constructor_multilang import get_prompt_for_language
 from src.utils import (
     create_inference_server_from_presets,
     extract_first_code,
@@ -121,7 +121,7 @@ def generate_sample_single(
 
     # Construct Prompt
     if config.backend in ["cuda", "triton", "cute"]:
-        custom_cuda_prompt = get_prompt_for_backend(ref_arch_src, config.backend)
+        custom_cuda_prompt = get_prompt_for_language(ref_arch_src, language=config.backend, option="few_shot")
     else:
         raise ValueError(
             f"Unsupported backend: {config.backend}. Must be 'cuda', 'triton', or 'cute'."

src/loader.py
Lines changed: 76 additions & 24 deletions

@@ -37,12 +37,6 @@ def compose_blocks(self, keys: List[str]) -> str:
             text_parts.append(node.strip() + "\n")
         return "\n".join(text_parts).strip() + "\n"
 
-    def get_template_node(self, backend: str, template: str) -> Dict[str, Any]:
-        try:
-            return self.data["backends"][backend]["templates"][template]
-        except KeyError as e:
-            raise KeyError(f"Unknown backend/template: {backend}/{template}") from e
-
 def _gpu_context_from_py(py_path: str, gpu_name: str) -> Dict[str, str]:
     """
     Load GPU_* dicts from a Python file (no exec of raw strings; use runpy).
@@ -79,39 +73,97 @@ def _gpu_context_from_py(py_path: str, gpu_name: str) -> Dict[str, str]:
         "gpu_best_practices_bullets": best_bullets,
     }
 
-def render_prompt(
+def render_prompt_by_option(
     *,
     prompts_toml: str,
-    backend: str,
-    template: str,
+    language: str,
+    option: str,
     context: Dict[str, str],
     gpu_specs_py: Optional[str] = None,
     gpu_name: Optional[str] = None,
 ) -> str:
+    """
+    New function that uses languages.X and options.Y structure
+
+    Args:
+        prompts_toml: Path to the prompts.toml file
+        language: The kernel language (triton, cuda, cute)
+        option: The prompt option (basic, few_shot, hardware_info, fix_compile, fix_correctness)
+        context: Variables to fill in the prompt template
+        gpu_specs_py: Optional path to GPU specs Python file
+        gpu_name: Optional GPU name (required if option requires_gpu)
+    """
     cfg = PromptConfig.from_toml(prompts_toml)
-    node = cfg.get_template_node(backend, template)
-
+
+    # Get language-specific content
+    try:
+        lang_data = cfg.data["languages"][language]
+    except KeyError:
+        raise KeyError(f"Unknown language: {language}")
+
+    # Get option configuration
+    try:
+        option_data = cfg.data["options"][option]
+    except KeyError:
+        raise KeyError(f"Unknown option: {option}")
+
+    # Get shared templates
+    shared = cfg.data.get("shared", {})
+    language_display = lang_data.get("language_display", language.upper())
+
+    # Fill in shared templates with language-specific terms
+    problem_statement = shared.get("problem_statement", "").format(language_display=language_display)
+    instruction = shared.get("instruction", "").format(language_display=language_display)
+
+    # Add language-specific content to context
+    context = {
+        **context,
+        "language": language.upper() if language in ["cuda", "cute"] else language.capitalize(),
+        "language_display": language_display,
+        "problem_statement": problem_statement,
+        "instruction": instruction,
+    }
+
     # Load example files if requested
-    if node.get("requires_example"):
-        ex_arch_path = _abs_path(node["example_arch_path"])
-        ex_new_path = _abs_path(node["example_new_arch_path"])
+    if option_data.get("requires_example"):
+        # Use language-specific example arch, or fall back to shared one
+        ex_arch_path = _abs_path(
+            lang_data.get("few_shot_example_arch") or shared.get("few_shot_example_arch")
+        )
+        ex_new_path = _abs_path(lang_data["few_shot_new_arch"])
         context = {
             **context,
             "example_arch_src": read_file(ex_arch_path),
             "example_new_arch_src": read_file(ex_new_path),
         }
-
-    # Load GPU details (from .py) if requested
-    if node.get("requires_gpu"):
+
+    # Load GPU details if requested
+    if option_data.get("requires_gpu"):
         if not (gpu_specs_py and gpu_name):
-            raise ValueError("Template requires GPU info; provide gpu_specs_py and gpu_name")
+            raise ValueError(f"Option '{option}' requires GPU info; provide gpu_specs_py and gpu_name")
         context = {**context, **_gpu_context_from_py(_abs_path(gpu_specs_py), gpu_name)}
-
-    # Compose & fill
-    compose_keys = node["compose"]
-    prompt_text = cfg.compose_blocks(compose_keys)
-
+
+    # Build the prompt from components
+    prompt_parts = []
+    for component in option_data["components"]:
+        if component == "problem_statement":
+            # Use the already-formatted problem_statement from context
+            prompt_parts.append(context["problem_statement"])
+        elif component == "instruction":
+            # Use the already-formatted instruction from context
+            prompt_parts.append(context["instruction"])
+        elif component.startswith("hardware_"):
+            # Hardware components from templates.hardware
+            template_key = f"templates.hardware.{component}"
+            prompt_parts.append(cfg.compose_blocks([template_key]))
+        else:
+            # Other components from templates.common
+            template_key = f"templates.common.{component}"
+            prompt_parts.append(cfg.compose_blocks([template_key]))
+
+    prompt_text = "\n".join(prompt_parts).strip() + "\n"
+
     try:
         return prompt_text.format(**context).strip() + "\n"
     except KeyError as e:
-        raise KeyError(f"Missing placeholder in context: {e.args[0]}") from e
+        raise KeyError(f"Missing placeholder in context: {e.args[0]}. Available: {list(context.keys())}") from e
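
For reference, the lookups in render_prompt_by_option imply a prompts.toml organized around [languages.*], [options.*], [shared], and [templates.*] tables. The sketch below is a hypothetical minimal file consistent with those lookups; the table names mirror the code above, but every path and template string is invented for illustration. It is parsed here with Python's stdlib tomllib (3.11+) just to show the shape the loader expects:

import tomllib  # stdlib TOML parser, Python 3.11+

# Hypothetical minimal prompts.toml; values are placeholders, not the repo's.
SAMPLE_TOML = """
[shared]
problem_statement = "You write custom {language_display} kernels to speed up PyTorch models."
instruction = "Answer with the complete new {language_display} model code in one code block."
few_shot_example_arch = "src/prompts/few_shot/model_ex_add.py"

[languages.triton]
language_display = "Triton"
few_shot_new_arch = "src/prompts/few_shot/model_new_ex_add_triton.py"

[options.few_shot]
requires_example = true
components = ["problem_statement", "few_shot_example", "instruction"]

[options.hardware_info]
requires_gpu = true
components = ["problem_statement", "hardware_overview", "instruction"]

[templates.common]
few_shot_example = "Example:\\n{example_arch_src}\\nbecomes:\\n{example_new_arch_src}\\nTask:\\n{ref_arch_src}"

[templates.hardware]
hardware_overview = "Target GPU: {gpu_name}."
"""

data = tomllib.loads(SAMPLE_TOML)
# These are the tables render_prompt_by_option reads:
assert data["languages"]["triton"]["language_display"] == "Triton"
assert data["options"]["few_shot"]["requires_example"] is True
print(data["options"]["few_shot"]["components"])
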
src/prompt_constructor_multilang.py
Lines changed: 54 additions & 45 deletions

@@ -1,34 +1,59 @@
-# src/prompts/prompt_constructor.py (public facade; keep old imports working)
+# src/prompt_constructor_multilang.py (new option-based prompt constructor)
 import os
-from .loader import render_prompt, _abs_path
+from .loader import render_prompt_by_option, _abs_path
 
 REPO_TOP_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
 PROMPTS_TOML = _abs_path("src/prompts/prompts.toml")
 GPU_SPECS_PY = "src/prompts/hardware/gpu_specs.py"  # still a Python file
 
-def get_prompt_for_backend(ref_arch_src: str, backend: str = "triton") -> str:
-    return render_prompt(
+def get_prompt_for_language(ref_arch_src: str, language: str = "triton", option: str = "few_shot") -> str:
+    """
+    Generate a prompt for a specific language and option.
+
+    Args:
+        ref_arch_src: The reference architecture source code
+        language: The kernel language (triton, cuda, cute)
+        option: The prompt option (basic, few_shot, hardware_info)
+    """
+    return render_prompt_by_option(
         prompts_toml=PROMPTS_TOML,
-        backend=backend.lower(),
-        template="default",
+        language=language.lower(),
+        option=option,
         context={"ref_arch_src": ref_arch_src},
     )
 
-def get_prompt_with_hardware(ref_arch_src: str, backend: str, gpu_name: str) -> str:
-    return render_prompt(
+def get_prompt_with_hardware(ref_arch_src: str, language: str, gpu_name: str) -> str:
+    """
+    Generate a hardware-aware prompt for a specific language.
+
+    Args:
+        ref_arch_src: The reference architecture source code
+        language: The kernel language (triton, cuda, cute)
+        gpu_name: The name of the GPU (e.g., "A100", "H100")
+    """
+    return render_prompt_by_option(
         prompts_toml=PROMPTS_TOML,
-        backend=backend.lower(),
-        template="with_hardware",
+        language=language.lower(),
+        option="hardware_info",
         context={"ref_arch_src": ref_arch_src},
-        gpu_specs_py=GPU_SPECS_PY,  # <-- python file, not TOML
+        gpu_specs_py=GPU_SPECS_PY,
         gpu_name=gpu_name,
     )
 
-def prompt_fix_compile(backend: str, ref_arch_src: str, custom_kernel: str, metadata: str) -> str:
-    return render_prompt(
+def prompt_fix_compile(language: str, ref_arch_src: str, custom_kernel: str, metadata: str) -> str:
+    """
+    Generate a prompt to fix compilation errors.
+
+    Args:
+        language: The kernel language (triton, cuda, cute)
+        ref_arch_src: The reference architecture source code
+        custom_kernel: The custom kernel code that failed
+        metadata: Compilation error metadata
+    """
+    return render_prompt_by_option(
         prompts_toml=PROMPTS_TOML,
-        backend=backend.lower(),
-        template="fix_compile",
+        language=language.lower(),
+        option="fix_compile",
         context={
             "ref_arch_src": ref_arch_src,
             "custom_kernel": custom_kernel,
@@ -37,11 +62,20 @@ def prompt_fix_compile(backend: str, ref_arch_src: str, custom_kernel: str, meta
         },
     )
 
-def prompt_fix_correctness(backend: str, ref_arch_src: str, custom_kernel: str, metadata: str) -> str:
-    return render_prompt(
+def prompt_fix_correctness(language: str, ref_arch_src: str, custom_kernel: str, metadata: str) -> str:
+    """
+    Generate a prompt to fix correctness errors.
+
+    Args:
+        language: The kernel language (triton, cuda, cute)
+        ref_arch_src: The reference architecture source code
+        custom_kernel: The custom kernel code that failed
+        metadata: Correctness error metadata
+    """
+    return render_prompt_by_option(
         prompts_toml=PROMPTS_TOML,
-        backend=backend.lower(),
-        template="fix_correctness",
+        language=language.lower(),
+        option="fix_correctness",
         context={
             "ref_arch_src": ref_arch_src,
             "custom_kernel": custom_kernel,
@@ -50,34 +84,9 @@ def prompt_fix_correctness(backend: str, ref_arch_src: str, custom_kernel: str,
         },
     )
 
-# Optional legacy convenience wrappers (if callers use backend-specific names)
-def prompt_fix_compile_triton(ref_arch_src, custom_kernel, metadata):
-    return prompt_fix_compile("triton", ref_arch_src, custom_kernel, metadata)
-
-def prompt_fix_correctness_triton(ref_arch_src, custom_kernel, metadata):
-    return prompt_fix_correctness("triton", ref_arch_src, custom_kernel, metadata)
-
-def prompt_fix_compile_cute(ref_arch_src, custom_kernel, metadata):
-    return prompt_fix_compile("cute", ref_arch_src, custom_kernel, metadata)
-
-def prompt_fix_correctness_cute(ref_arch_src, custom_kernel, metadata):
-    return prompt_fix_correctness("cute", ref_arch_src, custom_kernel, metadata)
-
-def prompt_fix_compile_cuda(ref_arch_src, custom_kernel, metadata):
-    return prompt_fix_compile("cuda", ref_arch_src, custom_kernel, metadata)
-
-def prompt_fix_correctness_cuda(ref_arch_src, custom_kernel, metadata):
-    return prompt_fix_correctness("cuda", ref_arch_src, custom_kernel, metadata)
-
 __all__ = [
-    "get_prompt_for_backend",
+    "get_prompt_for_language",
     "get_prompt_with_hardware",
     "prompt_fix_compile",
     "prompt_fix_correctness",
-    "prompt_fix_compile_triton",
-    "prompt_fix_correctness_triton",
-    "prompt_fix_compile_cute",
-    "prompt_fix_correctness_cute",
-    "prompt_fix_compile_cuda",
-    "prompt_fix_correctness_cuda",
 ]
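
With the rename, callers use one facade function per task. A minimal usage sketch based on the signatures in this diff; the ref_arch_src string here is a placeholder, not a real KernelBench architecture:

from src.prompt_constructor_multilang import (
    get_prompt_for_language,
    get_prompt_with_hardware,
)

# Placeholder reference architecture; real callers read this from the dataset.
ref_arch_src = "import torch\nclass Model(torch.nn.Module): ..."

# Default few-shot prompt -- the call the updated scripts now make:
prompt = get_prompt_for_language(ref_arch_src, language="triton", option="few_shot")

# Hardware-aware prompt; needs specs for the named GPU in gpu_specs.py:
hw_prompt = get_prompt_with_hardware(ref_arch_src, language="cuda", gpu_name="H100")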

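get_prompt_with_hardware additionally depends on src/prompts/hardware/gpu_specs.py, which _gpu_context_from_py loads via runpy to pick up GPU_* dicts. A guess at that file's shape, purely illustrative; the actual dict names, GPUs, and fields in the repo may differ:

# Hypothetical sketch of src/prompts/hardware/gpu_specs.py. The loader only
# promises that it defines GPU_* dicts loadable via runpy; everything below
# is an assumption for illustration.
GPU_SPEC_INFO = {
    "H100": {"memory_bandwidth_gb_s": 3350, "shared_memory_per_sm_kb": 228},
    "A100": {"memory_bandwidth_gb_s": 2039, "shared_memory_per_sm_kb": 164},
}

GPU_BEST_PRACTICES = [
    "Coalesce global memory accesses",
    "Avoid shared-memory bank conflicts",
    "Keep occupancy high enough to hide memory latency",
]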