From 325452c5cbc5ce49ded2e914ac422416a2b17adf Mon Sep 17 00:00:00 2001
From: scarborough high school
Date: Tue, 25 Mar 2025 11:36:41 -0400
Subject: [PATCH] serve: add --chunked_context flag to enable chunked prefill

Expose the LLM API's enable_chunked_prefill option through a new
trtllm-serve CLI flag.
---
 tensorrt_llm/commands/serve.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorrt_llm/commands/serve.py b/tensorrt_llm/commands/serve.py
index 0ff9e895b..19b9c4e1a 100644
--- a/tensorrt_llm/commands/serve.py
+++ b/tensorrt_llm/commands/serve.py
@@ -91,12 +91,17 @@
     help=
     "Path to a YAML file that overwrites the parameters specified by trtllm-serve."
 )
+@click.option("--chunked_context",
+              is_flag=True,
+              default=False,
+              help="Enable chunked context (chunked prefill)."
+)
 def main(model: str, tokenizer: Optional[str], host: str, port: int,
          log_level: str, backend: str, max_beam_width: int,
          max_batch_size: int, max_num_tokens: int, max_seq_len: int,
          tp_size: int, pp_size: int, ep_size: Optional[int], gpus_per_node: Optional[int],
          kv_cache_free_gpu_memory_fraction: float, num_postprocess_workers: int,
-         trust_remote_code: bool, extra_llm_api_options: Optional[str]):
+         trust_remote_code: bool, chunked_context: bool, extra_llm_api_options: Optional[str]):
     """Running an OpenAI API compatible server
 
     MODEL: model name | HF checkpoint path | TensorRT engine path
@@ -132,6 +137,7 @@ def main(model: str, tokenizer: Optional[str], host: str, port: int,
         "trust_remote_code": trust_remote_code,
         "build_config": build_config,
         "kv_cache_config": kv_cache_config,
+        "enable_chunked_prefill": chunked_context,
         "backend": backend if backend == "pytorch" else None,
         "pytorch_backend_config": pytorch_backend_config,
         "_num_postprocess_workers": num_postprocess_workers,
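
Note (not part of the patch): a minimal sketch of what the new flag does,
assuming llm_args is forwarded to the LLM constructor as serve.py's other
entries imply; the checkpoint path below is illustrative, not from the patch.

    # Start the server with chunked prefill enabled (hypothetical model path):
    #   trtllm-serve meta-llama/Llama-3.1-8B-Instruct --chunked_context

    from tensorrt_llm import LLM

    # Programmatic equivalent of what the patched serve.py builds:
    # --chunked_context sets llm_args["enable_chunked_prefill"], letting the
    # runtime split long prompts into smaller prefill chunks instead of
    # processing the whole context in one batch.
    llm = LLM(
        model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative checkpoint
        enable_chunked_prefill=True,  # what --chunked_context toggles
    )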