@@ -2000,10 +2000,10 @@ class ScoreResponse(OpenAIBaseModel):
     usage: UsageInfo
 
 
-class ClassificationRequest(OpenAIBaseModel):
+class ClassificationCompletionRequest(OpenAIBaseModel):
     model: str | None = None
     input: list[str] | str
-    truncate_prompt_tokens: int | None = None
+    truncate_prompt_tokens: Annotated[int, Field(ge=-1)] | None = None
     user: str | None = None
 
     # --8<-- [start:classification-extra-params]
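
For context on the tightened annotation above: with pydantic v2, `Annotated[int, Field(ge=-1)]` rejects values below -1 at validation time (in vLLM, -1 conventionally means "truncate to the model's maximum length"). A minimal standalone sketch of just this constraint, with `Req` as a hypothetical stand-in rather than the real model:

```python
# Sketch of the ge=-1 constraint above (assumes pydantic v2 is installed);
# Req is a stand-in model, not the vLLM class.
from typing import Annotated

from pydantic import BaseModel, Field, ValidationError

class Req(BaseModel):
    truncate_prompt_tokens: Annotated[int, Field(ge=-1)] | None = None

Req(truncate_prompt_tokens=-1)  # accepted: -1 means "use the model's max length"
try:
    Req(truncate_prompt_tokens=-2)
except ValidationError as e:
    print(e.errors()[0]["type"])  # greater_than_equal
```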
@@ -2015,7 +2015,21 @@ class ClassificationRequest(OpenAIBaseModel):
20152015 "if the served model does not use priority scheduling."
20162016 ),
20172017 )
2018-
+    add_special_tokens: bool = Field(
+        default=True,
+        description=(
+            "If true (the default), special tokens (e.g. BOS) will be added to "
+            "the prompt."
+        ),
+    )
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "throughout the inference process and returned in the response."
+        ),
+    )
     softmax: bool | None = Field(
         default=None,
         description="softmax will be deprecated, please use use_activation instead.",
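
The `request_id` field added above uses `default_factory`, so every request object gets a fresh id unless the caller supplies one. A hedged sketch of that behavior, with `uuid.uuid4().hex` standing in for vLLM's `random_uuid` helper:

```python
# Sketch of the default_factory behavior; uuid4().hex stands in for
# vLLM's random_uuid helper here (an assumption for illustration).
import uuid

from pydantic import BaseModel, Field

class Req(BaseModel):
    request_id: str = Field(default_factory=lambda: f"{uuid.uuid4().hex}")

a, b = Req(), Req()
assert a.request_id != b.request_id        # a fresh id per instance
print(Req(request_id="my-id").request_id)  # a caller-supplied id is kept: my-id
```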
@@ -2040,6 +2054,102 @@ def to_pooling_params(self):
         )
 
 
+class ClassificationChatRequest(OpenAIBaseModel):
+    model: str | None = None
+    messages: list[ChatCompletionMessageParam]
+    truncate_prompt_tokens: Annotated[int, Field(ge=-1)] | None = None
+    user: str | None = None
+
+    # --8<-- [start:chat-classification-extra-params]
+    add_generation_prompt: bool = Field(
+        default=False,
+        description=(
+            "If true, the generation prompt will be added to the chat template. "
+            "This is a parameter used by the chat template in the tokenizer "
+            "config of the model."
+        ),
+    )
+
+    add_special_tokens: bool = Field(
+        default=False,
+        description=(
+            "If true, special tokens (e.g. BOS) will be added to the prompt "
+            "on top of what is added by the chat template. "
+            "For most models, the chat template takes care of adding the "
+            "special tokens so this should be set to false (as is the "
+            "default)."
+        ),
+    )
+
+    chat_template: str | None = Field(
+        default=None,
+        description=(
+            "A Jinja template to use for this conversion. "
+            "As of transformers v4.44, a default chat template is no longer "
+            "allowed, so you must provide a chat template if the tokenizer "
+            "does not define one."
+        ),
+    )
+
+    chat_template_kwargs: dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Additional keyword args to pass to the template renderer. "
+            "Will be accessible by the chat template."
+        ),
+    )
+
+    mm_processor_kwargs: dict[str, Any] | None = Field(
+        default=None,
+        description="Additional kwargs to pass to the HF processor.",
+    )
+
+    priority: int = Field(
+        default=0,
+        description=(
+            "The priority of the request (lower means earlier handling; "
+            "default: 0). Any priority other than 0 will raise an error "
+            "if the served model does not use priority scheduling."
+        ),
+    )
+
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "throughout the inference process and returned in the response."
+        ),
+    )
+    softmax: bool | None = Field(
+        default=None,
+        description="softmax will be deprecated, please use use_activation instead.",
+    )
+
+    activation: bool | None = Field(
+        default=None,
+        description="activation will be deprecated, please use use_activation instead.",
+    )
+
+    use_activation: bool | None = Field(
+        default=None,
+        description="Whether to use activation for classification outputs. "
+        "Default is True.",
+    )
+    # --8<-- [end:chat-classification-extra-params]
+
+    def to_pooling_params(self):
+        return PoolingParams(
+            truncate_prompt_tokens=self.truncate_prompt_tokens,
+            use_activation=get_use_activation(self),
+        )
+
+
+ClassificationRequest: TypeAlias = (
+    ClassificationCompletionRequest | ClassificationChatRequest
+)
+
+
 class ClassificationData(OpenAIBaseModel):
     index: int
     label: str | None
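
Since `ClassificationRequest` is now a union alias, a handler can validate an incoming payload against it and let pydantic's smart union pick the variant by its required field (`input` for the completion style, `messages` for the chat style). A hedged sketch with simplified stand-in models rather than the real classes:

```python
# Sketch of union dispatch via TypeAdapter (assumes pydantic v2);
# CompletionStyle/ChatStyle are simplified stand-ins for the models above.
from typing import Any, TypeAlias

from pydantic import BaseModel, TypeAdapter

class CompletionStyle(BaseModel):
    input: list[str] | str

class ChatStyle(BaseModel):
    messages: list[dict[str, Any]]

ClassificationRequestSketch: TypeAlias = CompletionStyle | ChatStyle
adapter = TypeAdapter(ClassificationRequestSketch)

req = adapter.validate_python({"input": "great product"})
print(type(req).__name__)  # CompletionStyle

req = adapter.validate_python({"messages": [{"role": "user", "content": "hi"}]})
print(type(req).__name__)  # ChatStyle
```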