We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 19c68f8, commit b4ca8de. Copy full SHA for b4ca8de
src/llmcompressor/modifiers/quantization/calibration.py
@@ -250,10 +250,11 @@ def calibrate_kv_cache_input_hook(
250
kv_cache = getattr(module, "kv_cache")
251
if not hasattr(module, "_past_kv_name"):
252
# Determine which past KV parameter name to use once and cache it
253
+ # TODO: Find a better place to cache this
254
module._past_kv_name = (
- "past_key_values" # transformers#39956
255
- if "past_key_values" in inspect.signature(module.forward).parameters
256
- else "past_key_value"
+ "past_key_value" # transformers#39956
+ if "past_key_value" in inspect.signature(module.forward).parameters
257
+ else "past_key_values"
258
)
259
260
kwargs[module._past_kv_name] = kv_cache
0 commit comments