From d8baf6e6654058f4adb2bd1db114b2879c17bde7 Mon Sep 17 00:00:00 2001 From: Kunal Mansukhani Date: Sun, 19 Apr 2026 15:26:33 -0700 Subject: [PATCH] Remove svg on int8 kv cache attn --- .../96_int8_kv_cache_attention/challenge.html | 60 ------------------- 1 file changed, 60 deletions(-) diff --git a/challenges/medium/96_int8_kv_cache_attention/challenge.html b/challenges/medium/96_int8_kv_cache_attention/challenge.html index a7a9566..00822dc 100644 --- a/challenges/medium/96_int8_kv_cache_attention/challenge.html +++ b/challenges/medium/96_int8_kv_cache_attention/challenge.html @@ -9,66 +9,6 @@ float32.

- - - INT8 KV-Cache Attention — single token decode - - - - Q (fp32) - - - - K_int8 (int8) - - k_scale (fp32) - - - - V_int8 (int8) - - v_scale (fp32) - - - - - × - × - - - - K_float - - V_float - - - - - - - scores - - - softmax - - - - output - - - K[h,s,:] = K_int8[h,s,:] × k_scale[h,s] - V[h,s,:] = V_int8[h,s,:] × v_scale[h,s] - scores[h,s] = Q[h,:]·K[h,s,:] / √head_dim - w[h,:] = softmax(scores[h,:]) - out[h,:] = Σ_s w[h,s] · V[h,s,:] - - - - - - - -

Implementation Requirements