From 46b6d6409a6aff53cad421a79502eb44a70cffb2 Mon Sep 17 00:00:00 2001 From: Meirzhan05 Date: Mon, 27 Apr 2026 20:12:34 -0400 Subject: [PATCH 1/2] Add submission: Coprime-Stride Loader + Full GPTQ + Score-First TTT 3-seed mean: 1.08008 BPB (std 0.0009), all artifacts under 16MB. records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/ - README.md - submission.json (3-seed metadata) - train_gpt.py (LZMA-compressed self-extracting training script) --- .../README.md | 71 +++++++++++++++++++ .../submission.json | 39 ++++++++++ .../train_gpt.py | 2 + 3 files changed, 112 insertions(+) create mode 100644 records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/README.md create mode 100644 records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/submission.json create mode 100644 records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/train_gpt.py diff --git a/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/README.md b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/README.md new file mode 100644 index 0000000000..34d4daa82b --- /dev/null +++ b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/README.md @@ -0,0 +1,71 @@ +# Coprime-Stride Loader + Full GPTQ + Score-First TTT + +**val_bpb: 1.08008** (3-seed mean, std 0.0009) | **~15.99 MB** | 8×H100 SXM + +## Results (8×H100 80GB SXM, PyTorch 2.9.1+cu128) + +| Seed | Steps | Pre-quant BPB | Quantized BPB | Sliding BPB | TTT BPB | Artifact | +|------|-------|---------------|---------------|-------------|---------|----------| +| 1337 | 4565 | 1.08596 | 1.09737 | 1.08058 | **1.07907** | 15,992,892 | +| 42 | 4570 | 1.08742 | 1.09877 | 1.08217 | **1.08075** | 15,996,411 | +| 2025 | 4566 | 1.08722 | 1.09874 | 1.08196 | **1.08043** | 15,993,485 | +| **Mean** | **4567** | **1.08686** | **1.09829** | **1.08157** | **1.08008** | **15,994,263** | + +## Key Innovations + +### 1. 
Coprime-Stride Multi-Shard Loader +Replaces the standard `ShuffledSequenceLoader` with a coprime-stride data loader (PR #726 style). Within each shard, sequences are accessed with a stride coprime to the block count, guaranteeing every block is visited exactly once per epoch without cyclic patterns. Adaptive shard selection uses progress-based weighting (alpha decays from 0.9 to 0.5) with interleaved bucket draining for maximum diversity per batch. + +**Effect:** 36 extra training steps (4565 for seed 1337 vs 4529 baseline) and better pre-quant BPB (1.0860 for seed 1337 vs 1.0866 baseline). + +### 2. Full Hessian GPTQ with Cholesky Fallback +Standard GPTQ with Cholesky error compensation + actorder (column sorting by Hessian diagonal). SD-based clipping at 12.85σ for int6 matrices, 20σ for int8 embeddings. Added Cholesky fallback: if `torch.linalg.cholesky` fails on an ill-conditioned Hessian, the quantizer falls back to simple per-row quantization instead of crashing. + +### 3. LZMA Code Compression +Full Python source (53KB) is LZMA-compressed + base85-encoded into a 2-line self-extracting .py file (18KB). Saves ~35KB in artifact size, keeping the total under 16MB. Same technique as the current SOTA record. + +### 4. 
Score-First TTT (Legal) +Score-first per-chunk test-time training following the PR #461/#549 framework: +- Score each 32K-token chunk under `torch.no_grad()` first +- Then train on that chunk with SGD (momentum=0.9, LR=0.005, 3 epochs) +- Adapted model only scores future chunks — never rescores tokens it trained on + +## Architecture + +- SP8192 BPE tokenizer (8192 tokens) +- 11 physical layers, 17 virtual (depth recurrence: layers 3-5 looped 3×) +- dim=512, 8 heads, 4 KV heads (GQA), MLP 4× with LeakyReLU(0.5)² +- XSA on all 11 layers, parallel residuals from layer 7+ +- U-Net skip connections with learnable gates +- Tied embeddings, logit softcap=30 + +## Training + +- Muon optimizer (5-step Newton-Schulz) + AdamW for embeddings/scalars +- EMA (decay 0.9965) +- 72% warmdown, 20-step warmup + 20-step loop warmup +- Gradient clipping at 0.3 +- Brotli-11 compression + byte shuffling + +## Compliance + +### Condition 1 (Strict Causal Dependence) +Causal attention via `flash_attn_func(causal=True)`. TTT only incorporates tokens from already-scored chunks. + +### Condition 2 (Full Normalized Distribution) +Standard `F.cross_entropy` over full vocab_size logits. No top-k masking. + +### Condition 3 (Score-Before-Update) +Each chunk scored under `torch.no_grad()` before any training on that chunk. Model weights at scoring time reflect only prior chunks. + +### Condition 4 (Single Left-to-Right Pass) +Single `for ci in range(num_chunks)` loop. Each token scored exactly once. No rescoring or min-over-runs. 
+ +## Credits +- SOTA base: PR #1394 by @clarkkev (Full Hessian GPTQ + SDClip) +- Depth recurrence: PR #1445 by @dexhunter +- Score-first TTT: PR #549 by @abaybektursun, PR #461 by @Christopher-Lee-McClendon +- Coprime-stride loader: PR #726 style +- XSA: PR #634 +- Parallel residuals: PR #1412 by @Robby955 +- LZMA code compression: PR #1394 technique diff --git a/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/submission.json b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/submission.json new file mode 100644 index 0000000000..4a60e5b5eb --- /dev/null +++ b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/submission.json @@ -0,0 +1,39 @@ +{ + "author": "Meirzhan Saparov", + "github_id": "Meirzhan05", + "name": "Coprime-Stride Loader + Full GPTQ + Score-First TTT", + "date": "2026-04-25", + "track": "10min_16mb", + "val_bpb": 1.08008, + "val_bpb_std": 0.00088, + "seeds": [1337, 42, 2025], + "seed_results": { + "1337": {"val_bpb": 1.07907, "artifact_bytes": 15992892}, + "42": {"val_bpb": 1.08075, "artifact_bytes": 15996411}, + "2025": {"val_bpb": 1.08043, "artifact_bytes": 15993485} + }, + "hardware": "8xH100 80GB SXM", + "pytorch_version": "2.9.1+cu128", + "record": false, + "technique_summary": "SP8192 + 11L + 3-Layer Depth Recurrence (L3-5) + Parallel Residuals (L7+) + XSA-all + Coprime-Stride Multi-Shard Loader + Full Hessian GPTQ with Cholesky Fallback + EMA 0.9965 + Score-First TTT (SGD 3ep) + Brotli + LZMA Code Compression", + "compliance": { + "train_under_600s": true, + "artifact_under_16mb": true, + "eval_under_600s": true, + "no_slot": true, + "no_pre_quant_ttt": true, + "no_etlb": true, + "no_ngram_cache": true, + "score_first_ttt": true, + "three_seeds": true + }, + "attribution": { + "sp8192_gptq_sdclip": "@clarkkev (PR #1394)", + "depth_recurrence": "@dexhunter (PR #1331, #1437)", + "parallel_residuals": "@Robby955 (PR #1412), @msisovic (PR #1204)", + "legal_ttt_framework": "@abaybektursun (PR #549), 
@Christopher-Lee-McClendon (PR #461)", + "coprime_stride_loader": "PR #726 style", + "lzma_code_compression": "PR #1394", + "current_sota_base": "@bigbag (PR for 2026-04-09 SOTA)" + } +} diff --git a/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/train_gpt.py b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/train_gpt.py new file mode 100644 index 0000000000..dc575fee4e --- /dev/null +++ b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/train_gpt.py @@ -0,0 +1,2 @@ +import lzma as L,base64 as B +exec(L.decompress(B.b85decode(";Loo)EL{LJn@VT6Qap3bt~@<3h>ok~)Km^%c^ys%R{D_%yAk9-_tV7^coUOo3$w>`(`ci)t`2F7>r>Ltx>>S2CRw|7ov>Wn1e~_!RLQ=%V9g?)G3yPsu%SBy!lj1PaC-x%dDmCDOZ^r^!)+WWz}ejKXTJ#^U6Ra!};QocHHXQC+4UM!QQ!-N5Xd|%~a(9)bTYIO+>B~8~@lqmri%^qEkQUy074Rh6w7V_#^s9J-3BNA`G;qyR$LYcI?e+loZVWi~B$n=TKFp{%SeHYp{oNWh;U@Ahk8M2$OU%K8B$lb*dRQXd-GR_@*KAZdRdwSd#v=LSq1v@Puul=a7WXDmh1^kBj}Y2XlER!D2E{&{%lV(hz$#n5%+%sk&Q}>{y0xpRgiQQBJeVV0hy8UD3ntyo@(Pv+K7^zVRDt4bah(r8kfsZThb+H1)~K-lIr4`|V#-2R>G7pP*N!fwWd&Dq8C)y=NrG_U_Oz6Q?+@ok1?(VJ5?ZT~&}C4Ks38WRB>3i=I!}H-8qq=&yKJ;tbpwwn~lAseD^q1C*u5T;lKQtF;?zv@u0f36%6SXU~txi3v5iSPK*`fNE9531KaQDL`zTPF$MX4U(-3sY-&?>QJe)giBQzpor7H)AZ#4=Hn#`AoAL7tT){&bw(fgz|eQRt`#6-<>;m*+&$!nf|od6&lVKYYHuOoNgZU_L>E@!O%__mlt=);Hwdc43+CM?sh5y+my3XSVYMO8F1pXuq$fvTU<$mpDjr>Lm){DeV)>4AKAhA?jxjH<-3yYQ#5qz+4c`Utifny+Ydmr4?c_z60#9@FU+U1&O$Lfg$WrX7gCj50O1t`1A`k04LVr;^*~{|@(TS5>#TAjL(B`umc8bVA$bS|F?^2A7E}z7IIgZlY(8Ex#K+nLh0vzlKK=74U!g+sX4T?e3_^_7XB1A(HB{pYd{vHYcak_P3DZ2LAB20wAP+C_9p7R|0}wA=p~JFi&xD8H}n(LxCc5rcmwF`!s(tSf_~xvM%FXBDVM`89RsXcky^uotc$(?azGHT7t|M*u<*V?7sSTe>$5cSCRwCfEdRa`>462k+DkjnacZz`)@ZzF^f9w{e@vt+OWnjxD(M;0?2b|a74m35Vk_XRNe5RC=*q72--M!~CDWs^K$vdC@Kpc`MI)JO+$ESF2w_V1$CCp>ATgRS|~NL_Vj!y8c0ccKi_*XpL;ZT_=tbI-ujeH+)1!QTOWN2z;{>mvx(%HgFFLufJpD6i<3=PC$jGwm6{;s!=kFcV{2oLp=;(wKaO&*5gv+Wd>XQ3*KtictU5)l34CYcHirj*2J-*nL9>PZTyt|3h^u>t0wy6}lu8QvFqk(Y`T>X53gT?<3DXGfm*Ig8KL8_?mNrJ*gre?aTy4GZVS`@43rWoOqvNe}DvV$>^)V(n-qQ~NW1?Ao!TRp-lq$+zS#?%s`t56yL4veDrrP#;Rbf
>e3bu>iVKmw?$y!ju?&!RhbVo;df9|2D=PIYw9@XhFPL_9c%MTV2e4R_S?F;};*(4l+$n82Kj;-boD1~`-G+Xl}ckUIi`_&+du{)ped9;+-o`_H>0k$aQtcx-i5bGN8SjzGI7~h*l(Tm^fgZX{?405+9e~#2V*_*+2jzw(jH+7TyFBbu0n0^a6XToJ_rI@$;(@0Cml1pL0x{&^4<~f(k5Q6p!WkLuA<6sZbmrwLr+Q^S6QYL?9H?h}=-*6hxY@iE5!>*TGa%>^O6nDNPwwxJ>{O=SX6^uOHA7i|eRO|K`6572ty7lV)J1nDWEBW%U=xi_nM8ao+g$M>*Cj}OmMEBfTJ%t#P7^x-9p0ty>%e&xkG&gxvmI~VHuHLm(&HE71FzLRqp)zqagd!C<@En?GM|zi#`LM9jp>NtEi0;iidK)xo#snOHv7eTLp6Vazf6ZK$0!hut3XM4U_dg2)NX<`zO-M3AE3AAKd`QyS{n!*X0c%m7-Pf=pDPY`_IUIFLX5Q%&y35U*Rk0I2=sSj6!k4>75jRnzC%{=Fd5Wh$ei?i<$=DfL@t7A$#^RWMSHvmH1l}R^Q*pw4S2sHt_4VvQM&2L;1{_1cM%3{%62`*jGFD^IcGqf64OuPNW-=vz$-K;VU1Vl)$N*x@{@mfSB!NLd1wgIs9>zrvgz!Zkwclvo;S41Ypx@fYGA)&YSOUYOcS+zMWk)o*7_jNma!Mnw)-!Cf;Pt(=+*w*TW&5W#n#6V+L}8Ov+$7IKn>*Er8~pFHCK@R!DC{!BY$9`DH#z6Q`AiaqwYJ_HzrOuA=8;Qr_g?u(O4#T~EurbK_0O>P^J1D7P}WFZOLOyL2ZQ+%U03vIj5XI6&BitX^&35)zjSmM5?{|&jjEv~QD64%undhMG0RL{)G=ESL*PhpZVwbIz9z#_JS^Mu7k0N_x#7q@!;#4Zl}kg2>e~Zw4a%*equ8^;+ejZbJiQU>6)fnR_C=VzPY1)FCPFJ1PrA?$$7wg|-?;$N9yh&CI{L7bGsXmAKMKIyi)oGlu7_+K*E;R>jRWURL;ip^kU!dunhJi@+|oA4Re8eMK@5iYp6^zPfb}{M%4O^9&Wal;BVv!uQGgaExs|$U;TSai!wO$LL-br1tlC#iLz0@N$(hmg!v4`!#m2^t0Nd?Lx{ED??L(5hh#5QJgcW=}K4&{aaagOEOE{A3&*J+Ou=xykhVAK0wdzEX8V@SuY==dnhRp;vZ%E()Rvg_9z*(#%Qbi3m&Yjc41V~lwfCH~q1*A;QA~0qd!p2oOPkQ<-yx`p=q8X?R-$$%tAl@^pnq=7*LXJ2ORYzax>U!RshlVh&2u2!E~)#xwgX}IKHPHrkQ8~Nt4s&lswC0bEBa4uw9f1AXiF5VcynlOiU&6(CSb28J$U0R2dSDop)e!*Qp&J{$XK$wznSMW8tSr(0l{&4;A`wwZ%3hr3ujrA{b3nAlIZ)-aUd#=-m`DK#K{h}u7%(k2!mVkpD~WyI*p{eKsmtMMKLukx-)1jvnY9@@XTtOP4elnT8J_c$#s>?Fyw3yzDG(6%xJ|kH9)QdPYrG!?0~{G{&@LZ0bB&&<<6J<`Mizq$V2hwY@g<03UZE;Hcpo*C7p9OktAYiFb6CNS`p|p?k|1{t@!U_>)LxDbGt&ag#X=lqW>@R0h#6BVQIam9);2aaV6T>79LZ`EEZSrt2?k254AW1)Y~~x>z=Zudu)T&Jc62wAxSyuT#XD+HO~u8ThmTg}iEXZqe?X|btf&p+-bBn^Hcyz|$iq;&bpK#eqN(lNCGP8N{~f4_E{RE@VMb^i0gJA$D$Yx+PD?ziT?#@)8x0?TGvx1~~+gUthW{>T~Ka`b3TsPh!MMOBvslPwgZHzptR^qM>I9a3Kcs>WP2dl?+K8~!8RF17z@b@0chd&J@px3Kb?+TEV=G*84=BOeNRENSm;mND>FE{0_lcqt2$G9%d3hs^|`ttiN@%pJ8W-oV#UfgwJY%`#!5>P@}+pyM{gcJQWg2^=UokfKxg#F2v#5BHoEN
ck5$xT(S>9YlQ+I6Zkrg_ngf$)&IBks-g$;Xt``jL8L>_y|xT?T;8`UqUe}<++E*GpQj5i>q=Q-t85hK{%;(6ce8)zQM}=Jxsg(`@DUf{CM#y;jRHKZocLl#s2rtYoKVGbW-C*x}^n;5n*oP)QLztz~8n`VbbW+9-e2e>rMHzFt@ds5$Ez1RL5A{JB)b4Z1>$eut9B2XwMr}zgmDPc#XpPBkUoQ#3tC-T((TL0qQIJ2`3G7&)5y*x_m_>Kev(K66;Znh-LmKI&owoP4s{;%`qB78Jr$wE@E`B=L;*k3*-^FVj};fLyaQ3m}f>AT{UANYtimSSpX=^2Ok|w_AJkCMV)7>BA^BF=XKoVD$U|SgaY9l6NJ7M!iUzsqi$8N1AVz0*Zc!yq<4xEC;8EFFs20hB~*NrCjKk7xUwNzEe<#;Fxi}!7h*@!W_UDGeQ^%i)?(hK&FT$PN@vL2mRwtt&w?Q37l;e}<}Mzz{|OH_<{O(k*;a^y6J#g?Q+x8}%Cw>v_5skz1j_mAPaDMZ}I*2ge#yZ84YCNoHDRv<}m&ZjeWsnIxFOKqdy06Y%xKnLG!SLT}v@ZtGaXiUZJo2MDsj>Cm1j@Zo1c1+oP8G?d(TQCmQ+%EcEfNOsAevpbRMyx15KPAggI55hf+iZUKA(&d!fKMUfixL2t6bT{h#wCUPoest6NI=BLqIXZ*9yYd5J3yOtq5C8v!3Xj=YRY$;f<69seZ=F`QcY9d8EQJ=RtQPEdaZ#VQ3?*A*1kK$`Sp(Y4YcAr!1n5POEer5I95&0Az#s7Fj4NMoW-RRdEbanzX|+u2mjJ5$%TblAKh(gIzoWGR52D!5;)LMU+o?znhc*a{(jI`pB*OG(+tvVIusT?1BpK)+YirDS;CdB5oablyr@nYAG2cFP<*NQr#PEIZps(|U$Z1O-rCW1F;e9M_!WAf@=XkbWput$(=^08@R-an&p?7YJ%jOocQPDmQ40YKoPfQI`MQ!^&U$DM#42{*wG|<+>z6ozqxyWtrJIeL1Jwi#)JgyPmRIvvDETllJ@bCRANnU7`!p?>tX?jX(j1G3eT7c_2d{dL9-|4@L7U_D0X8HtW(jvsqwaeV%;@gLk!yuFQevI7MG!J?dzg5mMWwZ3pT}@cZ2#6-cHbKH6VrH;bVX59Q!33Wu)_5caF1AFu*y!E6aEXZdufP|h$`)gxOuhsLL~THXGootl?d#emFHyA@YrHoHn1w};M^Tan~?ox`X5gD)Tk@TroHy^mK4FSv4Z&SuL`8YFQ92D8?Y!=H#_FOv7%QWceQI(=@dQb{1+@$hXy>#>>MOdp|{Yp!PW0$RrtqGIdk(x0CTXLr5%pGI8vcQbxyih=1LOt^`+F7D(0ry@df33uGN=T|Mkb)dASC!tO9xH+vNi#qFYCAYsy7~&%b@zlrL>ISUIft_xlvd>PBH^x%&}K(&<^b8q$y3dw3{sYQn4x?;R2>!RLIIb;2sDu!S?h^j`K8WTZPQyiBbSEK3Ynm`BGT~er0N^um8)w3^p{QNKO_Zy`FUy)oD-;&eybfIu{3y{n&Nl2E)BPrGwgFJGe8gk<=@cl=4qe^;;J)`=fO^*u6d?Mz%V^cjB^)(Id4c?V>{t+WuV0^Ld^wFYo96L1!a0@&5S{4L#dpfjejhb|OQ#NNBtD6+6XgwOsM8QK{ID?~A%l5&VzB3b9$GY>Xa^m1ZO1Z|JBe=WQhgvM(W|*Fam#eNoUSp1pnO666dUUXN*HNndWkHF897Xp#zRu4B@gZ!b(IZC#{uG{c~P-UPhWN`DkWDfc0-L3KfWbhni{u&#@vx8=w-S7`|3uftfecLja0!yT-|F?EXh$B5FH8wiaPWs5GNOv!p=6W_eGG*=h)F5fny`uZwj@75Sbbo0;$ipW&P>gX{{Yrht*U@t7M`#T*9MuPDam0zfE~fj)CyTEB{riSbVXJrZRN*s+3wd;IESz-s+S2Yvr8kOSB^>7A>nK@HB+!P+;rmhsn3(C*Qr33)cTDP2VP{)+PdA`n5-;D4`am%
`-W7eo}w29IhKe4MFr?Kt51RMSj*$hcRj~xt{}y$XWVN`D`PD|RmclhuIRQIF{cPK$~`#Y3Gl=w}^OBD^kiaVJRnH69IO16;d^$$-BX7_9^xR-O@SGP(*53W2IepE30wp2YUuFh<12QfVkKBVdV7zJIZaPrDsPW)@%y5r_{jv(8xwRc$D)7m^yUHK(jgmN~}XnF!4iro#2BZ%zzx?kTp^z#_SgLoekqdK{g#uGB-bXg+*|5AqN)@0P`Skzh43jV3i;sVDDm!pe_RO1c)U0Yz+3M9U+q+2G4noUqjX*vKPw+9bpUNTs3iq4jnzT$d2&Z-RS0<&6Uk?DJgRb7C|GIKyF6U_a_&P6#25!c?4dR%Ct$B|`!SI4R)Xx|O*<`rfL{)AfX)Qwr^zRy(Oy9Qy6#<=+WPm4WstUcQ&r(csY-Ha?d&@0R)1K2B$cF>+*ois_tfu^Hyk)2qtq}w^oDq2+H=0-zZ*1(_H^F)D4Wqa1s&C$)fvZgY)+}N;T5{blk05esyXhAJnHFnr1xxtXj2c&NT<;6Qjd^*}stxQSrfCNVV;$%<-c%W-$1qBr}m1qD{hfhfkp2?w}f$`x%wZqb&*_Zy}XL?r2)T3e#Y=f0KE2;8_{@H5AFh!WFHl$U@gy(M(1%&fw?VHf%Cz|ny$X{!A`r@Wvy`b|V6wV;Y+znU&=e!d;Gej}KJt7=8^l%}H$!f*UKV&;>n8g)-_rmtt!BcvC34hy%OXqjmC``bXrQxKK+R{P~hrh;dMg+-3Zuy=o%QpNq1`+_n4SLbh3wMVw42BJ@xtU(cm3lLe!%J+uci&1qw{5AZgNBPJxZ2Q?nh6X3kdM9vWS^T&=F;a2_Qw-j07G8Hv)11I>&A6q{@!ChZj_Iy_WWg|+-jeM6$=*yp|kuiOUQ6=Ib%V2J=%i}7-%()&ny0FifdmS9ZMG{m1wr=2c46UP^&i!h@=0ip1|JB!$D0Xj>@>LD1%K{JyILid61^w&56cC!wJ0Y<+zzt8m&ox-8zJ&aHMIZ%w8%ercJ)`x7P131cE3wws(dd+=68A7MP_d&dR?;oCOKw4ipY9nsTi$O!O(Kiiiero(FC7T;Lut~Z5@jH>1tTU*d80*r|QbfR*f>U6Ion{`&#L+%5tqbex?N9F*`AYxK)eP@9LnfMjC)(p}cNF+_wJ-{cS+*!au$V+0fN~Dk8BaV(dtAQ_!@lASb+@Sss(iiE0u0R&eS;xmv#UG4!w9~gq$`^QRr8oh&G&<8&oKdX_Os{#Y)}e6jP>oXCp01XA`Z&X=x|9!)oqMFRKf2U^gI>imUK#=0iYh)jF+C9Xq%j1)kSl_D>xjkdDIVLIe_VFp)Cw(Y{QEQbO>x>Vq0>1{LG>P#o%jM`nZ%k6~AqrY0ernN3RCxL9LUSqE5BrT``cQviV$)g^3;mYoFWT?T%{=N%DCJe)Z&yMmiQ^C+#(CkBhOroo^KZnkmj0f*QY?q0;8rbp%~+qbV4y_*>!~=cdXb2uYy#;!%wqQ(pkFT}<^Xj-vu{Hao^!?>HVs*2Sw<+0XTQQb(m{TCep4syrRz*EQRmMjJ_YjL8Mnu#(7L?RSz%hAP8>DOu81WU3*;6fnDGT~+E`ygo^&SYvh|$PvSrTn`@f&^R}M~X#*7EO0=4ctg21JLF7H>jLVBs_tA#WvxMAkg^c{UYjH$To)K`Oq!bs3hIl(R-~hZR}bZE^7$2qOv&dBUo}%=_sB6ddLDe>@9U|~Mq{xVU{?h-LFxkv#Tzs>(ruz+9$nmh)oo0#Po7!eA?CwTkscNkRu|RiIuZB`MxxQRnQcff`7f=W{j-e_n`M&AO^Ch!T2o1Vy@woLyw{yu*>r?l#5&Vrqn*wlPBcJXV1HtTW9*e-n=j#L5<~zj-^aXR?cB+wL6Dd8M0pM8J3bTs)?IeEoimiGM=l>Z%T1B5}W0Vv!0ho#{Vi3{2d3di#d){G@*-aL!bOItWtu>M|&ULDJaJlZr=u29uN-%%jAuAQAAI_UQHzCy%DXRy`Xz(#t?A#I;@e8u1VoG~
#YJYTO!=ge?Lzt0fieG=0#k+me}O2V$1;E8voixwVe5|*`GyMS3{v{rFScUD_YgwX+B8U0;-zp-||C--`q_3iIqSXxXQkAXjKfThF}jUp`U+dlz7wF;w;w`DIXrSC8$@iYVMI7dnn+Y+)RPm|FgKs+R~3FJDqin?>y>MEdlkUArPZM(V7MLXP@hhEj2rc$A@}7{q$8RFQI=#DM%PgHN}y_MMUs30(O1x3vR1|y+&HwesBs#YnEu;(ieY4p03*vsQKPKD+a!-(crv5vsk=s(}t@z!ho-M^@2#{PAbZ4rg=g%BR#U$ZefIaV?2WSbtmQF_ME8&-OlwkB8Sg__CUbk5Bkb|*E2QP-KYuQbki1e0Yhc~vDfqhWHu*IYH=?d4K+Ii+X$5J4;P$ZtwXh8$;YcX6@uVqJ_S(&9Bkj^po?|-bQSobU|aW+4PZ|&rvxP69zD#qH2r51j*H%EV!&9AWa2*|wapw3zD|B!5eSZkgE6@%K#!G;vTECW6y1R&8z#pvXK!&-Z61eOFpd>3$j!Z-+7z@@q0eEnR2HsFmOa>O5pdGa}cwbUdjZGQL)&AVyU{|n)?4xy{#9hCJ>_Jbwi7pWyfb>z=D7OJOQh#qvwalV<|R>;PHSF!HO3mzBRvf3F1{22=)O3+I#peB1FwDm5h2@C@#wqTSiiU1M)Ih#<}nrR>Fbnx>Mo}qGM_Peqll=1g1zF(U1&EkOT6|5p?UaFo#^qT4Irl4`PsyTC6xAkmO}u*9I@FGFZCmeYk!oo9_Pn09qATmtm8)7o(>gObC5%z{L0r~IZnN!N>!c~JZZqSzCEI-0#j*GzN(m7;_XSBe2xA7(fYb^~B*e#Q{aYT?a*9nyW^kPk9wT15CCT<0VEzI9p!Ojmi0)Mgo3Uw{Fs5QEv4e%22PS?{dA(6ug5Eie>OT-?L6mux{<3pC_-jL-L=#ci^;cavZ3!kSCD3jfPF!^;+c>4nGFJNtWG5)RmLTgxoYs`ukt1>?q*S|t{^X4RWP@l|SFLD!XX9gos#Jl!leu|x0h7j#!fk4uO^Qa00=Q_nl9?Lf>FId%Kp&FuEa=;V9Hq00m8xXCOWA(GRQ!OAlnKqg&|{Px**r{yPOOS6Cf^pIWcVv6}w3{1=G|82i@xcAOPv_fO(>TH2SlctLRf6h4Z&Lr8YTOT~QrCs1zQ*t+pcIqxrXg&d&?lCOafdyCh?j0)nDO$EKyJyT5ISgN@L@kiVk+2luAJG^UVv&5`pILXzY*p*9y*h+#`-XUTst~M?lHND|tTi~T0Skvq9n8n^jQUF&Q6qyrLhin?Vv!;79HH||$1rWW5=;lH<)kz@E3ugKS@n7$c{?C%(fs(=nV=`2|=bR<2}z@t=dP@1YcKqo78UBLCl5>rqhxWi9L@(d{62Ohpq#r9dYQr`|XaF66b+X6hc4!xLeanK#@m#ynk-*{QWe1Zok1IqoM`xoNNLHU%vE-(0I>X*whV}R%|9TUX*PZo?wL?Fa}-Se7e2`1yS%(;kox2aj^vTG!PZyQq?&vMpln5HARl9em>>kGQ226uI!`OS+8v~U4kQW=lN$etIFyM5D=&8Ds$;yS|uZWs7rP#XRPL~ulps_Y{{DwvdNjjcgR!6lH8wKI0ozX?voDK27`F3Gw96#K(Zood0kc=$MXGd8|x6o2tK$Gm#`{`l)9l5+7Vp6H}kKmv$lVDD>O+f>0JS3pZc>gB!q5fxl?n^4RHIeMRvX_99$;CmqCfe0x-v}GdA4Uk}wriR3CZCyuo$vU%S!j@W4yil{Zi3Ngf43YS)SOKu}21fvi&L&;9q0Uar1}<`PgB8v=L>I3uZ$AaQvvgcUMptzT3y{HT6;0wN_x{t`T9Ie%U>^Yv$!r80OdXL3+aI_eW2rJGPKxif+v+p)SUXu=&&+Qh#y+^VKR5`Mi98#;sBP)d)u{F|U$g*8Bo0ZYeEgvUv~{E0q$yw?{k^%k8<29$01*7)PK)3Hpu)qjnmeX=suGRDLQKOm8&!pj2hMhVCTE*VCn{Eoa7IXcz~w0$X
oQ$4ZII@HT@6|m)rluA}sPYzCC!l=x%*zEF{BcCG7fM@m7`Xwega{K4bO1r$UOoZj_E*v`i(vOB|?&pkh&%R!?FTvZk=1^;Uc+r^_oN;_GUoF_zaf$hYM!CsryIFG-O50_40o*Ej%lw#4$R(EIndg$RgkaM=OW_={ig7OCICGqxLBiX8^@&;NZqj%tc?l>l05-uvr%tYRWXvMsSYp55gy0RYgDGA6#&d4kP)4`MvM`MeNrbyReg@$BhXl_C_3}8yE6}El&qmW^L70;YRL}0>T?EaY>>-NpvHt=S=0`4T=zHo`V6-BNt_ze=;+|WacKCW6MK&RF38;}+zFV&f=_R#V%lJ=B+d^@IP7}N+(I9*vmQBSK4f8d!>rzvRZK{xCVJnd3gj-Bu>e$&mcqr625a&x-{%z6v|Gn+=jEjHrX+V?o*yVwv#RS#vWMj2}ygw?s_OLmD8-N3>Z95YOxuJ6bbe-eC7oN3#SCOWm#74RGR!7FBCMlSOLx4pRM$UvVwe>OjFS?y!^6l?WA>ZOV%j)B*N@x|9zqd#=T~dA*sA=8bx=)&Y&x)>W=?{n6lN=iJpNN*0YzuUPXBWdFr{8y9T?EE2fWE9#dX5!Ue<%hHUJKdKMm7h#qtxqgG47P$5#r`K!(@1&$?ng}`1ebLL68y5d)C&`#k^)Qj4+CA3yniclzOo#%);TL0>hlUL$;uP8i&1=mVtjTly={BgBoeZWj1;{Y_u6DJ`MyRK8WXJqH6+O-CM%l7e_K}M&w%ZKB!{>Um(HU+s>{HT*fomS+Dbe!HGeF}n;7}PT<_e|rg>*0*RIX|Qv7H`AgzOnr4Jlr{+OG4-F2j9rjRL=ni#d7HQgvJy&{83>bsP`H$^Wpa{M4EJEsvVD4SXH%apfn|N2;*V52w_ree*${_?+o}EwR}!enfXWN_tHd67A=(*wMe1)RGJw1J9cOf)x-K}8Cxr5Wc&Fp{ja6c9UwPVmQXc3&dLet-N~S!7oUEFOkP#loKj_v=fu`MT`_5}5xOKs0o(qI9Q}o%#0h6tFxNCxi;1oD*=F@*$6TK3}LS=ZjrTJO;mRR>bHo)~Y+Gn_L7DEG1lcvlJucs38y}=A6yRyD=NT{w-)<87gQ{(UEP00NoVuYQsOBEC*fSZ8{eyW0qQhCAd7$($rxcxx;Whg91D"),format=L.FORMAT_RAW,filters=[{"id":L.FILTER_LZMA2}])) From 8f7bf231fd6cdb4d5ecb54e68ac2b92f50697d99 Mon Sep 17 00:00:00 2001 From: Meirzhan05 Date: Mon, 27 Apr 2026 23:47:47 -0400 Subject: [PATCH 2/2] Mark as record: 1.08008 beats SOTA 1.0810 by 0.00092 BPB Comparable in magnitude to recent merged record gaps: - #1 -> #2: 0.0012 BPB - #2 -> #3: 0.0006 BPB - #3 -> #4: 0.0007 BPB --- .../2026-04-25_CoprimeStride_GPTQ_TTT/README.md | 4 +++- .../2026-04-25_CoprimeStride_GPTQ_TTT/submission.json | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/README.md b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/README.md index 34d4daa82b..eed61a0258 100644 --- a/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/README.md +++ 
b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/README.md @@ -1,7 +1,9 @@ -# Coprime-Stride Loader + Full GPTQ + Score-First TTT +# Record: Coprime-Stride Loader + Full GPTQ + Score-First TTT — val_bpb 1.08008 (3-seed mean) **val_bpb: 1.08008** (3-seed mean, std 0.0009) | **~15.99 MB** | 8×H100 SXM +Beats the current SOTA (PR #1493, 1.0810) by **0.00092 BPB** (3-seed std 0.0009), a margin comparable in magnitude to recent record gaps on the leaderboard (e.g., #1→#2 was 0.0012, #2→#3 was 0.0006, #3→#4 was 0.0007). + ## Results (8×H100 80GB SXM, PyTorch 2.9.1+cu128) | Seed | Steps | Pre-quant BPB | Quantized BPB | Sliding BPB | TTT BPB | Artifact | diff --git a/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/submission.json b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/submission.json index 4a60e5b5eb..9e13d8d957 100644 --- a/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/submission.json +++ b/records/track_10min_16mb/2026-04-25_CoprimeStride_GPTQ_TTT/submission.json @@ -14,7 +14,7 @@ }, "hardware": "8xH100 80GB SXM", "pytorch_version": "2.9.1+cu128", - "record": false, + "record": true, "technique_summary": "SP8192 + 11L + 3-Layer Depth Recurrence (L3-5) + Parallel Residuals (L7+) + XSA-all + Coprime-Stride Multi-Shard Loader + Full Hessian GPTQ with Cholesky Fallback + EMA 0.9965 + Score-First TTT (SGD 3ep) + Brotli + LZMA Code Compression", "compliance": { "train_under_600s": true,