# SP8192 PolarExpressNS MinLR LQERAsymR4 Baseline

Baseline run establishing the current score for `SP8192` with `PolarExpressNS`, `MIN_LR`, and `LQERAsymR4`.
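For context, `PolarExpressNS` refers to a variant of the Newton-Schulz orthogonalization step that Muon applies to its momentum matrix; Polar Express tunes a per-step coefficient schedule instead of fixed coefficients. A NumPy sketch of the plain quintic iteration follows — the `(a, b, c)` values are the widely used Muon defaults, not necessarily what this run uses:

```python
import numpy as np

def newton_schulz_orthogonalize(G, steps=5):
    # Quintic Newton-Schulz iteration of the kind Muon uses to approximately
    # orthogonalize a gradient/momentum matrix. Polar Express replaces the
    # fixed (a, b, c) below with an optimized per-step schedule.
    a, b, c = 3.4445, -4.7750, 2.0315
    X = G / (np.linalg.norm(G) + 1e-7)   # scale so singular values are <= 1
    transposed = X.shape[0] > X.shape[1]
    if transposed:
        X = X.T                          # iterate on the short-fat orientation
    for _ in range(steps):
        A = X @ X.T
        X = a * X + (b * A + c * (A @ A)) @ X
    return X.T if transposed else X
```

After a few steps the singular values of the output cluster near 1, which is the property Muon exploits: the update direction keeps the singular vectors of the gradient but equalizes their scales.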

This baseline implements a 3-layer depth-recurrence mechanism, symmetric row/column normalization in the Muon optimizer, a causal token-only n-gram tilt, parallel residuals in layers 7-10, and split learning rates for early versus late parameter layers. It also includes targeted fixes to make MLP normalization independent and to run TTT evaluation under PyTorch `no_grad()`. Soft-Round QAT was stripped to provide a clean baseline for these architectural techniques.
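The "split learning rates for early/late parameter layers" idea amounts to routing parameters into separate optimizer param groups. A minimal sketch, with hypothetical names and illustrative LR values (the submission's actual split point and rates live in `train_gpt.py`):

```python
def build_param_groups(blocks, split, early_lr=3e-3, late_lr=1e-3):
    """Split an ordered sequence of transformer blocks (e.g. a
    torch.nn.ModuleList) into two optimizer param groups at index `split`.
    Each element only needs a .parameters() method; the returned list is
    in the per-parameter-options format torch.optim optimizers accept."""
    blocks = list(blocks)
    early = [p for blk in blocks[:split] for p in blk.parameters()]
    late = [p for blk in blocks[split:] for p in blk.parameters()]
    return [{"params": early, "lr": early_lr},
            {"params": late, "lr": late_lr}]
```

Passing this list to an optimizer (e.g. `torch.optim.AdamW(build_param_groups(model.blocks, split=6))`) gives each group its own learning rate while sharing all other hyperparameters.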

## Validation Performance
The submission yields a 3-seed mean `val_bpb` of **1.07302**.

| Seed | `val_bpb` | Artifact Size (bytes) |
|------|-----------|-----------------------|
| 1337 | 1.07294458 | 15,955,411 |
| 42 | 1.07270429 | 15,953,179 |
| 2025 | 1.07342419 | 15,951,874 |
| **Mean** | **1.07302** | **15,953,488** |
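Bits-per-byte figures like those above are conventionally derived from the mean validation cross-entropy: nats per token are converted to bits and rescaled by the token-to-byte ratio of the validation set. A sketch of that conversion (names are illustrative; the actual evaluation code is in `train_gpt.py`):

```python
import math

def bits_per_byte(mean_nll_nats, n_tokens, n_bytes):
    # Mean negative log-likelihood per token (nats) -> bits per token,
    # then rescale by how many tokens cover each original byte.
    bits_per_token = mean_nll_nats / math.log(2)
    return bits_per_token * (n_tokens / n_bytes)
```

Scoring on the original byte count (rather than token count) is what the `bpb_on_original_bytes` compliance flag below refers to: it makes scores comparable across different tokenizers.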

## Compliance & Evaluation
- `train_under_600s`: True
- `artifact_under_16mb`: True
- `eval_under_600s`: True
- `no_pre_quant_ttt_on_val`: True
- `score_first_ttt`: True
- `three_seeds`: True

## Contents
- `train_gpt.py`: Main training script with all modifications.
- `submission.json`: Official track evaluation parameters and scores.
- `train_seed*.log`: Detailed logs for all three seed evaluations.

---

`submission.json`:

{
  "author": "sahiee-dev",
  "github_id": "sahiee-dev",
  "name": "SP8192 PolarExpressNS MinLR LQERAsymR4 Baseline",
  "date": "2026-04-30",
  "track": "10min_16mb",
  "val_bpb": 1.07302,
  "val_bpb_std": 0.00030,
  "seeds": [1337, 42, 2025],
  "seed_results": {
    "1337": {"val_bpb": 1.07294458, "artifact_bytes": 15955411},
    "42": {"val_bpb": 1.07270429, "artifact_bytes": 15953179},
    "2025": {"val_bpb": 1.07342419, "artifact_bytes": 15951874}
  },
  "hardware": "8xH100 80GB SXM",
  "pytorch_version": "2.11.0+cu128",
  "technique_summary": "PR #1874 (AjAnubolu) base: SP8192 + SmearGate + AttnOutGate w24 + LoRA-TTT + Phased TTT + Polar Express NS + MIN_LR=0.10 + LQER Asymmetric Rank-4",
  "compliance": {
    "train_under_600s": true,
    "artifact_under_16mb": true,
    "eval_under_600s": true,
    "no_slot": true,
    "no_pre_quant_ttt_on_val": true,
    "no_etlb": true,
    "no_ngram_cache": true,
    "score_first_ttt": true,
    "three_seeds": true,
    "no_text_normalization": true,
    "bpb_on_original_bytes": true
  },
  "attribution": {
    "pr1874_base": "@AjAnubolu (PR #1874)",
    "sp8192_base": "@bigbag (PR #1493)",
    "smeargate_attn_out_gate": "@MarioPaerle (PR #1667)",
    "lora_ttt_improvements": "@renqianluo (PR #1767)",
    "phased_ttt": "@jorge-asenjo (PR #1700)",
    "polar_express_ns": "PR #1344",
    "min_lr_warmdown_floor": "PR #1787",
    "lqer_asymmetric_rank4": "PR #1797"
  }
}