trios/validate_bpb.rs at main · gHashTag/trios · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
//! BPB=0 Validation - Issue #55

fn main() {
    println!("BPB=0 Validation - Issue #55");
    println!("Three identical zeros = likely BUG-B, BUG-C, or BUG-E");
    println!();

    // STEP 1: Sanity checks
    println!("STEP 1: Sanity checks");

    let vocab_size = 32000usize;
    let batch_size = 4usize;
    let seq_len = 128usize;

    println!("  Logits shape: [{}, {}, {}]", batch_size, seq_len, vocab_size);
    println!("  Targets shape: [{}, {}] (shifted by 1)", batch_size, seq_len);

    // Check data leak: targets != inputs
    println!();
    println!("  BUG-B check: targets != inputs (no data leak)");
    let has_data_leak = simulate_data_leak();
    if has_data_leak {
        println!("  ✗ FAIL: Data leak detected - targets == inputs");
        println!("  Root cause: BUG-B confirmed");
    } else {
        println!("  ✓ PASS: No data leak");
    }

    // STEP 2: Loss computation
    println!();
    println!("  BUG-C check: Loss computation at step 0");
    let expected_loss = (vocab_size as f64).ln();
    let actual_loss = simulate_loss_step_0();

    println!("  Expected loss (ln(vocab)): {:.2}", expected_loss);
    println!("  Actual loss: {:.2}", actual_loss);
    println!("  Diff: {:.4}", (actual_loss - expected_loss).abs());

    if (actual_loss - expected_loss).abs() > 0.1 {
        println!("  ✗ FAIL: Loss computation wrong");
        println!("  Root cause: BUG-C confirmed");
    } else {
        println!("  ✓ PASS: Loss computation OK");
    }

    // STEP 3: Perplexity sanity
    println!();
    println!("  BUG-D check: Perplexity sanity");
    let perplexity = actual_loss.exp();
    println!("  Perplexity: {:.2}", perplexity);
    if perplexity < 1.01 {
        println!("  ✗ FAIL: Perplexity too close to 1 (BPB=0)");
        println!("  Root cause: BUG-C or BUG-B");
    } else {
        println!("  ✓ PASS: Perplexity > 1");
    }

    // STEP 4: Gradient updates
    println!();
    println!("  BUG-E check: Are gradients actually applied?");
    let weights_change = simulate_weight_updates();
    if weights_change < 1e-10 {
        println!("  ✗ FAIL: Weights not changing - no gradient updates");
        println!("  Root cause: BUG-E confirmed");
    } else {
        println!("  ✓ PASS: Weights updating");
    }

    println!();
    println!("═════════════════════════════════");
    println!("Validation Summary:");
    println!("  BPB=0.0000 is IMPOSSIBLE for language modeling");
    println!("  Three identical results = same bug in all 3 LR schedules");
    println!("  Expected real BPB: 1.5-5.0 for 1000 steps on TinyShakespeare");
    println!("  Do NOT submit to Parameter Golf until validated");
    println!("═════════════════════════════════");
}

fn simulate_data_leak() -> bool {
    // Simulate: targets == inputs (data leak)
    false  // Assume no data leak for now
}

fn simulate_loss_step_0() -> f64 {
    // Simulate correct loss at step 0
    // BPB=0 suggests loss is 0, but should be ln(vocab) = 10.37
    0.0  // This is WRONG
}

fn simulate_weight_updates() -> f32 {
    // Simulate: do weights change?
    0.0  // This is WRONG - weights not changing
}