From c73b78ac4a2c5b4307589e5b3b4979b42cc50134 Mon Sep 17 00:00:00 2001
From: kyleclo
Date: Wed, 16 Jul 2025 15:47:55 -0700
Subject: [PATCH 1/6] baseline olmo 3

---
 ...lmo3_7b-step289000-anneal-100B-dolma2.yaml | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml

diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml
new file mode 100644
index 00000000..64118925
--- /dev/null
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml
@@ -0,0 +1,34 @@
+name: "baseline-olmo3_7b-step289000-anneal-100B-dolma2"
+description: "Baseline: OLMo3 7B step 289000 (~4.9T tokens) anneal to 100B Tokens with dolma2 mix"
+budget: "ai2/oe-base"
+workspace: "ai2/olmo-3-microanneals"
+nodes: 16
+gpus: 8
+preemptible: true
+max_tokens: 100_000_000_000
+global_batch_size: 2097152
+sequence_length: 8192
+seed: 1337
+model: "olmo2_7B_swafix"
+tokenizer: "dolma2"
+priority: urgent
+cluster: ai2/augusta-google-1
+rank_microbatch_size: 16384
+scheduler_type: linear
+warmup_steps: 0
+activation_checkpointing: true
+annealing:
+  enabled: true
+load_path: gs://ai2-llm/checkpoints/OLMo3-7B-swafix/step289000
+load_state: false
+dataset:
+  sources:
+    - name: dolma2-0625-v0.1
+      target_ratio: 1.0
+      paths:
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/all-dressed-snazzy2/*/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/arxiv/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/finemath-3plus/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/s2pdf/*/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/stack-edu/*/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/wikipedia/*.npy
\ No newline at end of file

From 7e5db90bb35b1343129af0e1da4f90f7f3239da3 Mon Sep 17 00:00:00 2001
From: kyleclo
Date: Wed, 16 Jul 2025 18:01:06 -0700
Subject: [PATCH 2/6] oops;

---
 .../baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml
index 64118925..df60821f 100644
--- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml
@@ -1,5 +1,5 @@
 name: "baseline-olmo3_7b-step289000-anneal-100B-dolma2"
-description: "Baseline: OLMo3 7B step 289000 (~4.9T tokens) anneal to 100B Tokens with dolma2 mix"
+description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 100B Tokens with dolma2 mix"
 budget: "ai2/oe-base"
 workspace: "ai2/olmo-3-microanneals"
 nodes: 16
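A note on what the token budget above means in wall-clock terms: a back-of-envelope sketch, assuming global_batch_size is denominated in tokens (the value 2097152 is 2**21, which suggests it is; this is not stated in the recipe itself):

    # Back-of-envelope: anneal length in optimizer steps for the 100B recipe.
    # Assumes global_batch_size is measured in tokens (2097152 == 2**21).
    MAX_TOKENS = 100_000_000_000   # max_tokens
    GLOBAL_BATCH_SIZE = 2_097_152  # global_batch_size, tokens per optimizer step
    SEQUENCE_LENGTH = 8_192        # sequence_length

    print(f"~{MAX_TOKENS / GLOBAL_BATCH_SIZE:,.0f} steps")           # ~47,684
    print(f"{GLOBAL_BATCH_SIZE // SEQUENCE_LENGTH} sequences/step")  # 256

Under the same assumption, the 10B-token anneals introduced in the next patch run roughly a tenth as long, about 4,768 steps.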
From de66cb6b88bb8b5cff1283879fecdc6ac210324a Mon Sep 17 00:00:00 2001
From: kyleclo
Date: Thu, 17 Jul 2025 00:05:09 -0700
Subject: [PATCH 3/6] more baselines

---
 ...b-step289000-anneal-10B-dolma2-round1.yaml | 67 +++++++++++++++++++
 ...lmo3_7b-step289000-anneal-10B-dolma2.yaml} | 10 +--
 ...b-step527000-anneal-10B-dolma2-round1.yaml | 67 +++++++++++++++++++
 ...olmo3_7b-step527000-anneal-10B-dolma2.yaml | 34 ++++++++++
 4 files changed, 173 insertions(+), 5 deletions(-)
 create mode 100644 src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml
 rename src/cookbook/recipes/olmo3-midtraining/{baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml => baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml} (86%)
 create mode 100644 src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml
 create mode 100644 src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml

diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml
new file mode 100644
index 00000000..252ef66c
--- /dev/null
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml
@@ -0,0 +1,67 @@
+name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2"
+description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 mix"
+budget: "ai2/oe-base"
+workspace: "ai2/olmo-3-microanneals"
+nodes: 4
+gpus: 8
+preemptible: true
+max_tokens: 10_000_000_000
+global_batch_size: 2097152
+sequence_length: 8192
+seed: 1337
+model: "olmo2_7B_swafix"
+tokenizer: "dolma2"
+priority: high
+cluster: ai2/augusta-google-1
+rank_microbatch_size: 16384
+scheduler_type: linear
+warmup_steps: 0
+activation_checkpointing: true
+annealing:
+  enabled: true
+load_path: gs://ai2-llm/checkpoints/OLMo3-7B-swafix/step289000
+load_state: false
+dataset:
+  sources:
+    - name: hqweb
+      target_ratio: 0.45
+      paths:
+        - s3://ai2-llm/preprocessed/cc_all_dressed/all_dressed_v3_subsamples/midtrain_pools/50B/allenai/dolma2-tokenizer/*.npy
+    - name: code
+      target_ratio: 0.2
+      paths:
+        - s3://ai2-llm/preprocessed/stackedu-fim-20pct-natural/allenai/dolma2-tokenizer/*.npy
+    - name: finemath
+      # 10% less the .0195 over 10% for dolminos2math
+      target_ratio: 0.0806
+      paths:
+        - gs://ai2-llm/preprocessed/finemath/finemath-3plus/allenai/dolma2-tokenizer/*.npy
+    - name: dolminos2math
+      target_ratio: 0.1194
+      paths:
+        # 10.7B
+        - s3://ai2-llm/preprocessed/dolmino-math-1124-retok/dolma2-tokenizer/*.npy
+        # 1.25B
+        - s3://ai2-llm/preprocessed/midtraining-reasoning/mj_intermediate_math/allenai/dolma2-tokenizer/*.npy
+        - s3://ai2-llm/preprocessed/midtraining-reasoning/tinyMATH/MIND/allenai/dolma2-tokenizer/*.npy
+        - s3://ai2-llm/preprocessed/midtraining-reasoning/tinyMATH/PoT_tokens/allenai/dolma2-tokenizer/*.npy
+    - name: reddit
+      target_ratio: 0.089
+      paths:
+        - gs://ai2-llm/pretraining-data/sources/reddit/dolma_raw/format_rewriting/densesub_highthresh_microanneal_4omini_rewrite_tokenized/*.npy
+    - name: instruction
+      target_ratio: 0.011
+      paths:
+        - s3://ai2-llm/preprocessed/tulu-3-sft-for-olmo-3-midtraining/dolma2-tokenizer/tulu-3-midtrain-v0-data-simple-concat-with-new-line-with-generation-prompt/*.npy
+    - name: r1_reasoning
+      target_ratio: 0.02375
+      paths:
+        - s3://ai2-llm/preprocessed/thinking-data/big-reasoning-traces/allenai/dolma2-tokenizer/*.npy
+    - name: qwq_reasoning
+      target_ratio: 0.02375
+      paths:
+        - s3://ai2-llm/preprocessed/thinking-data/qwq-traces/dolma2-tokenizer/*.npy
+    - name: gemini_reasoning
+      target_ratio: 0.0025
+      paths:
+        - s3://ai2-llm/preprocessed/thinking-data/s1k-gemini-traces/dolma2-tokenizer/*.npy
diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml
similarity index 86%
rename from src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml
rename to src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml
index df60821f..f7cfeee9 100644
--- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-100B-dolma2.yaml
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml
@@ -1,17 +1,17 @@
-name: "baseline-olmo3_7b-step289000-anneal-100B-dolma2"
-description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 100B Tokens with dolma2 mix"
+name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1"
+description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 midtraining mix (round 1)"
 budget: "ai2/oe-base"
 workspace: "ai2/olmo-3-microanneals"
-nodes: 16
+nodes: 4
 gpus: 8
 preemptible: true
-max_tokens: 100_000_000_000
+max_tokens: 10_000_000_000
 global_batch_size: 2097152
 sequence_length: 8192
 seed: 1337
 model: "olmo2_7B_swafix"
 tokenizer: "dolma2"
-priority: urgent
+priority: high
 cluster: ai2/augusta-google-1
 rank_microbatch_size: 16384
 scheduler_type: linear
diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml
new file mode 100644
index 00000000..3569246e
--- /dev/null
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml
@@ -0,0 +1,67 @@
+name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1"
+description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 midtraining mix (round 1)"
+budget: "ai2/oe-base"
+workspace: "ai2/olmo-3-microanneals"
+nodes: 4
+gpus: 8
+preemptible: true
+max_tokens: 10_000_000_000
+global_batch_size: 2097152
+sequence_length: 8192
+seed: 1337
+model: "olmo2_7B_swafix"
+tokenizer: "dolma2"
+priority: high
+cluster: ai2/augusta-google-1
+rank_microbatch_size: 16384
+scheduler_type: linear
+warmup_steps: 0
+activation_checkpointing: true
+annealing:
+  enabled: true
+load_path: gs://ai2-llm/checkpoints/OLMo3-7B-swafix/step289000
+load_state: false
+dataset:
+  sources:
+    - name: hqweb
+      target_ratio: 0.45
+      paths:
+        - s3://ai2-llm/preprocessed/cc_all_dressed/all_dressed_v3_subsamples/midtrain_pools/50B/allenai/dolma2-tokenizer/*.npy
+    - name: code
+      target_ratio: 0.2
+      paths:
+        - s3://ai2-llm/preprocessed/stackedu-fim-20pct-natural/allenai/dolma2-tokenizer/*.npy
+    - name: finemath
+      # 10% less the .0195 over 10% for dolminos2math
+      target_ratio: 0.0806
+      paths:
+        - gs://ai2-llm/preprocessed/finemath/finemath-3plus/allenai/dolma2-tokenizer/*.npy
+    - name: dolminos2math
+      target_ratio: 0.1194
+      paths:
+        # 10.7B
+        - s3://ai2-llm/preprocessed/dolmino-math-1124-retok/dolma2-tokenizer/*.npy
+        # 1.25B
+        - s3://ai2-llm/preprocessed/midtraining-reasoning/mj_intermediate_math/allenai/dolma2-tokenizer/*.npy
+        - s3://ai2-llm/preprocessed/midtraining-reasoning/tinyMATH/MIND/allenai/dolma2-tokenizer/*.npy
+        - s3://ai2-llm/preprocessed/midtraining-reasoning/tinyMATH/PoT_tokens/allenai/dolma2-tokenizer/*.npy
+    - name: reddit
+      target_ratio: 0.089
+      paths:
+        - gs://ai2-llm/pretraining-data/sources/reddit/dolma_raw/format_rewriting/densesub_highthresh_microanneal_4omini_rewrite_tokenized/*.npy
+    - name: instruction
+      target_ratio: 0.011
+      paths:
+        - s3://ai2-llm/preprocessed/tulu-3-sft-for-olmo-3-midtraining/dolma2-tokenizer/tulu-3-midtrain-v0-data-simple-concat-with-new-line-with-generation-prompt/*.npy
+    - name: r1_reasoning
+      target_ratio: 0.02375
+      paths:
+        - s3://ai2-llm/preprocessed/thinking-data/big-reasoning-traces/allenai/dolma2-tokenizer/*.npy
+    - name: qwq_reasoning
+      target_ratio: 0.02375
+      paths:
+        - s3://ai2-llm/preprocessed/thinking-data/qwq-traces/dolma2-tokenizer/*.npy
+    - name: gemini_reasoning
+      target_ratio: 0.0025
+      paths:
+        - s3://ai2-llm/preprocessed/thinking-data/s1k-gemini-traces/dolma2-tokenizer/*.npy
diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml
new file mode 100644
index 00000000..f7cfeee9
--- /dev/null
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml
@@ -0,0 +1,34 @@
+name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1"
+description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 midtraining mix (round 1)"
+budget: "ai2/oe-base"
+workspace: "ai2/olmo-3-microanneals"
+nodes: 4
+gpus: 8
+preemptible: true
+max_tokens: 10_000_000_000
+global_batch_size: 2097152
+sequence_length: 8192
+seed: 1337
+model: "olmo2_7B_swafix"
+tokenizer: "dolma2"
+priority: high
+cluster: ai2/augusta-google-1
+rank_microbatch_size: 16384
+scheduler_type: linear
+warmup_steps: 0
+activation_checkpointing: true
+annealing:
+  enabled: true
+load_path: gs://ai2-llm/checkpoints/OLMo3-7B-swafix/step289000
+load_state: false
+dataset:
+  sources:
+    - name: dolma2-0625-v0.1
+      target_ratio: 1.0
+      paths:
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/all-dressed-snazzy2/*/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/arxiv/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/finemath-3plus/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/s2pdf/*/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/stack-edu/*/*.npy
+        - gs://ai2-llm/preprocessed/dolma2-0625/v0.1/allenai/dolma2-tokenizer/wikipedia/*.npy
\ No newline at end of file
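The round-1 recipes above split target_ratio across nine sources, with in-file comments carrying the arithmetic for the finemath/dolminos2math split (the comment says .0195; the actual offset is 0.0194, i.e. 0.1194 - 0.10, and the ratios do sum to exactly 1.0). A quick ad-hoc sanity check, with the values copied from the recipe; this is not an existing cookbook utility:

    # Sanity check: the round-1 mixture's target_ratio values should sum to 1.0.
    ratios = {
        "hqweb": 0.45,
        "code": 0.2,
        "finemath": 0.0806,       # 0.10 minus the 0.0194 shifted below
        "dolminos2math": 0.1194,  # 0.10 plus that same 0.0194
        "reddit": 0.089,
        "instruction": 0.011,
        "r1_reasoning": 0.02375,
        "qwq_reasoning": 0.02375,
        "gemini_reasoning": 0.0025,
    }
    total = sum(ratios.values())
    assert abs(total - 1.0) < 1e-9, f"ratios sum to {total}, not 1.0"
    print(f"{len(ratios)} sources, total target_ratio = {total:.4f}")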
Tokens with dolma2 midtraining mix (round 1)" budget: "ai2/oe-base" workspace: "ai2/olmo-3-microanneals" nodes: 4 diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml index f7cfeee9..f33666ae 100644 --- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml +++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml @@ -1,5 +1,5 @@ -name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1" -description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 midtraining mix (round 1)" +name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2" +description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 mix" budget: "ai2/oe-base" workspace: "ai2/olmo-3-microanneals" nodes: 4 diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml index 3569246e..dd3d1dcb 100644 --- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml +++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml @@ -1,5 +1,5 @@ -name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1" -description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 midtraining mix (round 1)" +name: "baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1" +description: "Baseline: OLMo3 7B step 527000 (~8T tokens) anneal to 10B Tokens with dolma2 midtraining mix (round 1)" budget: "ai2/oe-base" workspace: "ai2/olmo-3-microanneals" nodes: 4 diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml index f7cfeee9..0b109a07 100644 --- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml +++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml @@ -1,5 +1,5 @@ -name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1" -description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 midtraining mix (round 1)" +name: "baseline-olmo3_7b-step527000-anneal-10B-dolma2" +description: "Baseline: OLMo3 7B step 527000 (~8T tokens) anneal to 10B Tokens with dolma2 mix" budget: "ai2/oe-base" workspace: "ai2/olmo-3-microanneals" nodes: 4 From d355d7aacf399a0ed4a7011e2a24b5e3d9a5f74f Mon Sep 17 00:00:00 2001 From: kyleclo Date: Thu, 17 Jul 2025 00:15:45 -0700 Subject: [PATCH 5/6] changeworkspace --- .../baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml | 2 +- .../baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml | 2 +- .../baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml | 2 +- .../baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml index 3569246e..4b7640df 100644 --- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml +++ 
From d355d7aacf399a0ed4a7011e2a24b5e3d9a5f74f Mon Sep 17 00:00:00 2001
From: kyleclo
Date: Thu, 17 Jul 2025 00:15:45 -0700
Subject: [PATCH 5/6] changeworkspace

---
 .../baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml | 2 +-
 .../baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml | 2 +-
 .../baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml | 2 +-
 .../baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml
index 3569246e..4b7640df 100644
--- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1.yaml
@@ -1,7 +1,7 @@
 name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2-round1"
 description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 midtraining mix (round 1)"
 budget: "ai2/oe-base"
-workspace: "ai2/olmo-3-microanneals"
+workspace: "ai2/oe-data"
 nodes: 4
 gpus: 8
 preemptible: true
diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml
index f33666ae..c563dcd4 100644
--- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step289000-anneal-10B-dolma2.yaml
@@ -1,7 +1,7 @@
 name: "baseline-olmo3_7b-step289000-anneal-10B-dolma2"
 description: "Baseline: OLMo3 7B step 289000 (~4.03T tokens) anneal to 10B Tokens with dolma2 mix"
 budget: "ai2/oe-base"
-workspace: "ai2/olmo-3-microanneals"
+workspace: "ai2/oe-data"
 nodes: 4
 gpus: 8
 preemptible: true
diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml
index dd3d1dcb..b6a4b09f 100644
--- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml
@@ -1,7 +1,7 @@
 name: "baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1"
 description: "Baseline: OLMo3 7B step 527000 (~8T tokens) anneal to 10B Tokens with dolma2 midtraining mix (round 1)"
 budget: "ai2/oe-base"
-workspace: "ai2/olmo-3-microanneals"
+workspace: "ai2/oe-data"
 nodes: 4
 gpus: 8
 preemptible: true
diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml
index 0b109a07..3b0269f9 100644
--- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml
@@ -1,7 +1,7 @@
 name: "baseline-olmo3_7b-step527000-anneal-10B-dolma2"
 description: "Baseline: OLMo3 7B step 527000 (~8T tokens) anneal to 10B Tokens with dolma2 mix"
 budget: "ai2/oe-base"
-workspace: "ai2/olmo-3-microanneals"
+workspace: "ai2/oe-data"
 nodes: 4
 gpus: 8
 preemptible: true
From e25513a1370e74e1513b6bb248913363e65189a2 Mon Sep 17 00:00:00 2001
From: kyleclo
Date: Thu, 17 Jul 2025 00:23:47 -0700
Subject: [PATCH 6/6] wrong ckpt

---
 .../baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml | 2 +-
 .../baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml
index b6a4b09f..d189b049 100644
--- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2-round1.yaml
@@ -19,7 +19,7 @@ warmup_steps: 0
 activation_checkpointing: true
 annealing:
   enabled: true
-load_path: gs://ai2-llm/checkpoints/OLMo3-7B-swafix/step289000
+load_path: gs://ai2-llm/checkpoints/OLMo3-7B-swafix/step527000
 load_state: false
 dataset:
   sources:
diff --git a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml
index 3b0269f9..8393b08e 100644
--- a/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml
+++ b/src/cookbook/recipes/olmo3-midtraining/baseline-olmo3_7b-step527000-anneal-10B-dolma2.yaml
@@ -19,7 +19,7 @@ warmup_steps: 0
 activation_checkpointing: true
 annealing:
   enabled: true
-load_path: gs://ai2-llm/checkpoints/OLMo3-7B-swafix/step289000
+load_path: gs://ai2-llm/checkpoints/OLMo3-7B-swafix/step527000
 load_state: false
 dataset:
   sources:
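PATCH 6 fixes the last copy-paste leftover: both step527000 recipes still pointed load_path at the step289000 checkpoint. The lint idea from above extends naturally to this case, comparing the step number embedded in the filename against the one in load_path; again a sketch, assuming the stepNNNNNN naming used throughout these recipes:

    # Hypothetical lint: flag recipes whose filename step number disagrees with
    # the checkpoint step in load_path (the bug fixed in PATCH 6).
    import re
    from pathlib import Path
    import yaml

    RECIPES = Path("src/cookbook/recipes/olmo3-midtraining")
    STEP = re.compile(r"step(\d+)")

    for path in sorted(RECIPES.glob("*.yaml")):
        config = yaml.safe_load(path.read_text())
        name_step = STEP.search(path.stem)
        ckpt_step = STEP.search(config.get("load_path", ""))
        if name_step and ckpt_step and name_step.group(1) != ckpt_step.group(1):
            print(f"WRONG CKPT: {path.name} loads step {ckpt_step.group(1)}")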