Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
0b401af
testing updated docker
hamishivi Jul 8, 2025
e935690
testing updated docker
hamishivi Jul 8, 2025
175e57e
testing updated docker
hamishivi Jul 8, 2025
43a5a26
testing updated docker
hamishivi Jul 8, 2025
a3b18ae
testing updated docker
hamishivi Jul 8, 2025
82ec4ab
testing updated docker
hamishivi Jul 8, 2025
621fad1
try using uv
hamishivi Jul 8, 2025
352b694
some changes
Jul 8, 2025
67b24b9
mason new default env var
hamishivi Jul 8, 2025
3fa4b5b
trying workflows
hamishivi Jul 8, 2025
3396b1c
update toolvllm
hamishivi Jul 8, 2025
adebe0e
fixing base dockerfile and updating toolvllm itself
hamishivi Jul 8, 2025
bca369c
Address comments
hamishivi Jul 9, 2025
3e1404a
pin torch version
hamishivi Jul 9, 2025
221b923
turn off pr push
hamishivi Jul 9, 2025
39248f1
olmo3 vllm working
mnoukhov Jul 9, 2025
316d5d6
test script
mnoukhov Jul 9, 2025
1b8fda3
test script w/o pure docker mode
mnoukhov Jul 9, 2025
c36b53c
intermediate commit
mnoukhov Jul 10, 2025
6fc1f34
Merge remote-tracking branch 'upstream/main' into olmo3
mnoukhov Jul 10, 2025
4d174b9
dockerfile uv working
mnoukhov Jul 10, 2025
67ebd22
test script for olmo3 run
mnoukhov Jul 10, 2025
efa7080
Merge branch 'main' of github.com:allenai/open-instruct into olmo3
mnoukhov Jul 10, 2025
e7aec22
style quality uv.lock
mnoukhov Jul 11, 2025
d267142
eval image and hparams update
mnoukhov Jul 11, 2025
dd6c291
test script, extra code added to dockerfile
mnoukhov Jul 14, 2025
a91ef17
Merge branch 'main' of github.com:allenai/open-instruct into olmo3
mnoukhov Jul 16, 2025
2fd5e0c
Merge branch 'main' of github.com:allenai/open-instruct into olmo3
mnoukhov Jul 29, 2025
7ba9d44
0.6b model for debug
mnoukhov Jul 29, 2025
7cea66c
updated grpo_olmo3
mnoukhov Jul 29, 2025
df1f30e
change to ray 2.46.0 because 2.48.0 has an error
mnoukhov Jul 29, 2025
8b29998
2 nodes and hparams
mnoukhov Jul 30, 2025
f8cd049
revert to 1 node
mnoukhov Aug 1, 2025
3827088
fix model path
mnoukhov Aug 2, 2025
c6d3a6d
midtraining rounds 3-4
mnoukhov Aug 4, 2025
0538452
long math run
mnoukhov Aug 12, 2025
c41cf78
Merge branch 'main' of github.com:allenai/open-instruct into olmo3
mnoukhov Aug 13, 2025
17adca8
updated dockerfile and pyproject
mnoukhov Aug 13, 2025
715e7f7
Merge branch 'main' of github.com:allenai/open-instruct into olmo3
mnoukhov Aug 19, 2025
975323f
docker Makefile
mnoukhov Aug 19, 2025
31136c4
cleaner olmo3 script
mnoukhov Aug 19, 2025
f73d342
make vllm extra dependency to allow local runs without vllm
mnoukhov Aug 22, 2025
7add82c
Merge branch 'main' of github.com:allenai/open-instruct into olmo3
mnoukhov Aug 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,4 @@ dmypy.json
cache/
local_dataset_cache/
scratch/
vllm_olmo3/
6 changes: 5 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,14 @@ ENV UV_CACHE_DIR=/root/.cache/uv
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV UV_COMPILE_BYTECODE=0

# Install custom vllm for olmo3
RUN git clone -b shanea/olmo3 https://github.com/2015aroras/vllm.git vllm_olmo3

# Install dependencies
RUN --mount=type=cache,target=${UV_CACHE_DIR} \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --no-cache
uv sync --frozen --no-cache --extra vllm

RUN uv run --no-sync -m nltk.downloader punkt punkt_tab

Expand All @@ -78,6 +81,7 @@ COPY eval eval
COPY configs configs
COPY scripts scripts
COPY mason.py mason.py

COPY oe-eval-internal oe-eval-internal
COPY open_instruct open_instruct

Expand Down
9 changes: 8 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: style quality
.PHONY: style quality docker

# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
export PYTHONPATH = open_instruct
Expand All @@ -16,3 +16,10 @@ style-check: ## *fail* if anything needs rewriting

quality-check: ## *fail* if any rewrite was needed
uv run ruff check --exit-non-zero-on-fix $(check_dirs)

docker: ## Build the open_instruct_olmo3 image; optionally publish it to Beaker (AI2-internal)
	DOCKER_BUILDKIT=1 docker build -f Dockerfile --build-arg UV_CACHE_DIR=$(UV_CACHE_DIR) -t open_instruct_olmo3 .
# The remaining steps only work internally at AI2 (they require the beaker CLI and jq).
# $(eval)/$(shell) run when the recipe is expanded, i.e. before any recipe line
# executes, so beaker_user is available to the create command below.
	$(eval beaker_user := $(shell beaker account whoami --format json | jq -r '.[0].name'))
# To replace an existing image, delete it first:
#   beaker image delete $(beaker_user)/open_instruct_olmo3
	beaker image create open_instruct_olmo3 -n open_instruct_olmo3 -w ai2/$(beaker_user)
16 changes: 13 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@ dependencies = [
"nvitop>=1.4.2",
"packaging>=24.2",
"peft>=0.13.2",
"ray[default]>=2.44.1",
"ray[default]==2.46.0",
"setuptools>=75.6.0,<80.0.0",
"tensorboard>=2.18.0",
"torch>=2.7.0,<2.8",
"transformers>=4.52.4,<4.54.0", # see https://github.com/vllm-project/vllm-ascend/issues/2046
"vllm==0.9.1",
"transformers @ git+https://github.com/2015aroras/transformers.git@shanea/olmo3",
"wandb==0.18.1",
"langdetect==1.0.9",
"immutabledict==1.2.0",
Expand All @@ -46,12 +45,14 @@ flash-attn = [{ requirement = "torch", match-runtime = true }]

[tool.uv.extra-build-variables]
flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
vllm = { VLLM_USE_PRECOMPILED = "1" }

# pytorch related setups
[tool.uv.sources]
torch = [
{ index = "pytorch-cu128", marker = "platform_system != 'Darwin'"},
]
vllm = { path = "./vllm_olmo3", editable = true }

[[tool.uv.index]]
name = "pytorch-cu128"
Expand All @@ -70,12 +71,20 @@ code = [
"pydantic>=2.0.0",
"requests>=2.28.0",
]
vllm = [
"vllm"
]

[tool.uv]
preview = true
python-preference = "only-managed"
link-mode = "hardlink"

[[tool.uv.dependency-metadata]]
name = "flash-attn"
version = "2.8.0.post2"
requires-dist = ["torch", "setuptools"]

[dependency-groups]
dev = [
"beaker-py>=1.32.2,<2.0",
Expand Down Expand Up @@ -132,6 +141,7 @@ ignore = [

[tool.ruff.lint.isort]
known-first-party = ["open-instruct"]
known-third-party = ["wandb"]
# case insensitive to match isort --profile black
case-sensitive = false
# Disable split-on-trailing-comma to work with skip-magic-trailing-comma
Expand Down
12 changes: 6 additions & 6 deletions scripts/train/debug/grpo_fast.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
uv run python open_instruct/grpo_fast.py \
python open_instruct/grpo_fast.py \
--dataset_mixer_list ai2-adapt-dev/rlvr_gsm8k_zs 64 \
--dataset_mixer_list_splits train \
--dataset_mixer_eval_list ai2-adapt-dev/rlvr_gsm8k_zs 16 \
Expand All @@ -8,24 +8,24 @@ uv run python open_instruct/grpo_fast.py \
--response_length 512 \
--pack_length 1024 \
--per_device_train_batch_size 1 \
--num_unique_prompts_rollout 8 \
--num_unique_prompts_rollout 16 \
--num_samples_per_prompt_rollout 4 \
--model_name_or_path Qwen/Qwen3-1.7B \
--model_name_or_path Qwen/Qwen3-0.6B \
--stop_strings "</answer>" \
--apply_r1_style_format_reward \
--apply_verifiable_reward true \
--temperature 0.7 \
--temperature 1.0 \
--ground_truths_key ground_truth \
--chat_template_name r1_simple_chat_postpend_think \
--learning_rate 3e-7 \
--total_episodes 200 \
--total_episodes 256 \
--deepspeed_stage 2 \
--num_epochs 1 \
--num_learners_per_node 1 \
--vllm_tensor_parallel_size 1 \
--beta 0.01 \
--seed 3 \
--local_eval_every 1 \
--num_evals 4 \
--vllm_sync_backend gloo \
--vllm_gpu_memory_utilization 0.3 \
--save_traces \
Expand Down
82 changes: 82 additions & 0 deletions scripts/train/rlvr/grpo_olmo3.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Launch a GRPO RLVR training run for OLMo3 on Beaker via mason.py.
# Overridable from the environment: EXP_NAME, NUM_GPUS.
# Any extra arguments to this script are forwarded verbatim to grpo_fast.py.

# full integration mix
# dataset_mix="saurabh5/rlvr_acecoder_filtered 63033 hamishivi/rlvr_orz_math_57k_collected 56878 hamishivi/tulu_3_rewritten_400k_string_f1_only_v2 56878 allenai/IF_multi_constraints_upto5 56878"
# math only mix
# NOTE: dataset_mix is a space-separated "<dataset> <count>" list and is expanded
# unquoted below on purpose so it splits into multiple CLI arguments.
dataset_mix="hamishivi/rlvr_orz_math_57k_collected 56878"

# all evals
# evals="minerva_math::hamish_zs_reasoning,gsm8k::zs_cot_latex,gsm8k::hamish_zs_reasoning,minerva_math_500::hamish_zs_reasoning,zebralogic::hamish_zs_reasoning,aime::hamish_zs_reasoning,agi_eval_english:0shot_cot::hamish_zs_reasoning,gpqa:0shot_cot::hamish_zs_reasoning,ifeval::hamish_zs_reasoning,popqa::hamish_zs_reasoning,mmlu:cot::hamish_zs_reasoning,alpaca_eval_v3::hamish_zs_reasoning,bbh:cot::hamish_zs_reasoning,mbppplus:0-shot-chat::tulu-thinker,codex_humanevalplus:0-shot-chat-v1::tulu-thinker"
# math evals (comma-separated, passed as a single argument)
evals="minerva_math::hamish_zs_reasoning,minerva_math_500::hamish_zs_reasoning,aime:zs_cot_r1::pass_at_32_2024_temp1,aime:zs_cot_r1::pass_at_32_2025_temp1"

# Checkpoint to start from and the short name used for experiment/GS bookkeeping.
model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round5-100B-olmo3_7b-anneal-decon-12T-00bb6023/step47684-hf"
gs_model_name="olmo3-midtraining-round5"

exp_name="grpo_mathonly_1m_${gs_model_name}"
# Allow the caller to override the experiment name via the environment.
EXP_NAME=${EXP_NAME:-${exp_name}}

# cluster
cluster=ai2/augusta-google-1
# cluster=ai2/jupiter-cirrascale-2

NUM_GPUS=${NUM_GPUS:-8}

# The \&\& escapes are intentional: mason.py receives the literal token "&&"
# and re-assembles the remote command, so the shells must not interpret it here.
python mason.py \
    --task_name "${EXP_NAME}" \
    --cluster "${cluster}" \
    --workspace ai2/tulu-thinker \
    --priority high \
    --pure_docker_mode \
    --image michaeln/open_instruct_olmo3 \
    --preemptible \
    --num_nodes 2 \
    --env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
    --env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
    --gs_model_name "$gs_model_name" \
    --gpus "${NUM_GPUS}" \
    --budget ai2/oe-adapt \
    -- \
    source configs/beaker_configs/ray_node_setup.sh \&\& \
    source configs/beaker_configs/code_api_setup.sh \&\& \
    python open_instruct/grpo_fast.py \
    --exp_name "${EXP_NAME}" \
    --beta 0.0 \
    --num_samples_per_prompt_rollout 16 \
    --num_unique_prompts_rollout 128 \
    --num_mini_batches 4 \
    --num_epochs 1 \
    --learning_rate 1e-6 \
    --per_device_train_batch_size 1 \
    --kl_estimator kl3 \
    --dataset_mixer_list ${dataset_mix} \
    --dataset_mixer_list_splits train \
    --dataset_mixer_eval_list hamishivi/tulu_3_rewritten_100k 32 \
    --dataset_mixer_eval_list_splits train \
    --max_token_length 8192 \
    --max_prompt_token_length 2048 \
    --response_length 6144 \
    --pack_length 8192 \
    --model_name_or_path "${model_name_or_path}" \
    --chat_template_name olmo_thinker_r1_style \
    --stop_strings "</answer>" \
    --non_stop_penalty False \
    --temperature 1.0 \
    --total_episodes 1024000 \
    --deepspeed_stage 2 \
    --num_learners_per_node 8 \
    --vllm_num_engines 8 \
    --lr_scheduler_type constant \
    --apply_verifiable_reward true \
    --seed 1 \
    --local_eval_every 25 \
    --save_freq 25 \
    --checkpoint_state_freq 25 \
    --gradient_checkpointing \
    --with_tracking \
    --vllm_enable_prefix_caching \
    --clip_higher 0.272 \
    --mask_truncated_completions True \
    --oe_eval_max_length 8192 \
    --try_launch_beaker_eval_jobs_on_weka True \
    --oe_eval_tasks "${evals}" \
    --oe_eval_beaker_image oe-eval-beaker/oe_eval_olmo3_auto "$@"
Loading
Loading