@@ -17,10 +17,12 @@ NUM_INSTRUCTIONS=5
17
17
GENERATE_ARGS=(" --num-cpus" " $( nproc) " --taxonomy-path=' ./taxonomy' )
18
18
DIFF_ARGS=(" --taxonomy-path" " ./taxonomy" )
19
19
TRAIN_ARGS=()
20
- LEGACYTRAIN=0
21
20
PHASED_TRAINING=0
22
- TRAIN_LIBRARY=0
23
21
BACKEND=" llama-cpp"
22
+ FOUR_BIT_QUANT=0
23
+ SIMPLE_TRAIN=0
24
+ FULL_TRAIN=0
25
+ ACCELERATED_TRAIN=0
24
26
HF_TOKEN=${HF_TOKEN:- }
25
27
SDG_PIPELINE=" simple"
26
28
SKIP_TRAIN=${SKIP_TRAIN:- 0}
@@ -97,15 +99,14 @@ set_defaults() {
97
99
exit 1
98
100
fi
99
101
100
- if [ " ${PHASED_TRAINING} " -eq 1 ] && [ " ${TRAIN_LIBRARY } " -eq 0 ]; then
101
- echo " ERROR: You have -P set. It requires -T ."
102
+ if [ " ${PHASED_TRAINING} " -eq 1 ] && [ " ${ACCELERATED_TRAIN } " -eq 0 ]; then
103
+ echo " ERROR: You have -P set. It requires -a ."
102
104
exit 1
103
105
fi
104
106
105
107
if [ " $MINIMAL " -eq 1 ]; then
106
108
# Minimal settings to run in less time
107
109
NUM_INSTRUCTIONS=1
108
- TRAIN_ARGS+=(" --num-epochs" " 1" )
109
110
fi
110
111
}
111
112
@@ -116,14 +117,8 @@ test_smoke() {
116
117
117
118
test_init () {
118
119
task Initializing ilab
119
-
120
- if [ " $LEGACYTRAIN " -eq 1 ]; then
121
- # TODO Only cuda for now
122
- step Setting train-profile for GPU accelerated training
123
- ilab config init --non-interactive --train-profile=" ${SCRIPTDIR} /test-data/train-profile-a10.yaml"
124
- else
125
- ilab config init --non-interactive
126
- fi
120
+
121
+ ilab config init --non-interactive
127
122
128
123
step Replace model in config.yaml
129
124
if [ " ${BACKEND} " == " vllm" ]; then
@@ -283,29 +278,36 @@ test_generate() {
283
278
# Run `ilab model train` with one of the simple / full / accelerated pipelines.
#
# simple, full and accelerated are different workflows. To mimic a real user
# e2e scenario only one of them is run on a given system: the `small` worker
# can manage `simple`, the medium worker can handle `full` and the large
# worker can handle `accelerated`.
#
# Globals read:    ACCELERATED_TRAIN, SIMPLE_TRAIN, FULL_TRAIN, FOUR_BIT_QUANT,
#                  BACKEND, DATA_HOME, CACHE_HOME, GRANITE_SAFETENSOR_REPO,
#                  GRANITE_GGUF_MODEL
# Globals written: TRAIN_ARGS (replaced for accelerated/full, appended to for
#                  simple, left untouched when no pipeline flag is set)
# Exits 1 when more than one pipeline flag is set, or when the accelerated or
# full pipeline is selected and no training data file was found.
test_train() {
    task Train the model

    local data
    data=$(find "${DATA_HOME}"/instructlab/datasets -name 'skills_train_msgs_*' | head -n 1)

    # The pipelines take conflicting arguments (each sets its own --pipeline),
    # so refuse to combine them instead of passing a merged argument list.
    local flag selected=0
    for flag in "$ACCELERATED_TRAIN" "$SIMPLE_TRAIN" "$FULL_TRAIN"; do
        if [ "$flag" -eq 1 ]; then
            selected=$((selected + 1))
        fi
    done
    if [ "$selected" -gt 1 ]; then
        echo "ERROR: -a, -s and -f select different training pipelines. Set only one." >&2
        exit 1
    fi

    # Only accelerated and full pass --data-path; fail early with a clear
    # message rather than handing ilab an empty path.
    if [ "$ACCELERATED_TRAIN" -eq 1 ] || [ "$FULL_TRAIN" -eq 1 ]; then
        if [ -z "$data" ]; then
            echo "ERROR: no skills_train_msgs_* file found under ${DATA_HOME}/instructlab/datasets" >&2
            exit 1
        fi
    fi

    if [ "$ACCELERATED_TRAIN" -eq 1 ]; then
        # TODO Only cuda for now
        TRAIN_ARGS=(
            "--pipeline=accelerated"
            "--device=cuda"
            "--model-path=${GRANITE_SAFETENSOR_REPO}"
            "--data-path=${data}"
            "--lora-quantize-dtype=nf4"
            "--4-bit-quant"
            "--effective-batch-size=4"
            "--is-padding-free=False"
        )
        if [ "${BACKEND}" != "vllm" ]; then
            TRAIN_ARGS+=("--gguf-model-path" "${CACHE_HOME}/instructlab/models/${GRANITE_GGUF_MODEL}")
        fi
    elif [ "$SIMPLE_TRAIN" -eq 1 ]; then
        # Appends to (rather than replaces) whatever TRAIN_ARGS already holds.
        if [ "$FOUR_BIT_QUANT" -eq 1 ]; then
            TRAIN_ARGS+=("--4-bit-quant")
        fi
        # TODO Only cuda for now
        TRAIN_ARGS+=("--pipeline=simple" "--device=cuda" "--num-epochs=1")
        if [ "${BACKEND}" != "vllm" ]; then
            TRAIN_ARGS+=("--gguf-model-path" "${CACHE_HOME}/instructlab/models/${GRANITE_GGUF_MODEL}")
        fi
    elif [ "$FULL_TRAIN" -eq 1 ]; then
        # test training on a CPU not the GPU
        TRAIN_ARGS=(
            "--num-epochs=1"
            "--pipeline=full"
            "--model-path=${GRANITE_SAFETENSOR_REPO}"
            "--data-path=${data}"
            "--effective-batch-size=4"
            "--device=cpu"
        )
    fi

    ilab model train "${TRAIN_ARGS[@]}"
}
310
312
311
313
test_phased_train () {
@@ -451,15 +453,18 @@ test_exec() {
451
453
# When we run training with --4-bit-quant, we can't convert the result to a gguf
452
454
# https://github.com/instructlab/instructlab/issues/579
453
455
# so we skip trying to test the result
454
- if [ " $LEGACYTRAIN " -eq 1 ]; then
456
+ if [ " $FULL_TRAIN " -eq 1 ]; then
455
457
# When you run this --
456
458
# `ilab model convert` is only implemented for macOS with M-series chips for now
457
459
# test_convert
458
-
459
- test_serve trained " ${DATA_HOME} /instructlab/checkpoints/model.gguf"
460
+
461
+ # when using full train, choose any GGUF from any of the checkpoints dirs
462
+ model_dir=$( find " ${DATA_HOME} " /instructlab/checkpoints/hf_format -name ' samples_*' | head -n 1)
463
+
464
+ test_serve trained " ${model_dir} /pytorch_model-Q4_K_M.gguf"
460
465
PID=$!
461
466
462
- test_chat
467
+ ilab model chat -qq --model " ${model_dir} /pytorch_model-Q4_K_M.gguf " --endpoint-url http://localhost:8000/v1 ' Say "Hello" and nothing else\n '
463
468
464
469
# Kill the serve process
465
470
task Stopping the ilab model serve for trained model
@@ -508,21 +513,21 @@ wait_for_server() {
508
513
# Print the command line help text to stdout.
#
# The option list mirrors the getopts string "eFhqasfmMPv". -a, -s and -f each
# select one `ilab model train` pipeline (accelerated, simple, full).
usage() {
    # Unquoted delimiter on purpose: $0 must expand to the script name.
    cat <<EOF
Usage: $0 [-aefFhmMPqsv]
  -e  Run model evaluation
  -q  Use 4-bit-quant when training
  -a  Use the 'accelerated' training pipeline (training library)
  -s  Run the simple training using the SFTTrainer rather than the custom training loop
  -f  Use the 'full' training pipeline (trains on the CPU)
  -F  Use the 'full' SDG pipeline instead of the default 'simple' pipeline
  -h  Show this help text
  -m  Run minimal configuration with lower number of instructions (run quicker when you have no GPU)
  -M  Use the mixtral model (4-bit quantized) instead of merlinite model (4-bit quantized).
  -P  Run multi-phase training (requires -a)
  -v  Use the vLLM backend for serving
EOF
}
522
527
523
528
# Process command line arguments
524
529
task " Configuring ..."
525
- while getopts " eFhLmMPTv " opt; do
530
+ while getopts " eFhqasfmMPv " opt; do
526
531
case $opt in
527
532
e)
528
533
EVAL=1
@@ -536,10 +541,6 @@ while getopts "eFhLmMPTv" opt; do
536
541
usage
537
542
exit 0
538
543
;;
539
- L)
540
- LEGACYTRAIN=1
541
- step " Running legacy training with 4-bit quantization."
542
- ;;
543
544
m)
544
545
MINIMAL=1
545
546
step " Running minimal configuration."
@@ -552,14 +553,26 @@ while getopts "eFhLmMPTv" opt; do
552
553
PHASED_TRAINING=1
553
554
step " Running multi-phase training."
554
555
;;
555
- T)
556
- TRAIN_LIBRARY=1
557
- step " Running with training library."
558
- ;;
559
556
v)
560
557
BACKEND=vllm
561
558
step " Running with vLLM backend."
562
559
;;
560
+ q)
561
+ FOUR_BIT_QUANT=1
562
+ step " Running training using 4-bit-quantization."
563
+ ;;
564
+ s)
565
+ SIMPLE_TRAIN=1
566
+ step " Running the simple training pipeline"
567
+ ;;
568
+ f)
569
+ FULL_TRAIN=1
570
+ step " Running the full training pipeline"
571
+ ;;
572
+ a)
573
+ ACCELERATED_TRAIN=1
574
+ step " Running using the training library"
575
+ ;;
563
576
\? )
564
577
echo " Invalid option: -$opt " >&2
565
578
usage
0 commit comments