@@ -562,6 +562,35 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
562
562
break ;
563
563
}
564
564
params.lora_base = argv[i];
565
+ } else if (arg == " --control-vector" ) {
566
+ if (++i >= argc) {
567
+ invalid_param = true ;
568
+ break ;
569
+ }
570
+ params.control_vectors .push_back (std::make_tuple (argv[i], 1 .0f ));
571
+ } else if (arg == " --control-vector-scaled" ) {
572
+ if (++i >= argc) {
573
+ invalid_param = true ;
574
+ break ;
575
+ }
576
+ const char * control_vector = argv[i];
577
+ if (++i >= argc) {
578
+ invalid_param = true ;
579
+ break ;
580
+ }
581
+ params.control_vectors .push_back (std::make_tuple (control_vector, std::stof (argv[i])));
582
+ } else if (arg == " --control-vector-layer-range" ) {
583
+ if (++i >= argc) {
584
+ invalid_param = true ;
585
+ break ;
586
+ }
587
+ int32_t start = std::stoi (argv[i]);
588
+ if (++i >= argc) {
589
+ invalid_param = true ;
590
+ break ;
591
+ }
592
+ int32_t end = std::stoi (argv[i]);
593
+ params.control_vector_layer_range = std::make_tuple (start, end);
565
594
} else if (arg == " --mmproj" ) {
566
595
if (++i >= argc) {
567
596
invalid_param = true ;
@@ -1087,6 +1116,12 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
1087
1116
printf (" --lora FNAME apply LoRA adapter (implies --no-mmap)\n " );
1088
1117
printf (" --lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)\n " );
1089
1118
printf (" --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n " );
1119
+ printf (" --control-vector FNAME\n " );
1120
+ printf (" add a control vector\n " );
1121
+ printf (" --control-vector-scaled FNAME S\n " );
1122
+ printf (" add a control vector with user defined scaling S\n " );
1123
+ printf (" --control-vector-layer-range START END\n " );
1124
+ printf (" layer range to apply the control vector(s) to, start and end inclusive\n " );
1090
1125
printf (" -m FNAME, --model FNAME\n " );
1091
1126
printf (" model path (default: %s)\n " , params.model .c_str ());
1092
1127
printf (" -md FNAME, --model-draft FNAME\n " );
@@ -1351,6 +1386,41 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
1351
1386
return std::make_tuple (nullptr , nullptr );
1352
1387
}
1353
1388
1389
+ if (!params.control_vectors .empty ()) {
1390
+ int32_t layer_start, layer_end;
1391
+ std::tie (layer_start, layer_end) = params.control_vector_layer_range ;
1392
+
1393
+ if (layer_start == 0 ) layer_start = 1 ;
1394
+ if (layer_end == 0 ) layer_end = 31 ;
1395
+
1396
+ struct llama_control_vector * vector = nullptr ;
1397
+
1398
+ for (const auto & t : params.control_vectors ) {
1399
+ std::string path;
1400
+ float strength;
1401
+ std::tie (path, strength) = t;
1402
+
1403
+ fprintf (stderr, " %s: loading control vector from %s\n " , __func__, path.c_str ());
1404
+ struct llama_control_vector * temp = llama_control_vector_load (path.c_str ());
1405
+ if (temp == nullptr ) {
1406
+ fprintf (stderr, " %s: error: failed to load control vector from %s\n " , __func__, path.c_str ());
1407
+ llama_free (lctx);
1408
+ llama_free_model (model);
1409
+ return std::make_tuple (nullptr , nullptr );
1410
+ }
1411
+ llama_control_vector_scale (temp, strength);
1412
+
1413
+ if (vector == nullptr ) {
1414
+ vector = temp;
1415
+ } else {
1416
+ llama_control_vector_add (vector, temp);
1417
+ llama_control_vector_free (temp);
1418
+ }
1419
+ }
1420
+
1421
+ llama_apply_control_vector (lctx, vector, layer_start, layer_end);
1422
+ }
1423
+
1354
1424
for (unsigned int i = 0 ; i < params.lora_adapter .size (); ++i) {
1355
1425
const std::string& lora_adapter = std::get<0 >(params.lora_adapter [i]);
1356
1426
float lora_scale = std::get<1 >(params.lora_adapter [i]);
0 commit comments