@@ -127,7 +127,6 @@ struct slot_params {
127
127
std::vector<std::string> response_fields;
128
128
bool timings_per_token = false ;
129
129
bool post_sampling_probs = false ;
130
- bool ignore_eos = false ;
131
130
132
131
struct common_params_sampling sampling;
133
132
struct common_params_speculative speculative;
@@ -441,7 +440,6 @@ struct server_task {
441
440
442
441
{
443
442
params.sampling .logit_bias .clear ();
444
- params.ignore_eos = json_value (data, " ignore_eos" , false );
445
443
446
444
const auto & logit_bias = data.find (" logit_bias" );
447
445
if (logit_bias != data.end () && logit_bias->is_array ()) {
@@ -472,6 +470,16 @@ struct server_task {
472
470
}
473
471
}
474
472
}
473
+
474
+ params.sampling .ignore_eos = json_value (data, " ignore_eos" , params_base.sampling .ignore_eos );
475
+ if (params.sampling .ignore_eos ) {
476
+ for (llama_token i = 0 ; i < llama_vocab_n_tokens (vocab); i++) {
477
+ if (llama_vocab_is_eog (vocab, i)) {
478
+ // SRV_DBG("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(ctx, i).c_str(), -INFINITY);
479
+ params.sampling .logit_bias .push_back ({i, -INFINITY});
480
+ }
481
+ }
482
+ }
475
483
}
476
484
477
485
{
@@ -2217,10 +2225,6 @@ struct server_context {
2217
2225
slot.params .n_predict = slot.n_predict ;
2218
2226
}
2219
2227
2220
- if (slot.params .ignore_eos && has_eos_token) {
2221
- slot.params .sampling .logit_bias .push_back ({llama_vocab_eos (vocab), -INFINITY});
2222
- }
2223
-
2224
2228
{
2225
2229
if (slot.smpl != nullptr ) {
2226
2230
common_sampler_free (slot.smpl );
0 commit comments