Skip to content

Commit 538cc77

Browse files
authored
server : fix handling of the ignore_eos flag (ggml-org#14710)
ggml-ci
1 parent 5cae766 commit 538cc77

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

tools/server/server.cpp

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,6 @@ struct slot_params {
127127
std::vector<std::string> response_fields;
128128
bool timings_per_token = false;
129129
bool post_sampling_probs = false;
130-
bool ignore_eos = false;
131130

132131
struct common_params_sampling sampling;
133132
struct common_params_speculative speculative;
@@ -441,7 +440,6 @@ struct server_task {
441440

442441
{
443442
params.sampling.logit_bias.clear();
444-
params.ignore_eos = json_value(data, "ignore_eos", false);
445443

446444
const auto & logit_bias = data.find("logit_bias");
447445
if (logit_bias != data.end() && logit_bias->is_array()) {
@@ -472,6 +470,16 @@ struct server_task {
472470
}
473471
}
474472
}
473+
474+
params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos);
475+
if (params.sampling.ignore_eos) {
476+
for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) {
477+
if (llama_vocab_is_eog(vocab, i)) {
478+
//SRV_DBG("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(ctx, i).c_str(), -INFINITY);
479+
params.sampling.logit_bias.push_back({i, -INFINITY});
480+
}
481+
}
482+
}
475483
}
476484

477485
{
@@ -2217,10 +2225,6 @@ struct server_context {
22172225
slot.params.n_predict = slot.n_predict;
22182226
}
22192227

2220-
if (slot.params.ignore_eos && has_eos_token) {
2221-
slot.params.sampling.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY});
2222-
}
2223-
22242228
{
22252229
if (slot.smpl != nullptr) {
22262230
common_sampler_free(slot.smpl);

0 commit comments

Comments
 (0)