@@ -275,11 +275,11 @@ struct llama_server_context
         if (suff_rm_leading_spc && suffix_tokens[0] == space_token) {
             suffix_tokens.erase(suffix_tokens.begin());
         }
-        prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(ctx));
-        prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(ctx)); // always add BOS
-        prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(ctx));
+        prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
+        prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
+        prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
         prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
-        prefix_tokens.push_back(llama_token_middle(ctx));
+        prefix_tokens.push_back(llama_token_middle(model));

         auto prompt_tokens = prefix_tokens;

@@ -419,7 +419,7 @@ struct llama_server_context
         if (params.n_predict == 0)
         {
             has_next_token = false;
-            result.tok = llama_token_eos(ctx);
+            result.tok = llama_token_eos(model);
             return result;
         }

@@ -453,7 +453,7 @@ struct llama_server_context
         // decrement remaining sampling budget
         --n_remain;

-        if (!embd.empty() && embd.back() == llama_token_eos(ctx))
+        if (!embd.empty() && embd.back() == llama_token_eos(model))
         {
             // stopping_word = llama_token_to_piece(ctx, embd.back());
             has_next_token = false;
@@ -594,7 +594,7 @@ static void parse_options_completion(bool streaming,const backend::PredictOption

     if (predict->ignoreeos())
     {
-        llama.params.sparams.logit_bias[llama_token_eos(llama.ctx)] = -INFINITY;
+        llama.params.sparams.logit_bias[llama_token_eos(llama.model)] = -INFINITY;
     }

     // const auto &logit_bias = body.find("logit_bias");
@@ -676,7 +676,7 @@ static void params_parse(const backend::ModelOptions* request,
 }

 static bool is_at_eob(llama_server_context &server_context, const llama_token *tokens, const size_t n_tokens) {
-    return n_tokens && tokens[n_tokens-1] == llama_token_eos(server_context.ctx);
+    return n_tokens && tokens[n_tokens-1] == llama_token_eos(server_context.model);
 }

 // Function matching type llama_beam_search_callback_fn_t.
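
Context for the `ctx` → `model` substitutions above: in llama.cpp, the special-token getters (`llama_token_bos`, `llama_token_eos`, and the infill tokens `llama_token_prefix` / `llama_token_middle` / `llama_token_suffix`) were changed to take a `const llama_model *`, since these token IDs are vocabulary properties of the model rather than of a decoding context. Where code only holds a `llama_context *`, the model can be recovered with `llama_get_model()`. A minimal sketch of the pattern, assuming that revision of `llama.h` (the helper name `eos_token_of` is illustrative, not part of the API):

```cpp
#include "llama.h"

// Hypothetical helper showing the migration: the EOS token id now comes
// from the model's vocabulary, so the getter takes a llama_model pointer.
static llama_token eos_token_of(llama_context * ctx) {
    // Before the change: llama_token_eos(ctx);
    // After: recover the model from the context, then query it.
    const llama_model * model = llama_get_model(ctx);
    return llama_token_eos(model);
}
```

In this server the struct already keeps a `model` member alongside `ctx`, so the diff passes it directly instead of going through `llama_get_model()`.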