From 34f142797aff17af2a1c22d68529269d231cc8d4 Mon Sep 17 00:00:00 2001
From: Jeffrey Morgan
Date: Sat, 8 Jun 2024 18:47:10 -0700
Subject: [PATCH] llm: always add bos token to prompt (#4941)

* fix embedding by adding fixes from llama.cpp upstream

* remove assert

---------

Co-authored-by: Jesper Ek
---
 llm/ext_server/server.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp
index 8a0dffea..7d14e48e 100644
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -835,7 +835,7 @@ struct llama_server_context
         system_tokens.clear();
 
         if (!system_prompt.empty()) {
-            system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
+            system_tokens = ::llama_tokenize(ctx, system_prompt, true);
 
             llama_batch_clear(batch);
@@ -1656,7 +1656,7 @@ struct llama_server_context
                     slot.t_start_process_prompt = ggml_time_us();
                     slot.t_start_genereration = 0;
 
-                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token); // add BOS if there isn't system prompt
+                    prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
 
                     slot.n_prompt_tokens = prompt_tokens.size();
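
Note: for context, below is a minimal standalone sketch of the decision logic this patch changes. The toy tokenize helper and BOS_ID are illustrative assumptions, not the real llama.cpp API; it only demonstrates why the old condition "system_prompt.empty() && add_bos_token" could drop BOS entirely (which broke embeddings, per the commit message), while the patched condition adds BOS whenever no system prompt is set.

// bos_sketch.cpp -- toy model of the BOS decision, not the server code
#include <cstdio>
#include <string>
#include <vector>

static const int BOS_ID = 1; // assumed BOS token id, for illustration only

// Toy tokenizer: one "token" per byte, optionally prefixed with BOS.
static std::vector<int> tokenize(const std::string &text, bool add_bos) {
    std::vector<int> out;
    if (add_bos) {
        out.push_back(BOS_ID);
    }
    for (char c : text) {
        out.push_back(static_cast<int>(c));
    }
    return out;
}

int main() {
    const std::string system_prompt;  // no system prompt configured
    const bool add_bos_token = false; // e.g. model metadata disabled BOS

    // Before the patch: BOS depended on both conditions, so the prompt
    // could be tokenized with no BOS token at all.
    auto before = tokenize("hello", system_prompt.empty() && add_bos_token);

    // After the patch: only the absence of a system prompt matters, so BOS
    // is added here (the system-prompt path now passes true directly).
    auto after = tokenize("hello", system_prompt.empty());

    std::printf("before: %zu tokens, BOS %s\n", before.size(),
                before.front() == BOS_ID ? "present" : "missing");
    std::printf("after:  %zu tokens, BOS %s\n", after.size(),
                after.front() == BOS_ID ? "present" : "missing");
    return 0;
}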