From 0a0e9f3e0fa30e49c330cc48932c703d2a4d1e7a Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 19 Mar 2024 09:49:24 +0100 Subject: [PATCH] Apply 01-cache.diff --- llm/ext_server/server.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp index 5df5bb47..f5d7863d 100644 --- a/llm/ext_server/server.cpp +++ b/llm/ext_server/server.cpp @@ -1007,13 +1007,15 @@ struct llama_server_context slot.n_sent_text += result.text_to_send.size(); // add the token to slot queue and cache } - slot.add_token_string(result); + if (slot.params.stream) { send_partial_response(slot, result); } } + slot.add_token_string(result); + if (incomplete) { slot.has_next_token = true;