From 0c2f95f3de4c2b84733fa1fcafc954d244f7adb3 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 13 Aug 2024 10:38:03 -0700 Subject: [PATCH] runner: Initialize numPredict numPredict is used to enforce a limit on the number of tokens to generate. It is passed in from Ollama but it is never stored to be checked. --- llama/runner/runner.go | 1 + 1 file changed, 1 insertion(+) diff --git a/llama/runner/runner.go b/llama/runner/runner.go index d45f96cf..9aca6cfe 100644 --- a/llama/runner/runner.go +++ b/llama/runner/runner.go @@ -91,6 +91,7 @@ func (s *Server) NewSequence(prompt string, numPredict int, stop []string, param return &Sequence{ tokens: tokens, n_prompt_tokens: len(tokens), + numPredict: numPredict, responses: make(chan string, 1), embedding: make(chan []float32, 1), samplingCtx: sc,