diff --git a/api/types.go b/api/types.go index e5291a02..266f241a 100644 --- a/api/types.go +++ b/api/types.go @@ -227,6 +227,7 @@ type Options struct { MirostatEta float32 `json:"mirostat_eta,omitempty"` PenalizeNewline bool `json:"penalize_newline,omitempty"` Stop []string `json:"stop,omitempty"` + CachePrompt bool `json:"cache_prompt,omitempty"` } // Runner options which must be set when the model is loaded into memory @@ -605,6 +606,7 @@ func DefaultOptions() Options { MirostatEta: 0.1, PenalizeNewline: true, Seed: -1, + CachePrompt: true, Runner: Runner{ // options set when the model is loaded diff --git a/cmd/interactive.go b/cmd/interactive.go index abbf05f4..8aa7993a 100644 --- a/cmd/interactive.go +++ b/cmd/interactive.go @@ -107,6 +107,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error { fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n Set how far back to look for repetitions") fmt.Fprintln(os.Stderr, " /set parameter num_gpu The number of layers to send to the GPU") fmt.Fprintln(os.Stderr, " /set parameter stop ... Set the stop parameters") + fmt.Fprintln(os.Stderr, " /set parameter cache_prompt Set the cache_prompt parameter of llama.cpp") fmt.Fprintln(os.Stderr, "") } diff --git a/llm/server.go b/llm/server.go index 96815826..4e33a71d 100644 --- a/llm/server.go +++ b/llm/server.go @@ -720,7 +720,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu "seed": req.Options.Seed, "stop": req.Options.Stop, "image_data": req.Images, - "cache_prompt": true, + "cache_prompt": req.Options.CachePrompt, } // Make sure the server is ready diff --git a/server/routes_test.go b/server/routes_test.go index bd5b56af..633b96a4 100644 --- a/server/routes_test.go +++ b/server/routes_test.go @@ -82,7 +82,7 @@ func Test_Routes(t *testing.T) { fname := createTestFile(t, "ollama-model") - r := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname)) + r := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar\nPARAMETER cache_prompt false", fname)) modelfile, err := parser.ParseFile(r) if err != nil { t.Fatalf("failed to parse file: %v", err) @@ -396,6 +396,7 @@ func Test_Routes(t *testing.T) { } sort.Strings(params) expectedParams := []string{ + "cache_prompt false", "seed 42", "stop \"bar\"", "stop \"foo\"",