feat: allow setting KV cache type

2024-11-06 08:10:52 +11:00 · 2024-11-06 08:10:52 +11:00 · af7d64b887
commit af7d64b887
parent c3ec09bb69
1 changed file with 2 additions and 2 deletions
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -141,10 +141,10 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 	_, graphPartialOffload, graphFullOffload = ggml.GraphSize(uint64(opts.NumCtx), uint64(min(opts.NumCtx, opts.NumBatch)))

 	if graphPartialOffload == 0 {
-			graphPartialOffload = ggml.KV().GQA() * kv / 6
+		graphPartialOffload = ggml.KV().GQA() * kv / 6
 	}
 	if graphFullOffload == 0 {
-			graphFullOffload = graphPartialOffload
+		graphFullOffload = graphPartialOffload
 	}

 	// KV is proportional to the number of layers