From cd0be17fbadf359126d966c58448d5e6a5f80668 Mon Sep 17 00:00:00 2001 From: Sam Date: Wed, 6 Nov 2024 08:12:07 +1100 Subject: [PATCH] feat: allow setting KV cache type --- llm/memory.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/llm/memory.go b/llm/memory.go index 26abdcee..8eb99ca6 100644 --- a/llm/memory.go +++ b/llm/memory.go @@ -147,9 +147,6 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string, graphFullOffload = graphPartialOffload } - // KV is proportional to the number of layers - layerSize += kv / ggml.KV().BlockCount() - // on metal there's no partial offload overhead if gpus[0].Library == "metal" { graphPartialOffload = graphFullOffload