From cd0be17fbadf359126d966c58448d5e6a5f80668 Mon Sep 17 00:00:00 2001
From: Sam <sammcj@users.noreply.github.com>
Date: Wed, 6 Nov 2024 08:12:07 +1100
Subject: [PATCH] feat: allow setting KV cache type

---
 llm/memory.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llm/memory.go b/llm/memory.go
index 26abdcee..8eb99ca6 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -147,9 +147,6 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 		graphFullOffload = graphPartialOffload
 	}
 
-	// KV is proportional to the number of layers
-	layerSize += kv / ggml.KV().BlockCount()
-
 	// on metal there's no partial offload overhead
 	if gpus[0].Library == "metal" {
 		graphPartialOffload = graphFullOffload