diff --git a/llm/memory.go b/llm/memory.go
index 26abdcee..8eb99ca6 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -147,9 +147,6 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 		graphFullOffload = graphPartialOffload
 	}
 
-	// KV is proportional to the number of layers
-	layerSize += kv / ggml.KV().BlockCount()
-
 	// on metal there's no partial offload overhead
 	if gpus[0].Library == "metal" {
 		graphPartialOffload = graphFullOffload