feat: allow setting KV cache type

This commit is contained in:
Sam 2024-11-06 08:12:07 +11:00
parent af7d64b887
commit cd0be17fba

View File

@ -147,9 +147,6 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
graphFullOffload = graphPartialOffload
}
// KV is proportional to the number of layers
layerSize += kv / ggml.KV().BlockCount()
// on metal there's no partial offload overhead
if gpus[0].Library == "metal" {
graphPartialOffload = graphFullOffload