feat: allow setting KV cache type
This commit is contained in:
parent
af7d64b887
commit
cd0be17fba
@ -147,9 +147,6 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
|
|||||||
graphFullOffload = graphPartialOffload
|
graphFullOffload = graphPartialOffload
|
||||||
}
|
}
|
||||||
|
|
||||||
// KV is proportional to the number of layers
|
|
||||||
layerSize += kv / ggml.KV().BlockCount()
|
|
||||||
|
|
||||||
// on metal there's no partial offload overhead
|
// on metal there's no partial offload overhead
|
||||||
if gpus[0].Library == "metal" {
|
if gpus[0].Library == "metal" {
|
||||||
graphPartialOffload = graphFullOffload
|
graphPartialOffload = graphFullOffload
|
||||||
|
Loading…
x
Reference in New Issue
Block a user