feat: allow setting KV cache type

This commit is contained in:
Sam 2024-11-06 08:10:52 +11:00
parent c3ec09bb69
commit af7d64b887

View File

@@ -141,10 +141,10 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
_, graphPartialOffload, graphFullOffload = ggml.GraphSize(uint64(opts.NumCtx), uint64(min(opts.NumCtx, opts.NumBatch)))
if graphPartialOffload == 0 {
graphPartialOffload = ggml.KV().GQA() * kv / 6
graphPartialOffload = ggml.KV().GQA() * kv / 6
}
if graphFullOffload == 0 {
graphFullOffload = graphPartialOffload
graphFullOffload = graphPartialOffload
}
// KV is proportional to the number of layers