Merge d93fbf6a60b95234656b2c6b2f734cf4429a716d into d7eb05b9361febead29a74e71ddffc2ebeff5302
Commit: fd22c9706a
@@ -238,10 +238,10 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 		params = append(params, "--flash-attn")
 	}

-	// Windows CUDA should not use mmap for best performance
+	// Windows CUDA/ROCm should not use mmap for best performance
 	// Linux with a model larger than free space, mmap leads to thrashing
 	// For CPU loads we want the memory to be allocated, not FS cache
-	if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
+	if (runtime.GOOS == "windows" && (gpus[0].Library == "cuda" || gpus[0].Library == "rocm") && opts.UseMMap == nil) ||
 		(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) ||
 		(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
 		(opts.UseMMap != nil && !*opts.UseMMap) {
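The change extends the Windows "no mmap by default" rule from CUDA alone to CUDA and ROCm. For reference, the merged condition can be read as a standalone predicate; the sketch below is illustrative only. The function name shouldDisableMMap and its flattened parameters are assumptions for demonstration, not identifiers from this repository (the real check lives inline in NewLlamaServer).

package main

import "fmt"

// shouldDisableMMap restates the post-change condition: mmap is disabled when
// explicitly turned off, or, when the option is unset, on Windows with CUDA or
// ROCm, on Linux when the model is larger than free memory, or for CPU loads.
func shouldDisableMMap(goos, library string, useMMap *bool, systemFreeMemory, estimateTotalSize uint64) bool {
	return (goos == "windows" && (library == "cuda" || library == "rocm") && useMMap == nil) ||
		(goos == "linux" && systemFreeMemory < estimateTotalSize && useMMap == nil) ||
		(library == "cpu" && useMMap == nil) ||
		(useMMap != nil && !*useMMap)
}

func main() {
	// After this change, Windows + ROCm with UseMMap unset now disables mmap,
	// matching the existing Windows + CUDA behavior.
	fmt.Println(shouldDisableMMap("windows", "rocm", nil, 0, 0)) // true
	fmt.Println(shouldDisableMMap("windows", "cuda", nil, 0, 0)) // true

	// An explicit opt-in keeps mmap on regardless of platform.
	on := true
	fmt.Println(shouldDisableMMap("windows", "rocm", &on, 0, 0)) // false
}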