From d93fbf6a60b95234656b2c6b2f734cf4429a716d Mon Sep 17 00:00:00 2001
From: zsmooter
Date: Sat, 13 Jul 2024 23:42:48 -0400
Subject: [PATCH] Disable mmap by default for Windows ROCm

---
 llm/server.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llm/server.go b/llm/server.go
index ffed9fc0..dc79dc2b 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -242,10 +242,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--flash-attn")
 	}
 
-	// Windows CUDA should not use mmap for best performance
+	// Windows CUDA/ROCm should not use mmap for best performance
 	// Linux with a model larger than free space, mmap leads to thrashing
 	// For CPU loads we want the memory to be allocated, not FS cache
-	if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
+	if (runtime.GOOS == "windows" && (gpus[0].Library == "cuda" || gpus[0].Library == "rocm") && opts.UseMMap == nil) ||
 		(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) ||
 		(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
 		(opts.UseMMap != nil && !*opts.UseMMap) {
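
For reference, a minimal standalone sketch (not part of the patch) of the updated predicate: it pulls the condition from the hunk above into a helper so the new Windows CUDA/ROCm branch can be exercised in isolation. The gpuInfo and options types and the disableMMap helper are simplified stand-ins invented for this sketch, not the real gpu.GpuInfoList and api.Options used in llm/server.go.

// Illustrative sketch only: the mmap decision from the patch, isolated.
// gpuInfo, options, and disableMMap are simplified stand-ins, not ollama types.
package main

import (
	"fmt"
	"runtime"
)

type gpuInfo struct {
	Library string // "cuda", "rocm", or "cpu"
}

type options struct {
	UseMMap *bool // nil means the user did not set use_mmap
}

// disableMMap mirrors the patched condition in NewLlamaServer: mmap is skipped
// on Windows for CUDA and ROCm, on Linux when the estimated model size exceeds
// free memory, for CPU-only loads, and whenever use_mmap is explicitly false.
func disableMMap(gpus []gpuInfo, opts options, systemFreeMemory, estimateTotalSize uint64) bool {
	return (runtime.GOOS == "windows" && (gpus[0].Library == "cuda" || gpus[0].Library == "rocm") && opts.UseMMap == nil) ||
		(runtime.GOOS == "linux" && systemFreeMemory < estimateTotalSize && opts.UseMMap == nil) ||
		(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
		(opts.UseMMap != nil && !*opts.UseMMap)
}

func main() {
	rocm := []gpuInfo{{Library: "rocm"}}

	// With the patch, a Windows ROCm load with use_mmap unset now disables mmap.
	fmt.Println("rocm, use_mmap unset:", disableMMap(rocm, options{}, 32<<30, 8<<30))

	// An explicit use_mmap=true still opts back in; the caller only appends
	// --no-mmap when this predicate returns true.
	yes := true
	fmt.Println("rocm, use_mmap=true:", disableMMap(rocm, options{UseMMap: &yes}, 32<<30, 8<<30))
}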