diff --git a/gpu/gpu.go b/gpu/gpu.go index 58144991..6e25cb46 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -360,14 +360,17 @@ func GetGPUInfo() GpuInfoList { "before", "total", format.HumanBytes2(cpus[0].TotalMemory), "free", format.HumanBytes2(cpus[0].FreeMemory), + "free_swap", format.HumanBytes2(cpus[0].FreeSwap), ), slog.Group( "now", "total", format.HumanBytes2(mem.TotalMemory), "free", format.HumanBytes2(mem.FreeMemory), + "free_swap", format.HumanBytes2(mem.FreeSwap), ), ) cpus[0].FreeMemory = mem.FreeMemory + cpus[0].FreeSwap = mem.FreeSwap } var memInfo C.mem_info_t diff --git a/gpu/gpu_darwin.go b/gpu/gpu_darwin.go index 39d8fcf8..cb066e58 100644 --- a/gpu/gpu_darwin.go +++ b/gpu/gpu_darwin.go @@ -57,6 +57,7 @@ func GetCPUMem() (memInfo, error) { return memInfo{ TotalMemory: uint64(C.getPhysicalMemory()), FreeMemory: uint64(C.getFreeMemory()), + // FreeSwap omitted as Darwin uses dynamic paging }, nil } diff --git a/gpu/gpu_linux.go b/gpu/gpu_linux.go index a099bf82..0d08ce8d 100644 --- a/gpu/gpu_linux.go +++ b/gpu/gpu_linux.go @@ -50,7 +50,7 @@ var OneapiMgmtName = "libze_intel_gpu.so" func GetCPUMem() (memInfo, error) { var mem memInfo - var total, available, free, buffers, cached uint64 + var total, available, free, buffers, cached, freeSwap uint64 f, err := os.Open("/proc/meminfo") if err != nil { return mem, err @@ -70,20 +70,21 @@ func GetCPUMem() (memInfo, error) { _, err = fmt.Sscanf(line, "Buffers:%d", &buffers) case strings.HasPrefix(line, "Cached:"): _, err = fmt.Sscanf(line, "Cached:%d", &cached) + case strings.HasPrefix(line, "SwapFree:"): + _, err = fmt.Sscanf(line, "SwapFree:%d", &freeSwap) default: continue } if err != nil { return mem, err } - - if total > 0 && available > 0 { - mem.TotalMemory = total * format.KibiByte - mem.FreeMemory = available * format.KibiByte - return mem, nil - } } mem.TotalMemory = total * format.KibiByte - mem.FreeMemory = (free + buffers + cached) * format.KibiByte + mem.FreeSwap = freeSwap * format.KibiByte + 
if available > 0 { + mem.FreeMemory = available * format.KibiByte + } else { + mem.FreeMemory = (free + buffers + cached) * format.KibiByte + } return mem, nil } diff --git a/gpu/gpu_windows.go b/gpu/gpu_windows.go index f8c2e76f..cd0629da 100644 --- a/gpu/gpu_windows.go +++ b/gpu/gpu_windows.go @@ -51,5 +51,5 @@ func GetCPUMem() (memInfo, error) { if r1 == 0 { return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err) } - return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys}, nil + return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil } diff --git a/gpu/types.go b/gpu/types.go index 7a7749b8..8d22b06b 100644 --- a/gpu/types.go +++ b/gpu/types.go @@ -10,6 +10,7 @@ import ( type memInfo struct { TotalMemory uint64 `json:"total_memory,omitempty"` FreeMemory uint64 `json:"free_memory,omitempty"` + FreeSwap uint64 `json:"free_swap,omitempty"` } // Beginning of an `ollama info` command diff --git a/llm/server.go b/llm/server.go index 07c58cff..8f37aa23 100644 --- a/llm/server.go +++ b/llm/server.go @@ -88,6 +88,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr var estimate MemoryEstimate var systemTotalMemory uint64 var systemFreeMemory uint64 + var systemSwapFreeMemory uint64 systemMemInfo, err := gpu.GetCPUMem() if err != nil { @@ -95,7 +96,8 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr } else { systemTotalMemory = systemMemInfo.TotalMemory systemFreeMemory = systemMemInfo.FreeMemory - slog.Debug("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", systemFreeMemory) + systemSwapFreeMemory = systemMemInfo.FreeSwap + slog.Debug("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory)) } // If the user wants zero GPU layers, reset the gpu list to be CPU/system ram 
info @@ -125,9 +127,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr // On linux, over-allocating CPU memory will almost always result in an error if runtime.GOOS == "linux" { systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize - if systemMemoryRequired > systemTotalMemory { - slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "system", format.HumanBytes2(systemTotalMemory)) - return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(systemTotalMemory)) + available := min(systemTotalMemory, systemFreeMemory+systemSwapFreeMemory) + if systemMemoryRequired > available { + slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", format.HumanBytes2(available), "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory)) + return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available)) } }