rewrite for ollama 4.0
This commit is contained in:
parent
3d25e7bf8c
commit
bdd7e2f91a
@ -8,6 +8,7 @@ import (
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"slices"
|
||||
@ -30,9 +31,11 @@ const (
|
||||
GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
|
||||
|
||||
// Direct Rendering Manager sysfs location
|
||||
DRMDeviceDirGlob = "/sys/class/drm/card*/device"
|
||||
DRMTotalMemoryFile = "mem_info_vram_total"
|
||||
DRMUsedMemoryFile = "mem_info_vram_used"
|
||||
DRMDeviceDirGlob = "/sys/class/drm/card*/device"
|
||||
DRMTotalMemoryFile = "mem_info_vram_total"
|
||||
DRMUsedMemoryFile = "mem_info_vram_used"
|
||||
DRMTotalMemoryFileGTT = "mem_info_gtt_total"
|
||||
DRMUsedMemoryFileGTT = "mem_info_gtt_used"
|
||||
|
||||
// In hex; properties file is in decimal
|
||||
DRMUniqueIDFile = "unique_id"
|
||||
@ -44,8 +47,79 @@ var (
|
||||
// Used to validate if the given ROCm lib is usable
|
||||
ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
|
||||
RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
|
||||
|
||||
// APUvalidForGTT contains the list of GPU architectures that support GTT memory allocation
|
||||
APUvalidForGTT = []string{
|
||||
"gfx1103", // Radeon 890m, 780m, 760m, 740m (RDNA3)
|
||||
"gfx1151", // RDNA3+
|
||||
"gfx1152", // RDNA3+
|
||||
"gfx1037", // Radeon 610M (RDNA2)
|
||||
"gfx1035", // Radeon 680m, 660m (RDNA2)
|
||||
"gfx1033", // Van Gogh (RDNA2)
|
||||
"gfx1036", // Generic RDNA2
|
||||
"gfx940", // MI300A (CDNA3)
|
||||
"gfx90c", // Radeon Vega 7 (Ryzen 5600G)
|
||||
}
|
||||
|
||||
// ApuUseGTT indicates whether GTT memory allocation is enabled for the current APU
|
||||
ApuUseGTT bool
|
||||
)
|
||||
|
||||
// Check for valid APU an linux kenel version to use GTT memory insted VRAM memory
|
||||
func GTTmemoryOnAPU(gfx string) (bool, error) {
|
||||
// Check kernel version
|
||||
cmd := exec.Command("uname", "-r")
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("error executing uname command: %w", err)
|
||||
}
|
||||
|
||||
fullKernelVersion := strings.TrimSpace(string(output))
|
||||
|
||||
// Split by "-" and take the first part, or use the whole string if no "-" is present
|
||||
versionPart := fullKernelVersion
|
||||
if parts := strings.SplitN(fullKernelVersion, "-", 2); len(parts) > 1 {
|
||||
versionPart = parts[0]
|
||||
}
|
||||
|
||||
versionParts := strings.Split(versionPart, ".")
|
||||
if len(versionParts) < 3 {
|
||||
return false, fmt.Errorf("unable to parse kernel version: %s", fullKernelVersion)
|
||||
}
|
||||
|
||||
major, err := strconv.Atoi(versionParts[0])
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("error parsing major version: %w", err)
|
||||
}
|
||||
|
||||
minor, err := strconv.Atoi(versionParts[1])
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("error parsing minor version: %w", err)
|
||||
}
|
||||
|
||||
patch, err := strconv.Atoi(versionParts[2])
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("error parsing patch version: %w", err)
|
||||
}
|
||||
|
||||
kernelVersionValid := (major > 6 || (major == 6 && minor > 9) || (major == 6 && minor == 9 && patch >= 9))
|
||||
|
||||
gfxValid := false
|
||||
for _, validGfx := range APUvalidForGTT {
|
||||
if strings.Contains(gfx, validGfx) {
|
||||
gfxValid = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if kernelVersionValid && gfxValid {
|
||||
slog.Debug("AMD APU valid to use GTT memory")
|
||||
}
|
||||
|
||||
return kernelVersionValid && gfxValid, nil
|
||||
|
||||
}
|
||||
|
||||
// Gather GPU information from the amdgpu driver if any supported GPUs are detected
|
||||
// Only called once during bootstrap
|
||||
func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
||||
@ -235,10 +309,19 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
||||
if !matched {
|
||||
continue
|
||||
}
|
||||
|
||||
ApuUseGTT, err = GTTmemoryOnAPU(fmt.Sprintf("gfx%d%x%x", major, minor, patch))
|
||||
if err != nil {
|
||||
slog.Debug("Error:", err)
|
||||
continue
|
||||
}
|
||||
// Found the matching DRM directory
|
||||
slog.Debug("matched", "amdgpu", match, "drm", devDir)
|
||||
totalFile := filepath.Join(devDir, DRMTotalMemoryFile)
|
||||
var totalFile string
|
||||
if ApuUseGTT {
|
||||
totalFile = filepath.Join(devDir, DRMTotalMemoryFileGTT)
|
||||
} else {
|
||||
totalFile = filepath.Join(devDir, DRMTotalMemoryFile)
|
||||
}
|
||||
buf, err := os.ReadFile(totalFile)
|
||||
if err != nil {
|
||||
slog.Debug("failed to read sysfs node", "file", totalFile, "error", err)
|
||||
@ -250,7 +333,12 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
||||
break
|
||||
}
|
||||
|
||||
usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
|
||||
var usedFile string
|
||||
if ApuUseGTT {
|
||||
usedFile = filepath.Join(devDir, DRMUsedMemoryFileGTT)
|
||||
} else {
|
||||
usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
|
||||
}
|
||||
usedMemory, err = getFreeMemory(usedFile)
|
||||
if err != nil {
|
||||
slog.Debug("failed to update used memory", "error", err)
|
||||
@ -285,6 +373,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
||||
MinimumMemory: rocmMinimumMemory,
|
||||
DriverMajor: driverMajor,
|
||||
DriverMinor: driverMinor,
|
||||
ApuUseGTT: ApuUseGTT, //AMD APU use GTT for its memory
|
||||
},
|
||||
usedFilepath: usedFile,
|
||||
index: gpuID,
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"reflect"
|
||||
"regexp"
|
||||
@ -92,6 +93,19 @@ func GetCPUMem() (memInfo, error) {
|
||||
} else {
|
||||
mem.FreeMemory = (free + buffers + cached) * format.KibiByte
|
||||
}
|
||||
|
||||
//Do not consider RAM that can be used by GTT for AMD APUs
|
||||
amdGPUs, err := AMDGetGPUInfo()
|
||||
if err != nil {
|
||||
slog.Debug("Error getting AMD GPU info: %v", err)
|
||||
}
|
||||
|
||||
for _, gpuInfo := range amdGPUs {
|
||||
if gpuInfo.ApuUseGTT {
|
||||
mem.TotalMemory -= gpuInfo.TotalMemory
|
||||
mem.FreeMemory -= gpuInfo.TotalMemory
|
||||
}
|
||||
}
|
||||
return mem, nil
|
||||
}
|
||||
|
||||
|
@ -36,9 +36,10 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
|
||||
UnreliableFreeMemory bool
|
||||
|
||||
// GPU information
|
||||
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
|
||||
Name string `json:"name"` // user friendly name if available
|
||||
Compute string `json:"compute"` // Compute Capability or gfx
|
||||
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
|
||||
Name string `json:"name"` // user friendly name if available
|
||||
Compute string `json:"compute"` // Compute Capability or gfx
|
||||
ApuUseGTT bool //AMD APU using GTT memory used to set -no-mmap to avoid trashing RAM, GTT use RAM
|
||||
|
||||
// Driver Information - TODO no need to put this on each GPU
|
||||
DriverMajor int `json:"driver_major,omitempty"`
|
||||
|
@ -244,7 +244,8 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
||||
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
|
||||
(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) ||
|
||||
(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
|
||||
(opts.UseMMap != nil && !*opts.UseMMap) {
|
||||
(opts.UseMMap != nil && !*opts.UseMMap) ||
|
||||
(gpus[0].ApuUseGTT && opts.UseMMap != nil && !*opts.UseMMap) {
|
||||
params = append(params, "--no-mmap")
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user