Merge bdd7e2f91adcf485a505b929f0d6a5d66a93e2e1 into 67691e410db7a50b07a64858820b14de9aa91314
This commit is contained in:
commit
e9002f9ffc
@ -8,6 +8,7 @@ import (
|
|||||||
"io/fs"
|
"io/fs"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"slices"
|
"slices"
|
||||||
@ -30,9 +31,11 @@ const (
|
|||||||
GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
|
GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
|
||||||
|
|
||||||
// Direct Rendering Manager sysfs location
|
// Direct Rendering Manager sysfs location
|
||||||
DRMDeviceDirGlob = "/sys/class/drm/card*/device"
|
DRMDeviceDirGlob = "/sys/class/drm/card*/device"
|
||||||
DRMTotalMemoryFile = "mem_info_vram_total"
|
DRMTotalMemoryFile = "mem_info_vram_total"
|
||||||
DRMUsedMemoryFile = "mem_info_vram_used"
|
DRMUsedMemoryFile = "mem_info_vram_used"
|
||||||
|
DRMTotalMemoryFileGTT = "mem_info_gtt_total"
|
||||||
|
DRMUsedMemoryFileGTT = "mem_info_gtt_used"
|
||||||
|
|
||||||
// In hex; properties file is in decimal
|
// In hex; properties file is in decimal
|
||||||
DRMUniqueIDFile = "unique_id"
|
DRMUniqueIDFile = "unique_id"
|
||||||
@ -44,8 +47,79 @@ var (
|
|||||||
// Used to validate if the given ROCm lib is usable
|
// Used to validate if the given ROCm lib is usable
|
||||||
ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
|
ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
|
||||||
RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
|
RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
|
||||||
|
|
||||||
|
// APUvalidForGTT contains the list of GPU architectures that support GTT memory allocation
|
||||||
|
APUvalidForGTT = []string{
|
||||||
|
"gfx1103", // Radeon 890m, 780m, 760m, 740m (RDNA3)
|
||||||
|
"gfx1151", // RDNA3+
|
||||||
|
"gfx1152", // RDNA3+
|
||||||
|
"gfx1037", // Radeon 610M (RDNA2)
|
||||||
|
"gfx1035", // Radeon 680m, 660m (RDNA2)
|
||||||
|
"gfx1033", // Van Gogh (RDNA2)
|
||||||
|
"gfx1036", // Generic RDNA2
|
||||||
|
"gfx940", // MI300A (CDNA3)
|
||||||
|
"gfx90c", // Radeon Vega 7 (Ryzen 5600G)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApuUseGTT indicates whether GTT memory allocation is enabled for the current APU
|
||||||
|
ApuUseGTT bool
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Check for valid APU an linux kenel version to use GTT memory insted VRAM memory
|
||||||
|
func GTTmemoryOnAPU(gfx string) (bool, error) {
|
||||||
|
// Check kernel version
|
||||||
|
cmd := exec.Command("uname", "-r")
|
||||||
|
output, err := cmd.Output()
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("error executing uname command: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fullKernelVersion := strings.TrimSpace(string(output))
|
||||||
|
|
||||||
|
// Split by "-" and take the first part, or use the whole string if no "-" is present
|
||||||
|
versionPart := fullKernelVersion
|
||||||
|
if parts := strings.SplitN(fullKernelVersion, "-", 2); len(parts) > 1 {
|
||||||
|
versionPart = parts[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
versionParts := strings.Split(versionPart, ".")
|
||||||
|
if len(versionParts) < 3 {
|
||||||
|
return false, fmt.Errorf("unable to parse kernel version: %s", fullKernelVersion)
|
||||||
|
}
|
||||||
|
|
||||||
|
major, err := strconv.Atoi(versionParts[0])
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("error parsing major version: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
minor, err := strconv.Atoi(versionParts[1])
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("error parsing minor version: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
patch, err := strconv.Atoi(versionParts[2])
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("error parsing patch version: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
kernelVersionValid := (major > 6 || (major == 6 && minor > 9) || (major == 6 && minor == 9 && patch >= 9))
|
||||||
|
|
||||||
|
gfxValid := false
|
||||||
|
for _, validGfx := range APUvalidForGTT {
|
||||||
|
if strings.Contains(gfx, validGfx) {
|
||||||
|
gfxValid = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if kernelVersionValid && gfxValid {
|
||||||
|
slog.Debug("AMD APU valid to use GTT memory")
|
||||||
|
}
|
||||||
|
|
||||||
|
return kernelVersionValid && gfxValid, nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// Gather GPU information from the amdgpu driver if any supported GPUs are detected
|
// Gather GPU information from the amdgpu driver if any supported GPUs are detected
|
||||||
// Only called once during bootstrap
|
// Only called once during bootstrap
|
||||||
func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
||||||
@ -235,10 +309,19 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
|||||||
if !matched {
|
if !matched {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
ApuUseGTT, err = GTTmemoryOnAPU(fmt.Sprintf("gfx%d%x%x", major, minor, patch))
|
||||||
|
if err != nil {
|
||||||
|
slog.Debug("Error:", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
// Found the matching DRM directory
|
// Found the matching DRM directory
|
||||||
slog.Debug("matched", "amdgpu", match, "drm", devDir)
|
slog.Debug("matched", "amdgpu", match, "drm", devDir)
|
||||||
totalFile := filepath.Join(devDir, DRMTotalMemoryFile)
|
var totalFile string
|
||||||
|
if ApuUseGTT {
|
||||||
|
totalFile = filepath.Join(devDir, DRMTotalMemoryFileGTT)
|
||||||
|
} else {
|
||||||
|
totalFile = filepath.Join(devDir, DRMTotalMemoryFile)
|
||||||
|
}
|
||||||
buf, err := os.ReadFile(totalFile)
|
buf, err := os.ReadFile(totalFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Debug("failed to read sysfs node", "file", totalFile, "error", err)
|
slog.Debug("failed to read sysfs node", "file", totalFile, "error", err)
|
||||||
@ -250,7 +333,12 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
|
var usedFile string
|
||||||
|
if ApuUseGTT {
|
||||||
|
usedFile = filepath.Join(devDir, DRMUsedMemoryFileGTT)
|
||||||
|
} else {
|
||||||
|
usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
|
||||||
|
}
|
||||||
usedMemory, err = getFreeMemory(usedFile)
|
usedMemory, err = getFreeMemory(usedFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Debug("failed to update used memory", "error", err)
|
slog.Debug("failed to update used memory", "error", err)
|
||||||
@ -285,6 +373,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
|||||||
MinimumMemory: rocmMinimumMemory,
|
MinimumMemory: rocmMinimumMemory,
|
||||||
DriverMajor: driverMajor,
|
DriverMajor: driverMajor,
|
||||||
DriverMinor: driverMinor,
|
DriverMinor: driverMinor,
|
||||||
|
ApuUseGTT: ApuUseGTT, //AMD APU use GTT for its memory
|
||||||
},
|
},
|
||||||
usedFilepath: usedFile,
|
usedFilepath: usedFile,
|
||||||
index: gpuID,
|
index: gpuID,
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"bufio"
|
"bufio"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"reflect"
|
"reflect"
|
||||||
"regexp"
|
"regexp"
|
||||||
@ -92,6 +93,19 @@ func GetCPUMem() (memInfo, error) {
|
|||||||
} else {
|
} else {
|
||||||
mem.FreeMemory = (free + buffers + cached) * format.KibiByte
|
mem.FreeMemory = (free + buffers + cached) * format.KibiByte
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Do not consider RAM that can be used by GTT for AMD APUs
|
||||||
|
amdGPUs, err := AMDGetGPUInfo()
|
||||||
|
if err != nil {
|
||||||
|
slog.Debug("Error getting AMD GPU info: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, gpuInfo := range amdGPUs {
|
||||||
|
if gpuInfo.ApuUseGTT {
|
||||||
|
mem.TotalMemory -= gpuInfo.TotalMemory
|
||||||
|
mem.FreeMemory -= gpuInfo.TotalMemory
|
||||||
|
}
|
||||||
|
}
|
||||||
return mem, nil
|
return mem, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,9 +36,10 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
|
|||||||
UnreliableFreeMemory bool
|
UnreliableFreeMemory bool
|
||||||
|
|
||||||
// GPU information
|
// GPU information
|
||||||
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
|
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
|
||||||
Name string `json:"name"` // user friendly name if available
|
Name string `json:"name"` // user friendly name if available
|
||||||
Compute string `json:"compute"` // Compute Capability or gfx
|
Compute string `json:"compute"` // Compute Capability or gfx
|
||||||
|
ApuUseGTT bool //AMD APU using GTT memory used to set -no-mmap to avoid trashing RAM, GTT use RAM
|
||||||
|
|
||||||
// Driver Information - TODO no need to put this on each GPU
|
// Driver Information - TODO no need to put this on each GPU
|
||||||
DriverMajor int `json:"driver_major,omitempty"`
|
DriverMajor int `json:"driver_major,omitempty"`
|
||||||
|
@ -244,7 +244,8 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
|||||||
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
|
if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
|
||||||
(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) ||
|
(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) ||
|
||||||
(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
|
(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
|
||||||
(opts.UseMMap != nil && !*opts.UseMMap) {
|
(opts.UseMMap != nil && !*opts.UseMMap) ||
|
||||||
|
(gpus[0].ApuUseGTT && opts.UseMMap != nil && !*opts.UseMMap) {
|
||||||
params = append(params, "--no-mmap")
|
params = append(params, "--no-mmap")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user