From bdd7e2f91adcf485a505b929f0d6a5d66a93e2e1 Mon Sep 17 00:00:00 2001
From: Maciej Mogilany <mogilany@gmail.com>
Date: Fri, 8 Nov 2024 15:09:12 +0100
Subject: [PATCH] rewrite for ollama 4.0

---
 discover/amd_linux.go | 101 +++++++++++++++++++++++++++++++++++++++---
 discover/gpu_linux.go |  14 ++++++
 discover/types.go     |   7 +--
 llm/server.go         |   3 +-
 4 files changed, 115 insertions(+), 10 deletions(-)

diff --git a/discover/amd_linux.go b/discover/amd_linux.go
index fad7b7a6..fe849542 100644
--- a/discover/amd_linux.go
+++ b/discover/amd_linux.go
@@ -8,6 +8,7 @@ import (
 	"io/fs"
 	"log/slog"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"regexp"
 	"slices"
@@ -30,9 +31,11 @@ const (
 	GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
 
 	// Direct Rendering Manager sysfs location
-	DRMDeviceDirGlob   = "/sys/class/drm/card*/device"
-	DRMTotalMemoryFile = "mem_info_vram_total"
-	DRMUsedMemoryFile  = "mem_info_vram_used"
+	DRMDeviceDirGlob      = "/sys/class/drm/card*/device"
+	DRMTotalMemoryFile    = "mem_info_vram_total"
+	DRMUsedMemoryFile     = "mem_info_vram_used"
+	DRMTotalMemoryFileGTT = "mem_info_gtt_total"
+	DRMUsedMemoryFileGTT  = "mem_info_gtt_used"
 
 	// In hex; properties file is in decimal
 	DRMUniqueIDFile = "unique_id"
@@ -44,8 +47,79 @@ var (
 	// Used to validate if the given ROCm lib is usable
 	ROCmLibGlobs          = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
 	RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
+
+	// APUvalidForGTT contains the list of GPU architectures that support GTT memory allocation
+	APUvalidForGTT = []string{
+		"gfx1103", // Radeon 890m, 780m, 760m, 740m (RDNA3)
+		"gfx1151", // RDNA3+
+		"gfx1152", // RDNA3+
+		"gfx1037", // Radeon 610M (RDNA2)
+		"gfx1035", // Radeon 680m, 660m (RDNA2)
+		"gfx1033", // Van Gogh (RDNA2)
+		"gfx1036", // Generic RDNA2
+		"gfx940",  // MI300A (CDNA3)
+		"gfx90c",  // Radeon Vega 7 (Ryzen 5600G)
+	}
+
+	// ApuUseGTT indicates whether GTT memory allocation is enabled for the current APU
+	ApuUseGTT bool
 )
 
+// Check for valid APU an linux kenel version to use GTT memory insted VRAM memory
+func GTTmemoryOnAPU(gfx string) (bool, error) {
+	// Check kernel version
+	cmd := exec.Command("uname", "-r")
+	output, err := cmd.Output()
+	if err != nil {
+		return false, fmt.Errorf("error executing uname command: %w", err)
+	}
+
+	fullKernelVersion := strings.TrimSpace(string(output))
+
+	// Split by "-" and take the first part, or use the whole string if no "-" is present
+	versionPart := fullKernelVersion
+	if parts := strings.SplitN(fullKernelVersion, "-", 2); len(parts) > 1 {
+		versionPart = parts[0]
+	}
+
+	versionParts := strings.Split(versionPart, ".")
+	if len(versionParts) < 3 {
+		return false, fmt.Errorf("unable to parse kernel version: %s", fullKernelVersion)
+	}
+
+	major, err := strconv.Atoi(versionParts[0])
+	if err != nil {
+		return false, fmt.Errorf("error parsing major version: %w", err)
+	}
+
+	minor, err := strconv.Atoi(versionParts[1])
+	if err != nil {
+		return false, fmt.Errorf("error parsing minor version: %w", err)
+	}
+
+	patch, err := strconv.Atoi(versionParts[2])
+	if err != nil {
+		return false, fmt.Errorf("error parsing patch version: %w", err)
+	}
+
+	kernelVersionValid := (major > 6 || (major == 6 && minor > 9) || (major == 6 && minor == 9 && patch >= 9))
+
+	gfxValid := false
+	for _, validGfx := range APUvalidForGTT {
+		if strings.Contains(gfx, validGfx) {
+			gfxValid = true
+			break
+		}
+	}
+
+	if kernelVersionValid && gfxValid {
+		slog.Debug("AMD APU valid to use GTT memory")
+	}
+
+	return kernelVersionValid && gfxValid, nil
+
+}
+
 // Gather GPU information from the amdgpu driver if any supported GPUs are detected
 // Only called once during bootstrap
 func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
@@ -235,10 +309,19 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 			if !matched {
 				continue
 			}
-
+			ApuUseGTT, err = GTTmemoryOnAPU(fmt.Sprintf("gfx%d%x%x", major, minor, patch))
+			if err != nil {
+				slog.Debug("Error:", err)
+				continue
+			}
 			// Found the matching DRM directory
 			slog.Debug("matched", "amdgpu", match, "drm", devDir)
-			totalFile := filepath.Join(devDir, DRMTotalMemoryFile)
+			var totalFile string
+			if ApuUseGTT {
+				totalFile = filepath.Join(devDir, DRMTotalMemoryFileGTT)
+			} else {
+				totalFile = filepath.Join(devDir, DRMTotalMemoryFile)
+			}
 			buf, err := os.ReadFile(totalFile)
 			if err != nil {
 				slog.Debug("failed to read sysfs node", "file", totalFile, "error", err)
@@ -250,7 +333,12 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 				break
 			}
 
-			usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
+			var usedFile string
+			if ApuUseGTT {
+				usedFile = filepath.Join(devDir, DRMUsedMemoryFileGTT)
+			} else {
+				usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
+			}
 			usedMemory, err = getFreeMemory(usedFile)
 			if err != nil {
 				slog.Debug("failed to update used memory", "error", err)
@@ -285,6 +373,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 				MinimumMemory: rocmMinimumMemory,
 				DriverMajor:   driverMajor,
 				DriverMinor:   driverMinor,
+				ApuUseGTT:     ApuUseGTT, //AMD APU use GTT for its memory
 			},
 			usedFilepath: usedFile,
 			index:        gpuID,
diff --git a/discover/gpu_linux.go b/discover/gpu_linux.go
index d636a54e..c400291d 100644
--- a/discover/gpu_linux.go
+++ b/discover/gpu_linux.go
@@ -4,6 +4,7 @@ import (
 	"bufio"
 	"fmt"
 	"io"
+	"log/slog"
 	"os"
 	"reflect"
 	"regexp"
@@ -92,6 +93,19 @@ func GetCPUMem() (memInfo, error) {
 	} else {
 		mem.FreeMemory = (free + buffers + cached) * format.KibiByte
 	}
+
+	//Do not consider RAM that can be used by GTT for AMD APUs
+	amdGPUs, err := AMDGetGPUInfo()
+	if err != nil {
+		slog.Debug("Error getting AMD GPU info: %v", err)
+	}
+
+	for _, gpuInfo := range amdGPUs {
+		if gpuInfo.ApuUseGTT {
+			mem.TotalMemory -= gpuInfo.TotalMemory
+			mem.FreeMemory -= gpuInfo.TotalMemory
+		}
+	}
 	return mem, nil
 }
 
diff --git a/discover/types.go b/discover/types.go
index b2cca109..6cc35d10 100644
--- a/discover/types.go
+++ b/discover/types.go
@@ -36,9 +36,10 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
 	UnreliableFreeMemory bool
 
 	// GPU information
-	ID      string `json:"gpu_id"`  // string to use for selection of this specific GPU
-	Name    string `json:"name"`    // user friendly name if available
-	Compute string `json:"compute"` // Compute Capability or gfx
+	ID        string `json:"gpu_id"`  // string to use for selection of this specific GPU
+	Name      string `json:"name"`    // user friendly name if available
+	Compute   string `json:"compute"` // Compute Capability or gfx
+	ApuUseGTT bool   //AMD APU using GTT memory used to set -no-mmap to avoid trashing RAM, GTT use RAM
 
 	// Driver Information - TODO no need to put this on each GPU
 	DriverMajor int `json:"driver_major,omitempty"`
diff --git a/llm/server.go b/llm/server.go
index 5ca6aa32..2ccc5164 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -244,7 +244,8 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 	if (runtime.GOOS == "windows" && gpus[0].Library == "cuda" && opts.UseMMap == nil) ||
 		(runtime.GOOS == "linux" && systemFreeMemory < estimate.TotalSize && opts.UseMMap == nil) ||
 		(gpus[0].Library == "cpu" && opts.UseMMap == nil) ||
-		(opts.UseMMap != nil && !*opts.UseMMap) {
+		(opts.UseMMap != nil && !*opts.UseMMap) ||
+		(gpus[0].ApuUseGTT && opts.UseMMap != nil && !*opts.UseMMap) {
 		params = append(params, "--no-mmap")
 	}