llama: wire up builtin runner

This adds a new entrypoint into the ollama CLI to run the cgo built runner.
On macOS arm64, this will have GPU support, but on all other platforms it will
be the lowest-common-denominator CPU build. After we fully transition
to the new Go runners, more tech debt can be removed and we can stop building
the "default" runner via make, relying on the built-in runner instead.
This commit is contained in:
Daniel Hiltgen 2024-09-26 15:21:33 -07:00
parent 078f666f73
commit 923b329481
12 changed files with 64 additions and 18 deletions

View File

@ -24,17 +24,17 @@ all: $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
$(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS) $(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS)
$(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) $(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ ./runner GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ ./runner/cmd
$(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx" $(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx"
$(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) $(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner/cmd
$(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx avx2" $(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx avx2"
$(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) $(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner/cmd
$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/% $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)

View File

@ -82,7 +82,7 @@ $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cpp
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/" $(CGO_EXTRA_LDFLAGS) $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/" $(CGO_EXTRA_LDFLAGS)
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner/cmd
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS) $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
@-mkdir -p $(dir $@) @-mkdir -p $(dir $@)
$(CCACHE) $(GPU_COMPILER) --shared $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@ $(CCACHE) $(GPU_COMPILER) --shared $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@

View File

@ -1,4 +1,4 @@
package main package runner
import ( import (
"errors" "errors"

View File

@ -1,4 +1,4 @@
package main package runner
import ( import (
"reflect" "reflect"

7
llama/runner/cmd/cmd.go Normal file
View File

@ -0,0 +1,7 @@
// Package main is the standalone entrypoint for the llama runner binary
// (ollama_llama_server); it exists so the runner package can also be
// embedded into the main ollama CLI as the built-in runner.
package main
import "github.com/ollama/ollama/llama/runner"
// main delegates directly to the shared runner implementation, which
// handles flag parsing and serving.
func main() {
runner.RunnerMain()
}

View File

@ -1,4 +1,4 @@
package main package runner
import ( import (
"encoding/json" "encoding/json"

View File

@ -1,4 +1,4 @@
package main package runner
import ( import (
"context" "context"
@ -827,7 +827,7 @@ func (s *Server) loadModel(
s.ready.Done() s.ready.Done()
} }
func main() { func RunnerMain() {
mpath := flag.String("model", "", "Path to model binary file") mpath := flag.String("model", "", "Path to model binary file")
ppath := flag.String("mmproj", "", "Path to projector binary file") ppath := flag.String("mmproj", "", "Path to projector binary file")
parallel := flag.Int("parallel", 1, "Number of sequences to handle simultaneously") parallel := flag.Int("parallel", 1, "Number of sequences to handle simultaneously")
@ -917,6 +917,7 @@ func main() {
listener, err := net.Listen("tcp", addr) listener, err := net.Listen("tcp", addr)
if err != nil { if err != nil {
fmt.Println("Listen error:", err) fmt.Println("Listen error:", err)
cancel()
return return
} }
defer listener.Close() defer listener.Close()

View File

@ -1,4 +1,4 @@
package main package runner
import ( import (
"strings" "strings"

View File

@ -1,4 +1,4 @@
package main package runner
import ( import (
"reflect" "reflect"

View File

@ -158,7 +158,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
return nil, finalErr return nil, finalErr
} }
var servers []string var servers []string
if cpuRunner != "" { if cpuRunner != "" && rDir != "" {
servers = []string{cpuRunner} servers = []string{cpuRunner}
} else { } else {
servers = runners.ServersForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant servers = runners.ServersForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
@ -270,6 +270,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
} }
for i := range servers { for i := range servers {
builtin := servers[i] == "builtin"
dir := availableServers[servers[i]] dir := availableServers[servers[i]]
if dir == "" { if dir == "" {
// Shouldn't happen // Shouldn't happen
@ -278,7 +279,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
continue continue
} }
if strings.HasPrefix(servers[i], "cpu") { if strings.HasPrefix(servers[i], "cpu") || (builtin && !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64")) {
gpus = discover.GetCPUInfo() gpus = discover.GetCPUInfo()
} }
@ -295,7 +296,12 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
slog.Debug("ResolveTCPAddr failed ", "error", err) slog.Debug("ResolveTCPAddr failed ", "error", err)
port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
} }
finalParams := append(params, "--port", strconv.Itoa(port)) finalParams := []string{}
if builtin {
finalParams = []string{"_runner"}
}
finalParams = append(finalParams, params...)
finalParams = append(finalParams, "--port", strconv.Itoa(port))
pathEnv := "LD_LIBRARY_PATH" pathEnv := "LD_LIBRARY_PATH"
if runtime.GOOS == "windows" { if runtime.GOOS == "windows" {
@ -316,9 +322,19 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...) libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...)
} }
server := filepath.Join(dir, "ollama_llama_server") var server string
if runtime.GOOS == "windows" { if builtin {
server += ".exe" exe, err := os.Executable()
if err != nil {
slog.Warn("executable lookup failure", "error", err)
continue
}
server = exe
} else {
server = filepath.Join(dir, "ollama_llama_server")
if runtime.GOOS == "windows" {
server += ".exe"
}
} }
// Detect tmp cleaners wiping out the file // Detect tmp cleaners wiping out the file

View File

@ -2,12 +2,21 @@ package main
import ( import (
"context" "context"
"os"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"github.com/ollama/ollama/cmd" "github.com/ollama/ollama/cmd"
"github.com/ollama/ollama/llama/runner"
) )
func main() { func main() {
if len(os.Args) >= 2 {
if os.Args[1] == "_runner" {
os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
runner.RunnerMain()
return
}
}
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background())) cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
} }

View File

@ -105,7 +105,9 @@ func locateRunners() (string, error) {
return candidate, nil return candidate, nil
} }
} }
return "", fmt.Errorf("unable to locate runners in any search path %v", paths) // Fall back to built-in
slog.Debug("unable to locate runners, using built-in")
return "", nil
} }
// Return true if we're carrying nested payloads for the runners
@ -276,6 +278,11 @@ func cleanupTmpDirs() {
// lowest common denominator // lowest common denominator
func GetAvailableServers(payloadsDir string) map[string]string { func GetAvailableServers(payloadsDir string) map[string]string {
if payloadsDir == "" { if payloadsDir == "" {
exe, err := os.Executable()
if err == nil {
slog.Debug("Wiring up built-in runner")
return map[string]string{"builtin": filepath.Dir(exe)}
}
slog.Error("empty runner dir") slog.Error("empty runner dir")
return nil return nil
} }
@ -304,6 +311,12 @@ func GetAvailableServers(payloadsDir string) map[string]string {
func ServersForGpu(info discover.GpuInfo) []string { func ServersForGpu(info discover.GpuInfo) []string {
// glob workDir for files that start with ollama_ // glob workDir for files that start with ollama_
availableServers := GetAvailableServers(runnersDir) availableServers := GetAvailableServers(runnersDir)
// Short circuit if the only option is built-in
if _, ok := availableServers["builtin"]; ok {
return []string{"builtin"}
}
requested := info.Library requested := info.Library
if info.Variant != discover.CPUCapabilityNone.String() { if info.Variant != discover.CPUCapabilityNone.String() {
requested += "_" + info.Variant requested += "_" + info.Variant