diff --git a/llama/make/Makefile.default b/llama/make/Makefile.default index 95b13a73..aa05d8d0 100644 --- a/llama/make/Makefile.default +++ b/llama/make/Makefile.default @@ -24,17 +24,17 @@ all: $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS) $(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS) $(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) @-mkdir -p $(dir $@) - GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ ./runner + GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ ./runner/cmd $(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx" $(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) @-mkdir -p $(dir $@) - GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner + GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner/cmd $(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx avx2" $(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) @-mkdir -p $(dir $@) - GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner + GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner/cmd $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/% @-mkdir -p $(dir $@) diff --git a/llama/make/gpu.make b/llama/make/gpu.make index 939fa41a..cb154265 100644 --- a/llama/make/gpu.make +++ b/llama/make/gpu.make @@ -82,7 +82,7 @@ $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cpp $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/" $(CGO_EXTRA_LDFLAGS) $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) @-mkdir -p $(dir $@) - GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner + GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner/cmd $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS) @-mkdir -p $(dir $@) $(CCACHE) $(GPU_COMPILER) --shared $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@ diff --git a/llama/runner/cache.go b/llama/runner/cache.go index ef8f6cfb..3baa305b 100644 --- a/llama/runner/cache.go +++ b/llama/runner/cache.go @@ -1,4 +1,4 @@ -package main +package runner import ( "errors" diff --git a/llama/runner/cache_test.go b/llama/runner/cache_test.go index cc13b5f2..30e74d96 100644 --- a/llama/runner/cache_test.go +++ b/llama/runner/cache_test.go @@ -1,4 +1,4 @@ -package main +package runner import ( "reflect" diff --git a/llama/runner/cmd/cmd.go b/llama/runner/cmd/cmd.go new file mode 100644 index 00000000..26b56af6 --- /dev/null +++ b/llama/runner/cmd/cmd.go @@ -0,0 +1,7 @@ +package main + +import "github.com/ollama/ollama/llama/runner" + +func main() { + runner.RunnerMain() +} diff --git a/llama/runner/requirements.go b/llama/runner/requirements.go index 71b3b9aa..4511559d 100644 --- a/llama/runner/requirements.go +++ b/llama/runner/requirements.go @@ -1,4 +1,4 @@ -package main +package runner import ( "encoding/json" diff --git a/llama/runner/runner.go b/llama/runner/runner.go index f472d076..29183faf 100644 --- a/llama/runner/runner.go +++ b/llama/runner/runner.go @@ -1,4 +1,4 @@ -package main +package runner import ( "context" @@ -827,7 +827,7 @@ func (s *Server) loadModel( s.ready.Done() } -func main() { +func RunnerMain() { mpath := flag.String("model", "", "Path to model binary file") ppath := flag.String("mmproj", "", "Path to projector binary file") parallel := flag.Int("parallel", 1, "Number of sequences to handle simultaneously") @@ -917,6 +917,7 @@ func main() { listener, err := net.Listen("tcp", addr) if err != nil { fmt.Println("Listen error:", err) + cancel() return } defer listener.Close() diff --git a/llama/runner/stop.go b/llama/runner/stop.go index c05f5e3d..8dcb08d3 100644 --- a/llama/runner/stop.go +++ b/llama/runner/stop.go @@ -1,4 +1,4 @@ -package main +package runner import ( "strings" diff --git a/llama/runner/stop_test.go b/llama/runner/stop_test.go index 51b35fde..31dc161f 100644 --- a/llama/runner/stop_test.go +++ b/llama/runner/stop_test.go @@ -1,4 +1,4 @@ -package main +package runner import ( "reflect" diff --git a/llm/server.go b/llm/server.go index a4c99dd9..8b5b0b76 100644 --- a/llm/server.go +++ b/llm/server.go @@ -158,7 +158,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter return nil, finalErr } var servers []string - if cpuRunner != "" { + if cpuRunner != "" && rDir != "" { servers = []string{cpuRunner} } else { servers = runners.ServersForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant @@ -270,6 +270,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter } for i := range servers { + builtin := servers[i] == "builtin" dir := availableServers[servers[i]] if dir == "" { // Shouldn't happen @@ -278,7 +279,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter continue } - if strings.HasPrefix(servers[i], "cpu") { + if strings.HasPrefix(servers[i], "cpu") || (builtin && !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64")) { gpus = discover.GetCPUInfo() } @@ -295,7 +296,12 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter slog.Debug("ResolveTCPAddr failed ", "error", err) port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range } - finalParams := append(params, "--port", strconv.Itoa(port)) + finalParams := []string{} + if builtin { + finalParams = []string{"_runner"} + } + finalParams = append(finalParams, params...) + finalParams = append(finalParams, "--port", strconv.Itoa(port)) pathEnv := "LD_LIBRARY_PATH" if runtime.GOOS == "windows" { @@ -316,9 +322,19 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...) } - server := filepath.Join(dir, "ollama_llama_server") - if runtime.GOOS == "windows" { - server += ".exe" + var server string + if builtin { + exe, err := os.Executable() + if err != nil { + slog.Warn("executable lookup failure", "error", err) + continue + } + server = exe + } else { + server = filepath.Join(dir, "ollama_llama_server") + if runtime.GOOS == "windows" { + server += ".exe" + } } // Detect tmp cleaners wiping out the file diff --git a/main.go b/main.go index 650e03a6..3f07eb40 100644 --- a/main.go +++ b/main.go @@ -2,12 +2,21 @@ package main import ( "context" + "os" "github.com/spf13/cobra" "github.com/ollama/ollama/cmd" + "github.com/ollama/ollama/llama/runner" ) func main() { + if len(os.Args) >= 2 { + if os.Args[1] == "_runner" { + os.Args = append([]string{os.Args[0]}, os.Args[2:]...) + runner.RunnerMain() + return + } + } cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background())) } diff --git a/runners/common.go b/runners/common.go index 19014d75..c26a208c 100644 --- a/runners/common.go +++ b/runners/common.go @@ -105,7 +105,9 @@ func locateRunners() (string, error) { return candidate, nil } } - return "", fmt.Errorf("unable to locate runners in any search path %v", paths) + // Fall back to built-in + slog.Debug("unable to locate runners, using built-in") + return "", nil } // Return true if we're carying nested payloads for the runners @@ -276,6 +278,11 @@ func cleanupTmpDirs() { // lowest common denominator func GetAvailableServers(payloadsDir string) map[string]string { if payloadsDir == "" { + exe, err := os.Executable() + if err == nil { + slog.Debug("Wiring up built-in runner") + return map[string]string{"builtin": filepath.Dir(exe)} + } slog.Error("empty runner dir") return nil } @@ -304,6 +311,12 @@ func GetAvailableServers(payloadsDir string) map[string]string { func ServersForGpu(info discover.GpuInfo) []string { // glob workDir for files that start with ollama_ availableServers := GetAvailableServers(runnersDir) + + // Short circuit if the only option is built-in + if _, ok := availableServers["builtin"]; ok { + return []string{"builtin"} + } + requested := info.Library if info.Variant != discover.CPUCapabilityNone.String() { requested += "_" + info.Variant