Merge 923b3294817f9106c40e3dce050f5375141b8da1 into 67691e410db7a50b07a64858820b14de9aa91314

This commit is contained in:
Daniel Hiltgen 2024-11-14 15:57:17 +08:00 committed by GitHub
commit dd666695a9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 64 additions and 18 deletions

View File

@ -24,17 +24,17 @@ all: $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
$(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS)
$(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
@-mkdir -p $(dir $@)
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ ./runner
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ ./runner/cmd
$(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx"
$(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
@-mkdir -p $(dir $@)
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner/cmd
$(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx avx2"
$(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
@-mkdir -p $(dir $@)
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner/cmd
$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
@-mkdir -p $(dir $@)

View File

@ -82,7 +82,7 @@ $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cpp
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/" $(CGO_EXTRA_LDFLAGS)
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
@-mkdir -p $(dir $@)
GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner/cmd
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
@-mkdir -p $(dir $@)
$(CCACHE) $(GPU_COMPILER) --shared -L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@

View File

@ -1,4 +1,4 @@
package main
package runner
import (
"errors"

View File

@ -1,4 +1,4 @@
package main
package runner
import (
"testing"

7
llama/runner/cmd/cmd.go Normal file
View File

@ -0,0 +1,7 @@
// Standalone entry point for the llama runner binary
// (built as ollama_llama_server by the Makefiles above);
// it simply delegates to the shared runner implementation.
package main
import "github.com/ollama/ollama/llama/runner"
// main hands control straight to RunnerMain, which parses the
// runner's flags and starts serving the model.
func main() {
runner.RunnerMain()
}

View File

@ -1,4 +1,4 @@
package main
package runner
import (
"encoding/json"

View File

@ -1,4 +1,4 @@
package main
package runner
import (
"context"
@ -801,7 +801,7 @@ func (s *Server) loadModel(
s.ready.Done()
}
func main() {
func RunnerMain() {
mpath := flag.String("model", "", "Path to model binary file")
ppath := flag.String("mmproj", "", "Path to projector binary file")
parallel := flag.Int("parallel", 1, "Number of sequences to handle simultaneously")
@ -885,6 +885,7 @@ func main() {
listener, err := net.Listen("tcp", addr)
if err != nil {
fmt.Println("Listen error:", err)
cancel()
return
}
defer listener.Close()

View File

@ -1,4 +1,4 @@
package main
package runner
import (
"strings"

View File

@ -1,4 +1,4 @@
package main
package runner
import (
"reflect"

View File

@ -158,7 +158,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
return nil, finalErr
}
var servers []string
if cpuRunner != "" {
if cpuRunner != "" && rDir != "" {
servers = []string{cpuRunner}
} else {
servers = runners.ServersForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
@ -265,6 +265,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
}
for i := range servers {
builtin := servers[i] == "builtin"
dir := availableServers[servers[i]]
if dir == "" {
// Shouldn't happen
@ -273,7 +274,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
continue
}
if strings.HasPrefix(servers[i], "cpu") {
if strings.HasPrefix(servers[i], "cpu") || (builtin && !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64")) {
gpus = discover.GetCPUInfo()
}
@ -290,7 +291,12 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
slog.Debug("ResolveTCPAddr failed ", "error", err)
port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
}
finalParams := append(params, "--port", strconv.Itoa(port))
finalParams := []string{}
if builtin {
finalParams = []string{"_runner"}
}
finalParams = append(finalParams, params...)
finalParams = append(finalParams, "--port", strconv.Itoa(port))
pathEnv := "LD_LIBRARY_PATH"
if runtime.GOOS == "windows" {
@ -311,9 +317,19 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
libraryPaths = append(gpus[0].DependencyPath, libraryPaths...)
}
server := filepath.Join(dir, "ollama_llama_server")
if runtime.GOOS == "windows" {
server += ".exe"
var server string
if builtin {
exe, err := os.Executable()
if err != nil {
slog.Warn("executable lookup failure", "error", err)
continue
}
server = exe
} else {
server = filepath.Join(dir, "ollama_llama_server")
if runtime.GOOS == "windows" {
server += ".exe"
}
}
// Detect tmp cleaners wiping out the file

View File

@ -2,12 +2,21 @@ package main
import (
"context"
"os"
"github.com/spf13/cobra"
"github.com/ollama/ollama/cmd"
"github.com/ollama/ollama/llama/runner"
)
func main() {
// The ollama binary doubles as the runner: when invoked as
// "ollama _runner <flags...>", bypass the CLI and run the embedded
// runner instead. This is the fallback used by the scheduler when no
// payload runners are found on disk (see the "builtin" server path).
if len(os.Args) >= 2 {
if os.Args[1] == "_runner" {
// Drop the "_runner" sentinel so RunnerMain sees only its own flags.
os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
runner.RunnerMain()
return
}
}
// Normal path: execute the cobra-based CLI.
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
}

View File

@ -105,7 +105,9 @@ func locateRunners() (string, error) {
return candidate, nil
}
}
return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
// Fall back to built-in
slog.Debug("unable to locate runners, using built-in")
return "", nil
}
// Return true if we're carrying nested payloads for the runners
@ -276,6 +278,11 @@ func cleanupTmpDirs() {
// lowest common denominator
func GetAvailableServers(payloadsDir string) map[string]string {
if payloadsDir == "" {
exe, err := os.Executable()
if err == nil {
slog.Debug("Wiring up built-in runner")
return map[string]string{"builtin": filepath.Dir(exe)}
}
slog.Error("empty runner dir")
return nil
}
@ -304,6 +311,12 @@ func GetAvailableServers(payloadsDir string) map[string]string {
func ServersForGpu(info discover.GpuInfo) []string {
// glob workDir for files that start with ollama_
availableServers := GetAvailableServers(runnersDir)
// Short circuit if the only option is built-in
if _, ok := availableServers["builtin"]; ok {
return []string{"builtin"}
}
requested := info.Library
if info.Variant != discover.CPUCapabilityNone.String() {
requested += "_" + info.Variant