Merge 923b3294817f9106c40e3dce050f5375141b8da1 into 67691e410db7a50b07a64858820b14de9aa91314
This commit is contained in:
commit
dd666695a9
@ -24,17 +24,17 @@ all: $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
|
||||
$(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS)
|
||||
$(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
|
||||
@-mkdir -p $(dir $@)
|
||||
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ ./runner
|
||||
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ ./runner/cmd
|
||||
|
||||
$(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx"
|
||||
$(RUNNERS_BUILD_DIR)/cpu_avx/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
|
||||
@-mkdir -p $(dir $@)
|
||||
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner
|
||||
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner/cmd
|
||||
|
||||
$(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS="avx avx2"
|
||||
$(RUNNERS_BUILD_DIR)/cpu_avx2/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
|
||||
@-mkdir -p $(dir $@)
|
||||
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner
|
||||
GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(TARGET_CPU_FLAGS)) -o $@ ./runner/cmd
|
||||
|
||||
$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
|
||||
@-mkdir -p $(dir $@)
|
||||
|
@ -82,7 +82,7 @@ $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cpp
|
||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/" $(CGO_EXTRA_LDFLAGS)
|
||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
|
||||
@-mkdir -p $(dir $@)
|
||||
GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
|
||||
GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner/cmd
|
||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CCACHE) $(GPU_COMPILER) --shared -L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
|
||||
|
@ -1,4 +1,4 @@
|
||||
package main
|
||||
package runner
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
@ -1,4 +1,4 @@
|
||||
package main
|
||||
package runner
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
7
llama/runner/cmd/cmd.go
Normal file
7
llama/runner/cmd/cmd.go
Normal file
@ -0,0 +1,7 @@
|
||||
package main
|
||||
|
||||
import "github.com/ollama/ollama/llama/runner"
|
||||
|
||||
// main is the entry point of the standalone runner command; it only
// delegates to runner.RunnerMain.
func main() {
|
||||
runner.RunnerMain()
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package main
|
||||
package runner
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
@ -1,4 +1,4 @@
|
||||
package main
|
||||
package runner
|
||||
|
||||
import (
|
||||
"context"
|
||||
@ -801,7 +801,7 @@ func (s *Server) loadModel(
|
||||
s.ready.Done()
|
||||
}
|
||||
|
||||
func main() {
|
||||
func RunnerMain() {
|
||||
mpath := flag.String("model", "", "Path to model binary file")
|
||||
ppath := flag.String("mmproj", "", "Path to projector binary file")
|
||||
parallel := flag.Int("parallel", 1, "Number of sequences to handle simultaneously")
|
||||
@ -885,6 +885,7 @@ func main() {
|
||||
listener, err := net.Listen("tcp", addr)
|
||||
if err != nil {
|
||||
fmt.Println("Listen error:", err)
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
defer listener.Close()
|
||||
|
@ -1,4 +1,4 @@
|
||||
package main
|
||||
package runner
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
@ -1,4 +1,4 @@
|
||||
package main
|
||||
package runner
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
@ -158,7 +158,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
||||
return nil, finalErr
|
||||
}
|
||||
var servers []string
|
||||
if cpuRunner != "" {
|
||||
if cpuRunner != "" && rDir != "" {
|
||||
servers = []string{cpuRunner}
|
||||
} else {
|
||||
servers = runners.ServersForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
|
||||
@ -265,6 +265,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
||||
}
|
||||
|
||||
for i := range servers {
|
||||
builtin := servers[i] == "builtin"
|
||||
dir := availableServers[servers[i]]
|
||||
if dir == "" {
|
||||
// Shouldn't happen
|
||||
@ -273,7 +274,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(servers[i], "cpu") {
|
||||
if strings.HasPrefix(servers[i], "cpu") || (builtin && !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64")) {
|
||||
gpus = discover.GetCPUInfo()
|
||||
}
|
||||
|
||||
@ -290,7 +291,12 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
||||
slog.Debug("ResolveTCPAddr failed ", "error", err)
|
||||
port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
|
||||
}
|
||||
finalParams := append(params, "--port", strconv.Itoa(port))
|
||||
finalParams := []string{}
|
||||
if builtin {
|
||||
finalParams = []string{"_runner"}
|
||||
}
|
||||
finalParams = append(finalParams, params...)
|
||||
finalParams = append(finalParams, "--port", strconv.Itoa(port))
|
||||
|
||||
pathEnv := "LD_LIBRARY_PATH"
|
||||
if runtime.GOOS == "windows" {
|
||||
@ -311,9 +317,19 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
||||
libraryPaths = append(gpus[0].DependencyPath, libraryPaths...)
|
||||
}
|
||||
|
||||
server := filepath.Join(dir, "ollama_llama_server")
|
||||
if runtime.GOOS == "windows" {
|
||||
server += ".exe"
|
||||
var server string
|
||||
if builtin {
|
||||
exe, err := os.Executable()
|
||||
if err != nil {
|
||||
slog.Warn("executable lookup failure", "error", err)
|
||||
continue
|
||||
}
|
||||
server = exe
|
||||
} else {
|
||||
server = filepath.Join(dir, "ollama_llama_server")
|
||||
if runtime.GOOS == "windows" {
|
||||
server += ".exe"
|
||||
}
|
||||
}
|
||||
|
||||
// Detect tmp cleaners wiping out the file
|
||||
|
9
main.go
9
main.go
@ -2,12 +2,21 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/ollama/ollama/cmd"
|
||||
"github.com/ollama/ollama/llama/runner"
|
||||
)
|
||||
|
||||
// main runs either the built-in runner or the regular CLI, depending on the
// first command-line argument.
func main() {
|
||||
if len(os.Args) >= 2 {
|
||||
// "_runner" as the first argument selects the built-in runner instead of the CLI.
if os.Args[1] == "_runner" {
|
||||
// Drop the "_runner" marker, keeping the program name and the remaining
// arguments, before handing control to the runner.
// NOTE(review): presumably so the runner's flag handling only sees its own flags — confirm.
os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
|
||||
runner.RunnerMain()
|
||||
return
|
||||
}
|
||||
}
|
||||
// No runner marker: build the CLI, execute it with a background context,
// and pass any returned error to cobra.CheckErr.
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
|
||||
}
|
||||
|
@ -105,7 +105,9 @@ func locateRunners() (string, error) {
|
||||
return candidate, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
|
||||
// Fall back to built-in
|
||||
slog.Debug("unable to locate runners, using built-in")
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// Return true if we're carrying nested payloads for the runners
|
||||
@ -276,6 +278,11 @@ func cleanupTmpDirs() {
|
||||
// lowest common denominator
|
||||
func GetAvailableServers(payloadsDir string) map[string]string {
|
||||
if payloadsDir == "" {
|
||||
exe, err := os.Executable()
|
||||
if err == nil {
|
||||
slog.Debug("Wiring up built-in runner")
|
||||
return map[string]string{"builtin": filepath.Dir(exe)}
|
||||
}
|
||||
slog.Error("empty runner dir")
|
||||
return nil
|
||||
}
|
||||
@ -304,6 +311,12 @@ func GetAvailableServers(payloadsDir string) map[string]string {
|
||||
func ServersForGpu(info discover.GpuInfo) []string {
|
||||
// glob workDir for files that start with ollama_
|
||||
availableServers := GetAvailableServers(runnersDir)
|
||||
|
||||
// Short circuit if the only option is built-in
|
||||
if _, ok := availableServers["builtin"]; ok {
|
||||
return []string{"builtin"}
|
||||
}
|
||||
|
||||
requested := info.Library
|
||||
if info.Variant != discover.CPUCapabilityNone.String() {
|
||||
requested += "_" + info.Variant
|
||||
|
Loading…
x
Reference in New Issue
Block a user