diff --git a/Makefile b/Makefile index f59e072c..bd4649eb 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ GOALS := $(or $(MAKECMDGOALS),all) .PHONY: $(GOALS) $(GOALS): - $(MAKE) -C llama $@ \ No newline at end of file + @$(MAKE) --no-print-directory -C llama $@ \ No newline at end of file diff --git a/docs/development.md b/docs/development.md index 13457ae3..3520dc79 100644 --- a/docs/development.md +++ b/docs/development.md @@ -3,35 +3,24 @@ Install required tools: - go version 1.22 or higher -- gcc version 11.4.0 or higher +- OS specific native compiler (see below) +- GNU Make +## Overview + +Ollama uses a mix of Go and C/C++ code to interface with GPUs. The C/C++ code is compiled with both CGO and GPU library specific compilers. A set of GNU Makefiles are used to compile the project. GPU Libraries are auto-detected based on the typical environment variables used by the respective libraries, but can be overridden if necessary. The default make target will build the runners and primary Go Ollama application. Throughout the examples below '-j 5' is suggested for 5 parallel jobs to speed up the build. You can adjust the job count based on your CPU Core count to optimize build times. To learn more about the other make targets use 'make help' + +Once you have built the GPU/CPU runners, you can compile the main application with `go build .` + ### MacOS [Download Go](https://go.dev/dl/) -Optionally enable debugging and more verbose logging: - -```bash -# At build time -export CGO_CFLAGS="-g" - -# At runtime -export OLLAMA_DEBUG=1 -``` - -Get the required libraries and build the native LLM code: (Adjust the job count based on your number of processors for a faster build) - ```bash make -j 5 ``` -Then build ollama: - -```bash -go build . -``` - Now you can run `ollama`: ```bash @@ -51,49 +40,33 @@ _Your operating system distribution may already have packages for NVIDIA CUDA. 
D Install `make`, `gcc` and `golang` as well as [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads) development and runtime packages. -Typically the build scripts will auto-detect CUDA, however, if your Linux distro -or installation approach uses unusual paths, you can specify the location by -specifying an environment variable `CUDA_LIB_DIR` to the location of the shared -libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize -a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70") - -Then generate dependencies: (Adjust the job count based on your number of processors for a faster build) +Typically the makefile will auto-detect CUDA, however, if your Linux distro +or installation approach uses alternative paths, you can specify the location by +overriding `CUDA_PATH` to the location of the CUDA toolkit. You can customize +a set of target CUDA architectures by setting `CUDA_ARCHITECTURES` (e.g. `CUDA_ARCHITECTURES=50;60;70`) ``` make -j 5 ``` -Then build the binary: - -``` -go build . -``` +If both v11 and v12 toolkits are detected, runners for both major versions will be built by default. You can build just v12 with `make cuda_v12` #### Linux ROCm (AMD) -_Your operating system distribution may already have packages for AMD ROCm and CLBlast. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_ +_Your operating system distribution may already have packages for AMD ROCm. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!_ -Install [CLBlast](https://github.com/CNugteren/CLBlast/blob/master/doc/installation.md) and [ROCm](https://rocm.docs.amd.com/en/latest/) development packages first, as well as `make`, `gcc`, and `golang`.
+Install [ROCm](https://rocm.docs.amd.com/en/latest/) development packages first, as well as `make`, `gcc`, and `golang`. Typically the build scripts will auto-detect ROCm, however, if your Linux distro or installation approach uses unusual paths, you can specify the location by -specifying an environment variable `ROCM_PATH` to the location of the ROCm -install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the -CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize -the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`) - -Then generate dependencies: (Adjust the job count based on your number of processors for a faster build) +specifying an environment variable `HIP_PATH` to the location of the ROCm +install (typically `/opt/rocm`). You can also customize +the AMD GPU targets by setting HIP_ARCHS (e.g. `HIP_ARCHS=gfx1101;gfx1102`) ``` make -j 5 ``` -Then build the binary: - -``` -go build . -``` - ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root. #### Advanced CPU Settings @@ -108,7 +81,7 @@ Custom CPU settings are not currently supported in the new Go server build but w #### Containerized Linux Build -If you have Docker available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting binary is placed in `./dist` +If you have Docker and buildx available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting artifacts are placed in `./dist` and by default the script builds both arm64 and amd64 binaries. 
If you want to build only amd64, you can build with `PLATFORM=linux/amd64 ./scripts/build_linux.sh` ### Windows @@ -126,12 +99,8 @@ The following tools are required as a minimal development environment to build C > [!NOTE] > Due to bugs in the GCC C++ library for unicode support, Ollama should be built with clang on windows. -Then, build the `ollama` binary: - -```powershell -$env:CGO_ENABLED="1" -make -j 8 -go build . +``` +make -j 5 ``` #### GPU Support diff --git a/llama/Makefile b/llama/Makefile index 47a87a75..43fe069c 100644 --- a/llama/Makefile +++ b/llama/Makefile @@ -8,40 +8,62 @@ ifeq ($(OS),windows) CUDA_PATH?=$(shell cygpath -m -s "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\" 2>/dev/null)unknown CUDA_BASE_DIR := $(dir $(shell cygpath -m -s "$(CUDA_PATH)\\.." 2>/dev/null)) CUDA_11:=$(shell ls -d $(CUDA_BASE_DIR)/v11.? 2>/dev/null) + CUDA_11_COMPILER:=$(wildcard $(CUDA_11)/bin/nvcc.exe) CUDA_12:=$(shell ls -d $(CUDA_BASE_DIR)/v12.? 2>/dev/null) + CUDA_12_COMPILER:=$(wildcard $(CUDA_12)/bin/nvcc.exe) HIP_LIB_DIR := $(shell ls -d $(HIP_PATH)/lib 2>/dev/null) + HIP_COMPILER:=$(wildcard $(HIP_PATH)/bin/hipcc.bin.exe) else ifeq ($(OS),linux) - HIP_PATH?=/opt/rocm - HIP_LIB_DIR := $(shell ls -d $(HIP_PATH)/lib 2>/dev/null) CUDA_PATH?=/usr/local/cuda CUDA_11:=$(shell ls -d $(CUDA_PATH)-11 2>/dev/null) + CUDA_11_COMPILER:=$(wildcard $(CUDA_11)/bin/nvcc) CUDA_12:=$(shell ls -d $(CUDA_PATH)-12 2>/dev/null) + CUDA_12_COMPILER:=$(wildcard $(CUDA_12)/bin/nvcc) HIP_PATH?=/opt/rocm HIP_LIB_DIR := $(shell ls -d $(HIP_PATH)/lib 2>/dev/null) + HIP_COMPILER:=$(wildcard $(HIP_PATH)/bin/hipcc) endif ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),) -ifneq ($(CUDA_11),) +ifneq ($(CUDA_11_COMPILER),) RUNNER_TARGETS += cuda_v11 endif -ifneq ($(CUDA_12),) +ifneq ($(CUDA_12_COMPILER),) RUNNER_TARGETS += cuda_v12 endif endif ifeq ($(OLLAMA_SKIP_ROCM_GENERATE),) -ifneq ($(HIP_LIB_DIR),) +ifneq ($(HIP_COMPILER),) RUNNER_TARGETS += rocm endif endif -all: clean-payload .WAIT runners
+all: runners exe + +dist: $(addprefix dist_, $(RUNNER_TARGETS)) + +dist_%: + @$(MAKE) --no-print-directory -f make/Makefile.$* dist + +payload: clean-payload .WAIT $(addprefix payload_, $(RUNNER_TARGETS)) + +payload_%: + @$(MAKE) --no-print-directory -f make/Makefile.$* payload runners: $(RUNNER_TARGETS) $(RUNNER_TARGETS): - $(MAKE) -f make/Makefile.$@ + @$(MAKE) --no-print-directory -f make/Makefile.$@ + +exe: + @$(MAKE) --no-print-directory -f make/Makefile.ollama help-sync apply-patches create-patches sync: - $(MAKE) -f make/Makefile.sync $@ + @$(MAKE) --no-print-directory -f make/Makefile.sync $@ + +test integration lint: + @$(MAKE) --no-print-directory -f make/Makefile.test $@ clean: rm -rf $(BUILD_DIR) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS) @@ -50,7 +72,43 @@ clean: clean-payload: rm -rf $(addprefix $(RUNNERS_PAYLOAD_DIR)/, $(RUNNER_TARGETS) metal cpu cpu_avx cpu_avx2) -.PHONY: all runners clean clean-payload $(RUNNER_TARGETS) .WAIT +help: + @echo "The following make targets will help you BUILD Ollama" + @echo "" + @echo " make all # (default) Build Ollama llm subprocess runners, and the primary ollama executable" + @echo " make runners # Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama executable" + @echo " make # Build specific runners. Enabled: $(RUNNER_TARGETS)" + @echo " make payload # Build the runners as payloads (Linux/Mac only)" + @echo " make dist # Build the runners for distribution and gather dependencies" + @echo " make help-sync # Help information on vendor update targets" + @echo " make help-runners # Help information on runner targets" + @echo "" + @echo "The following make targets will help you TEST Ollama" + @echo "" + @echo " make test # Run unit tests" + @echo " make integration # Run integration tests. 
You must 'make all' first" + @echo " make lint # Run lint and style tests" + @echo "" + @echo "For more information see 'docs/development.md'" + @echo "" + + +help-runners: + @echo "The following runners will be built based on discovered GPU libraries: $(RUNNER_TARGETS)" + @echo "(On MacOS arm64 'default' is the metal runner. For all other platforms 'default' is one or more CPU runners)" + @echo "" + @echo "# CUDA_PATH sets the location where CUDA toolkits are present" + @echo "CUDA_PATH=$(CUDA_PATH)" + @echo " CUDA_11=$(CUDA_11)" + @echo " CUDA_11_COMPILER=$(CUDA_11_COMPILER)" + @echo " CUDA_12=$(CUDA_12)" + @echo " CUDA_12_COMPILER=$(CUDA_12_COMPILER)" + @echo "" + @echo "# HIP_PATH sets the location where the ROCm toolkit is present" + @echo "HIP_PATH=$(HIP_PATH)" + @echo " HIP_COMPILER=$(HIP_COMPILER)" + +.PHONY: all exe dist payload help help-sync help-runners test integration lint runners clean clean-payload $(RUNNER_TARGETS) $(addprefix dist_, $(RUNNER_TARGETS)) $(addprefix payload_, $(RUNNER_TARGETS)) .WAIT # Handy debugging for make variables print-%: diff --git a/llama/make/Makefile.default b/llama/make/Makefile.default index 95b13a73..16011b1b 100644 --- a/llama/make/Makefile.default +++ b/llama/make/Makefile.default @@ -19,7 +19,11 @@ PAYLOAD_RUNNERS = $(addprefix $(RUNNERS_PAYLOAD_DIR)/,$(addsuffix /ollama_llama_ endif BUILD_RUNNERS = $(addprefix $(RUNNERS_BUILD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(RUNNERS))) -all: $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS) +default: $(BUILD_RUNNERS) + +dist: $(DIST_RUNNERS) + +payload: $(PAYLOAD_RUNNERS) $(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS) $(RUNNERS_BUILD_DIR)/$(DEFAULT_RUNNER)/ollama_llama_server$(EXE_EXT): *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS) @@ -47,7 +51,7 @@ $(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server$(EXE_EXT).gz: $(RUNNERS_BUILD_DIR)/ clean: rm -f $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS) -.PHONY: 
clean all +.PHONY: clean default # Handy debugging for make variables print-%: diff --git a/llama/make/Makefile.ollama b/llama/make/Makefile.ollama new file mode 100644 index 00000000..e199e31f --- /dev/null +++ b/llama/make/Makefile.ollama @@ -0,0 +1,19 @@ +# Makefile for building top-level ollama binary + +include make/common-defs.make + +ollama: $(OLLAMA_EXE) + +GO_DEPS=$(foreach dir,$(shell go list -deps -f '{{.Dir}}' ../ ),$(wildcard $(dir)/*.go)) +CPU_GOFLAGS="-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$(VERSION)\" \"-X=github.com/ollama/ollama/llama.CpuFeatures=$(subst $(space),$(comma),$(TARGET_CPU_FLAGS))\" $(TARGET_LDFLAGS)" +PAYLOADS=$(wildcard $(RUNNERS_PAYLOAD_DIR)/*/*.gz) + +$(OLLAMA_EXE): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS) +$(OLLAMA_EXE): $(COMMON_SRCS) $(COMMON_HDRS) $(PAYLOADS) $(GO_DEPS) + GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS))) -o $@ ../ + +.PHONY: ollama + +# Handy debugging for make variables +print-%: + @echo '$*=$($*)' diff --git a/llama/make/Makefile.rocm b/llama/make/Makefile.rocm index 4ab176b4..8697fe43 100644 --- a/llama/make/Makefile.rocm +++ b/llama/make/Makefile.rocm @@ -13,12 +13,14 @@ ifeq ($(OS),windows) CGO_EXTRA_LDFLAGS_WIN := -L$(shell cygpath -m -s "$(HIP_PATH)/lib") GPU_COMPILER_WIN := $(HIP_PATH)/bin/hipcc.bin.exe GPU_COMPILER:=$(GPU_COMPILER_WIN) + HIP_ARCHS?=$(HIP_ARCHS_COMMON) else ifeq ($(OS),linux) GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib - GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X) + GPU_COMPILER_LINUX := $(wildcard $(HIP_PATH)/bin/hipcc) GPU_COMPILER:=$(GPU_COMPILER_LINUX) ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(GPU_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf)) GPU_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL)) + 
HIP_ARCHS?=$(HIP_ARCHS_COMMON) $(HIP_ARCHS_LINUX) endif # TODO future multi-variant support for ROCm @@ -49,11 +51,10 @@ ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifes ifeq ($(OS),linux) GPU_COMPILER_FPIC := -fPIC -Wno-unused-function -std=gnu++11 - GPU_RUNNER_ARCH_FLAGS := $(foreach arch, $(HIP_ARCHS_COMMON) $(HIP_ARCHS_LINUX), --offload-arch=$(arch)) else ifeq ($(OS),windows) GPU_COMPILER_FPIC := -Xclang --dependent-lib=msvcrt - GPU_RUNNER_ARCH_FLAGS := $(foreach arch, $(HIP_ARCHS_COMMON), --offload-arch=$(arch)) endif +GPU_RUNNER_ARCH_FLAGS := $(foreach arch,$(subst ;,$(space),$(HIP_ARCHS)),--offload-arch=$(arch)) GPU_COMPILER_CUFLAGS = \ $(GPU_COMPILER_FPIC) \ diff --git a/llama/make/Makefile.sync b/llama/make/Makefile.sync index a6a7124f..62b12405 100644 --- a/llama/make/Makefile.sync +++ b/llama/make/Makefile.sync @@ -13,11 +13,11 @@ LLAMACPP_PATCH_DIR := $(DST_DIR)patches/ help-sync: @echo "The following make targets will help you update llama.cpp to a new base commit, or work on new features/fixes" @echo "" - @echo "\tmake apply-patches # Establish the tracking repo if not already present, reset to the base commit, and apply our patch set" - @echo "\tmake sync # Vendor llama.cpp and ggml from the tracking repo working tree" - @echo "\tmake create-patches # Generate the patch set based on the current commits in the tracking repo since the base commit" + @echo " make apply-patches # Establish the tracking repo if not already present, reset to the base commit, and apply our patch set" + @echo " make sync # Vendor llama.cpp and ggml from the tracking repo working tree" + @echo " make create-patches # Generate the patch set based on the current commits in the tracking repo since the base commit" @echo "" - @echo "For more details on the workflow, see the Vendoring section in ../docs/development.md" + @echo "For more details on the workflow, see the Vendoring section in 'docs/development.md'" apply-patches: $(LLAMACPP_REPO) @if ! 
git -C $(LLAMACPP_REPO) --no-pager diff --exit-code ; then \ diff --git a/llama/make/Makefile.test b/llama/make/Makefile.test new file mode 100644 index 00000000..e6423720 --- /dev/null +++ b/llama/make/Makefile.test @@ -0,0 +1,19 @@ +# Targets to assist in running tests + +include make/common-defs.make + +test: + cd $(abspath $(SRC_DIR)/..) && go test ./... + +integration: $(OLLAMA_EXE) + cd $(abspath $(SRC_DIR)/..) && go test --tags=integration ./integration -v + +lint: + cd $(abspath $(SRC_DIR)/..) && golangci-lint run -v + +# Note: in this makefile we error instead of building to allow more fine-grain control if testing flows +$(OLLAMA_EXE): + @echo "" + @echo "ERROR: You must build ollama first - use 'make all' to build the ollama binaries" + @echo "" + @exit 1 \ No newline at end of file diff --git a/llama/make/common-defs.make b/llama/make/common-defs.make index 8ba33501..006c6001 100644 --- a/llama/make/common-defs.make +++ b/llama/make/common-defs.make @@ -82,3 +82,5 @@ COMMON_SRCS := \ COMMON_HDRS := \ $(wildcard *.h) \ $(wildcard *.hpp) + +OLLAMA_EXE=$(abspath $(SRC_DIR)../ollama$(EXE_EXT)) \ No newline at end of file diff --git a/llama/make/gpu.make b/llama/make/gpu.make index fbd8dbca..b76af061 100644 --- a/llama/make/gpu.make +++ b/llama/make/gpu.make @@ -67,7 +67,11 @@ endif BUILD_RUNNERS = $(addprefix $(RUNNERS_BUILD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME))) -$(GPU_RUNNER_NAME): $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS) +$(GPU_RUNNER_NAME): $(BUILD_RUNNERS) + +dist: $(DIST_RUNNERS) + +payload: $(PAYLOAD_RUNNERS) # Build targets $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu diff --git a/runners/common.go b/runners/common.go index 19014d75..29436468 100644 --- a/runners/common.go +++ b/runners/common.go @@ -51,14 +51,18 @@ func Refresh(payloadFS fs.FS) (string, error) { }() } - if hasPayloads(payloadFS) { - if runnersDir == "" { - runnersDir, err = extractRunners(payloadFS) - } else { - err = 
refreshRunners(payloadFS, runnersDir) + // avoid payloads if we're operating off a local build + d, err := locateRunners() + if err != nil { + if hasPayloads(payloadFS) { + if runnersDir == "" { + runnersDir, err = extractRunners(payloadFS) + } else { + err = refreshRunners(payloadFS, runnersDir) + } } - } else if runnersDir == "" { - runnersDir, err = locateRunners() + } else { + runnersDir = d } return runnersDir, err @@ -78,31 +82,23 @@ func Cleanup(payloadFS fs.FS) { } } +// locateRunners searches for runners in a prioritized set of locations +// 1. local build, with executable at the top of the tree +// 2. lib directory relative to executable +// 3. payload extracted to OLLAMA_TMPDIR (this routine returns an error) func locateRunners() (string, error) { exe, err := os.Executable() if err != nil { return "", err } - cwd, err := os.Getwd() - if err != nil { - return "", err + paths := []string{ + filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"), + filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"), } - - var paths []string - for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} { - paths = append(paths, - root, - filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH), - filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH), - ) - } - - // Try a few variations to improve developer experience when building from source in the local tree for _, path := range paths { - candidate := filepath.Join(path, "lib", "ollama", "runners") - if _, err := os.Stat(candidate); err == nil { - return candidate, nil + if _, err := os.Stat(path); err == nil { + return path, nil } } return "", fmt.Errorf("unable to locate runners in any search path %v", paths)