Support customized CPU flags for runners
This implements a simplified custom CPU flags pattern for the runners. When built without overrides, the runner name contains the vector flag we check for (AVX) to ensure we don't try to run on unsupported systems and crash. If the user builds a customized set, we omit the naming scheme and don't check for compatibility. This avoids checking requirements at runtime, so that logic has been removed as well. This can be used to build GPU runners with no vector flags, or CPU/GPU runners with additional flags (e.g. AVX512) enabled.
This commit is contained in:
parent
f3e67901de
commit
4f4f64c5cc
8
.github/workflows/release.yaml
vendored
8
.github/workflows/release.yaml
vendored
@ -85,7 +85,7 @@ jobs:
|
||||
import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
|
||||
if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
|
||||
make
|
||||
make dist
|
||||
name: make
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
@ -143,8 +143,8 @@ jobs:
|
||||
import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
|
||||
if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
|
||||
make -C llama print-HIP_PATH print-HIP_LIB_DIR
|
||||
make rocm
|
||||
make help-runners
|
||||
make dist_rocm
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: generate-windows-rocm
|
||||
@ -226,7 +226,7 @@ jobs:
|
||||
import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
|
||||
if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
|
||||
make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
|
||||
make dist_cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: generate-windows-cuda-${{ matrix.cuda.version }}
|
||||
|
22
Dockerfile
22
Dockerfile
@ -1,9 +1,7 @@
|
||||
ARG GOLANG_VERSION=1.22.8
|
||||
ARG CMAKE_VERSION=3.22.1
|
||||
ARG CUDA_VERSION_11=11.3.1
|
||||
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
|
||||
ARG CUDA_VERSION_12=12.4.0
|
||||
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
|
||||
ARG ROCM_VERSION=6.1.2
|
||||
ARG JETPACK_6=r36.2.0
|
||||
ARG JETPACK_5=r35.4.1
|
||||
@ -62,7 +60,7 @@ RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/
|
||||
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
|
||||
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
|
||||
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
|
||||
ENV GOARCH amd64
|
||||
ENV GOARCH arm64
|
||||
ENV CGO_ENABLED 1
|
||||
WORKDIR /go/src/github.com/ollama/ollama/
|
||||
ENTRYPOINT [ "zsh" ]
|
||||
@ -70,29 +68,21 @@ ENTRYPOINT [ "zsh" ]
|
||||
FROM --platform=linux/amd64 unified-builder-amd64 AS runners-amd64
|
||||
COPY . .
|
||||
ARG OLLAMA_SKIP_CUDA_GENERATE
|
||||
ARG OLLAMA_SKIP_CUDA_11_GENERATE
|
||||
ARG OLLAMA_SKIP_CUDA_12_GENERATE
|
||||
ARG OLLAMA_SKIP_ROCM_GENERATE
|
||||
ARG CUDA_V11_ARCHITECTURES
|
||||
ARG CUDA_V12_ARCHITECTURES
|
||||
ARG OLLAMA_FAST_BUILD
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
|
||||
make -j $(expr $(nproc) / 2 ) ; \
|
||||
make -j $(expr $(nproc) / 2 ) dist payload ; \
|
||||
else \
|
||||
make -j 5 ; \
|
||||
make -j 5 dist payload ; \
|
||||
fi
|
||||
|
||||
FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
|
||||
COPY . .
|
||||
ARG OLLAMA_SKIP_CUDA_GENERATE
|
||||
ARG OLLAMA_SKIP_CUDA_11_GENERATE
|
||||
ARG OLLAMA_SKIP_CUDA_12_GENERATE
|
||||
ARG CUDA_V11_ARCHITECTURES
|
||||
ARG CUDA_V12_ARCHITECTURES
|
||||
ARG OLLAMA_FAST_BUILD
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
make -j 5
|
||||
make -j 5 dist payload
|
||||
|
||||
# Jetsons need to be built in discrete stages
|
||||
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
|
||||
@ -107,7 +97,7 @@ COPY . .
|
||||
ARG CGO_CFLAGS
|
||||
ENV GOARCH arm64
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
make -j 5 cuda_v11 \
|
||||
make -j 5 dist_cuda_v11 payload_cuda_v11 \
|
||||
CUDA_ARCHITECTURES="72;87" \
|
||||
GPU_RUNNER_VARIANT=_jetpack5 \
|
||||
CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
|
||||
@ -126,7 +116,7 @@ COPY . .
|
||||
ARG CGO_CFLAGS
|
||||
ENV GOARCH arm64
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
make -j 5 cuda_v12 \
|
||||
make -j 5 dist_cuda_v12 payload_cuda_v12 \
|
||||
CUDA_ARCHITECTURES="87" \
|
||||
GPU_RUNNER_VARIANT=_jetpack6 \
|
||||
CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
|
||||
|
@ -3,7 +3,7 @@
|
||||
Install required tools:
|
||||
|
||||
- go version 1.22 or higher
|
||||
- OS specific native compiler (see below)
|
||||
- OS specific C/C++ compiler (see below)
|
||||
- GNU Make
|
||||
|
||||
|
||||
@ -69,16 +69,6 @@ make -j 5
|
||||
|
||||
ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
|
||||
|
||||
#### Advanced CPU Settings
|
||||
|
||||
By default, running `make` will compile a few different variations
|
||||
of the LLM library based on common CPU families and vector math capabilities,
|
||||
including a lowest-common-denominator which should run on almost any 64 bit CPU
|
||||
somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
|
||||
load.
|
||||
|
||||
Custom CPU settings are not currently supported in the new Go server build but will be added back after we complete the transition.
|
||||
|
||||
#### Containerized Linux Build
|
||||
|
||||
If you have Docker and buildx available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting artifacts are placed in `./dist` and by default the script builds both arm64 and amd64 binaries. If you want to build only amd64, you can build with `PLATFORM=linux/amd64 ./scripts/build_linux.sh`
|
||||
@ -142,3 +132,30 @@ pacman -S mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-gcc-compat mingw
|
||||
```
|
||||
|
||||
You will need to ensure your PATH includes go, cmake, gcc and clang mingw32-make to build ollama from source. (typically `C:\msys64\clangarm64\bin\`)
|
||||
|
||||
|
||||
## Advanced CPU Vector Settings
|
||||
|
||||
On x86, running `make` will compile several CPU runners which can run on different CPU families. At runtime, Ollama will auto-detect the best variation to load. If GPU libraries are present at build time, Ollama also compiles GPU runners with the `AVX` CPU vector feature enabled. This provides a good performance balance when loading large models that split across GPU and CPU with broad compatibility. Some users may prefer no vector extensions (e.g. older Xeon/Celeron processors, or hypervisors that mask the vector features) while other users may prefer turning on many more vector extensions to further improve performance for split model loads.
|
||||
|
||||
To customize the set of CPU vector features enabled for a CPU runner and all GPU runners, use CUSTOM_CPU_FLAGS during the build.
|
||||
|
||||
To build without any vector flags:
|
||||
|
||||
```
|
||||
make CUSTOM_CPU_FLAGS=""
|
||||
```
|
||||
|
||||
To build with both AVX and AVX2:
|
||||
```
|
||||
make CUSTOM_CPU_FLAGS=avx,avx2
|
||||
```
|
||||
|
||||
To build with AVX512 features turned on:
|
||||
|
||||
```
|
||||
make CUSTOM_CPU_FLAGS=avx,avx2,avx512,avx512vbmi,avx512vnni,avx512bf16
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> If you are experimenting with different flags, make sure to do a `make clean` between each change to ensure everything is rebuilt with the new compiler flags
|
||||
|
@ -4,26 +4,20 @@ include make/common-defs.make
|
||||
RUNNER_TARGETS := default
|
||||
|
||||
# Determine which if any GPU runners we should build
|
||||
include make/cuda-v11-defs.make
|
||||
include make/cuda-v12-defs.make
|
||||
|
||||
ifeq ($(OS),windows)
|
||||
CUDA_PATH?=$(shell cygpath -m -s "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\" 2>/dev/null)unknown
|
||||
CUDA_BASE_DIR := $(dir $(shell cygpath -m -s "$(CUDA_PATH)\\.." 2>/dev/null))
|
||||
CUDA_11:=$(shell ls -d $(CUDA_BASE_DIR)/v11.? 2>/dev/null)
|
||||
CUDA_11_COMPILER:=$(wildcard $(CUDA_11)/bin/nvcc.exe)
|
||||
CUDA_12:=$(shell ls -d $(CUDA_BASE_DIR)/v12.? 2>/dev/null)
|
||||
CUDA_12_COMPILER:=$(wildcard $(CUDA_12)/bin/nvcc.exe)
|
||||
HIP_LIB_DIR := $(shell ls -d $(HIP_PATH)/lib 2>/dev/null)
|
||||
HIP_COMPILER:=$(wildcard $(HIP_PATH)/bin/hipcc.bin.exe)
|
||||
else ifeq ($(OS),linux)
|
||||
CUDA_PATH?=/usr/local/cuda
|
||||
CUDA_11:=$(shell ls -d $(CUDA_PATH)-11 2>/dev/null)
|
||||
CUDA_11_COMPILER:=$(wildcard $(CUDA_11)/bin/nvcc)
|
||||
CUDA_12:=$(shell ls -d $(CUDA_PATH)-12 2>/dev/null)
|
||||
CUDA_12_COMPILER:=$(wildcard $(CUDA_11)/bin/nvcc)
|
||||
HIP_PATH?=/opt/rocm
|
||||
HIP_LIB_DIR := $(shell ls -d $(HIP_PATH)/lib 2>/dev/null)
|
||||
HIP_COMPILER:=$(wildcard $(HIP_PATH)/bin/hipcc)
|
||||
endif
|
||||
|
||||
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
|
||||
ifeq ($(CUSTOM_CPU_FLAGS),)
|
||||
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
|
||||
ifneq ($(CUDA_11_COMPILER),)
|
||||
RUNNER_TARGETS += cuda_v11
|
||||
@ -32,6 +26,14 @@ ifneq ($(CUDA_12_COMPILER),)
|
||||
RUNNER_TARGETS += cuda_v12
|
||||
endif
|
||||
endif
|
||||
else # CUSTOM_CPU_FLAGS is set, we'll build only the latest cuda version detected
|
||||
ifneq ($(CUDA_12),)
|
||||
RUNNER_TARGETS += cuda_v12
|
||||
else ifneq ($(CUDA_11),)
|
||||
RUNNER_TARGETS += cuda_v11
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OLLAMA_SKIP_ROCM_GENERATE),)
|
||||
ifneq ($(HIP_COMPILER),)
|
||||
RUNNER_TARGETS += rocm
|
||||
@ -46,10 +48,10 @@ dist: $(addprefix dist_, $(RUNNER_TARGETS))
|
||||
dist_%:
|
||||
@$(MAKE) --no-print-directory -f make/Makefile.$* dist
|
||||
|
||||
payload: clean-payload .WAIT $(addprefix payload_, $(RUNNER_TARGETS))
|
||||
payload: $(addprefix payload_, $(RUNNER_TARGETS))
|
||||
|
||||
payload_%:
|
||||
@$(MAKE) --no-print-directory f make/Makefile.$* dist
|
||||
@$(MAKE) --no-print-directory -f make/Makefile.$* payload
|
||||
|
||||
runners: $(RUNNER_TARGETS)
|
||||
|
||||
@ -65,8 +67,8 @@ help-sync apply-patches create-patches sync:
|
||||
test integration lint:
|
||||
@$(MAKE) --no-print-directory -f make/Makefile.test $@
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILD_DIR) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
|
||||
clean: clean-payload
|
||||
rm -rf "$(BUILD_DIR)" "$(DIST_LIB_DIR)"
|
||||
go clean -cache
|
||||
|
||||
clean-payload:
|
||||
@ -75,9 +77,9 @@ clean-payload:
|
||||
help:
|
||||
@echo "The following make targets will help you BUILD Ollama"
|
||||
@echo ""
|
||||
@echo " make all # (default) Build Ollama llm subprocess runners, and the primary ollama executable"
|
||||
@echo " make all # (default target) Build Ollama llm subprocess runners, and the primary ollama executable"
|
||||
@echo " make runners # Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama exectuable"
|
||||
@echo " make <runner> # Build specific runners. Enabled: $(RUNNER_TARGETS)"
|
||||
@echo " make <runner> # Build specific runners. Enabled: '$(RUNNER_TARGETS)'"
|
||||
@echo " make payload # Build the runners as payloads (Linux/Mac only)"
|
||||
@echo " make dist # Build the runners for distribution and gather dependencies"
|
||||
@echo " make help-sync # Help information on vendor update targets"
|
||||
@ -94,9 +96,11 @@ help:
|
||||
|
||||
|
||||
help-runners:
|
||||
@echo "The following runners will be built based on discovered GPU libraries: $(RUNNER_TARGETS)"
|
||||
@echo "The following runners will be built based on discovered GPU libraries: '$(RUNNER_TARGETS)'"
|
||||
@echo "(On MacOS arm64 'default' is the metal runner. For all other platforms 'default' is one or more CPU runners)"
|
||||
@echo ""
|
||||
@echo "GPU Runner CPU Flags: '$(GPU_RUNNER_CPU_FLAGS)' (Override with CUSTOM_CPU_FLAGS)"
|
||||
@echo ""
|
||||
@echo "# CUDA_PATH sets the location where CUDA toolkits are present"
|
||||
@echo "CUDA_PATH=$(CUDA_PATH)"
|
||||
@echo " CUDA_11=$(CUDA_11)"
|
||||
@ -108,7 +112,7 @@ help-runners:
|
||||
@echo "HIP_PATH=$(HIP_PATH)"
|
||||
@echo " HIP_COMPILER=$(HIP_COMPILER)"
|
||||
|
||||
.PHONY: all exe dist payload help help-sync help-runners test integration lint runners clean clean-payload $(RUNNER_TARGETS) $(addprefix dist_, $(RUNNER_TARGETS)) $(addprefix payload_, $(RUNNER_TARGETS)) .WAIT
|
||||
.PHONY: all exe dist payload help help-sync help-runners test integration lint runners clean clean-payload $(RUNNER_TARGETS)
|
||||
|
||||
# Handy debugging for make variables
|
||||
print-%:
|
||||
|
@ -9,17 +9,19 @@ package llama
|
||||
#cgo amd64,avx CXXFLAGS: -mavx
|
||||
#cgo amd64,avx2 CFLAGS: -mavx2 -mfma
|
||||
#cgo amd64,avx2 CXXFLAGS: -mavx2 -mfma
|
||||
#cgo amd64,avx512 CFLAGS: -mavx512f -mavx512dq -mavx512bw
|
||||
#cgo amd64,avx512 CXXFLAGS: -mavx512f -mavx512dq -mavx512bw
|
||||
#cgo amd64,avx512bf16 CFLAGS: -mavx512bf16 -D__AVX512BF16__
|
||||
#cgo amd64,avx512bf16 CXXFLAGS: -mavx512bf16 -D__AVX512BF16__
|
||||
#cgo amd64,avx512vbmi CFLAGS: -mavx512vbmi -D__AVX512VBMI__
|
||||
#cgo amd64,avx512vbmi CXXFLAGS: -mavx512vbmi -D__AVX512VBMI__
|
||||
#cgo amd64,avx512vnni CFLAGS: -mavx512vnni -D__AVX512VNNI__
|
||||
#cgo amd64,avx512vnni CXXFLAGS: -mavx512vnni -D__AVX512VNNI__
|
||||
#cgo amd64,f16c CFLAGS: -mf16c
|
||||
#cgo amd64,f16c CXXFLAGS: -mf16c
|
||||
#cgo amd64,fma CFLAGS: -mfma
|
||||
#cgo amd64,fma CXXFLAGS: -mfma
|
||||
#cgo avx CFLAGS: -mavx
|
||||
#cgo avx CXXFLAGS: -mavx
|
||||
#cgo avx2 CFLAGS: -mavx2 -mfma -mf16c
|
||||
#cgo avx2 CXXFLAGS: -mavx2 -mfma -mf16c
|
||||
#cgo cuda CFLAGS: -fPIE -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
|
||||
#cgo cuda CFLAGS: -fPIE -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
|
||||
#cgo cuda CXXFLAGS: -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
|
||||
#cgo cuda CXXFLAGS: -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
|
||||
#cgo cuda_jetpack5 LDFLAGS: -lggml_cuda_jetpack5 -L/usr/local/cuda-11/lib64
|
||||
#cgo cuda_jetpack6 LDFLAGS: -lggml_cuda_jetpack6 -L/usr/local/cuda-12/lib64
|
||||
@ -37,7 +39,6 @@ package llama
|
||||
#cgo linux CFLAGS: -D_GNU_SOURCE
|
||||
#cgo linux CXXFLAGS: -D_GNU_SOURCE
|
||||
#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/Linux/amd64
|
||||
#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/Linux/amd64
|
||||
#cgo linux,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
|
||||
#cgo linux,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
|
||||
#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/Linux/arm64
|
||||
@ -50,14 +51,11 @@ package llama
|
||||
#cgo rocm LDFLAGS: -L${SRCDIR} -lggml_rocm -lhipblas -lamdhip64 -lrocblas
|
||||
#cgo windows CFLAGS: -Wno-discarded-qualifiers -D_WIN32_WINNT=0x602
|
||||
#cgo windows CXXFLAGS: -D_WIN32_WINNT=0x602
|
||||
#cgo windows LDFLAGS: -lmsvcrt
|
||||
#cgo windows LDFLAGS: -lmsvcrt -static-libstdc++ -static-libgcc -static
|
||||
#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/Windows/amd64
|
||||
#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/Windows/amd64
|
||||
#cgo windows,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
|
||||
#cgo windows,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
|
||||
#cgo windows,arm64 LDFLAGS: -L${SRCDIR}/build/Windows/arm64
|
||||
#cgo windows,arm64 LDFLAGS: -L${SRCDIR}/build/Windows/arm64
|
||||
#cgo windows,cuda LDFLAGS: -lcuda -lcudart -lcublas -lcublasLt
|
||||
#cgo windows,rocm LDFLAGS: -lggml_rocm -lhipblas -lamdhip64 -lrocblas
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
# Build rules for CUDA v11 runner
|
||||
|
||||
include make/common-defs.make
|
||||
|
||||
include make/cuda-v11-defs.make
|
||||
|
||||
GPU_RUNNER_VARIANT := _v11
|
||||
GPU_PATH_ROOT_WIN=$(shell ls -d $(dir $(shell cygpath -m -s "$(CUDA_PATH)\.."))/v11.? 2>/dev/null)
|
||||
|
@ -1,7 +1,7 @@
|
||||
# Build rules for CUDA v12 runner
|
||||
|
||||
include make/common-defs.make
|
||||
|
||||
include make/cuda-v12-defs.make
|
||||
|
||||
GPU_RUNNER_VARIANT := _v12
|
||||
GPU_PATH_ROOT_WIN=$(shell ls -d $(dir $(shell cygpath -m -s "$(CUDA_PATH)\.."))/v12.? 2>/dev/null)
|
||||
|
@ -8,7 +8,7 @@ CPU_GOFLAGS="-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$(VERS
|
||||
DEFAULT_RUNNER := $(if $(and $(filter darwin,$(OS)),$(filter arm64,$(ARCH))),metal,cpu)
|
||||
RUNNERS := $(DEFAULT_RUNNER)
|
||||
ifeq ($(ARCH),amd64)
|
||||
ifeq ($(CUSTOM_CPU_FLAGS),)
|
||||
ifeq ($(origin CUSTOM_CPU_FLAGS),undefined)
|
||||
RUNNERS += cpu_avx cpu_avx2
|
||||
endif
|
||||
endif
|
||||
@ -51,7 +51,7 @@ $(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server$(EXE_EXT).gz: $(RUNNERS_BUILD_DIR)/
|
||||
clean:
|
||||
rm -f $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
|
||||
|
||||
.PHONY: clean default
|
||||
.PHONY: clean default dist
|
||||
|
||||
# Handy debugging for make variables
|
||||
print-%:
|
||||
|
@ -56,9 +56,12 @@ else ifeq ($(OS),windows)
|
||||
endif
|
||||
GPU_RUNNER_ARCH_FLAGS := $(foreach arch,$(subst ;,$(space),$(HIP_ARCHS)),--offload-arch=$(arch))
|
||||
|
||||
# HIPCC uses clang which requires avx512 -> -mavx512f -mavx512dq -mavx512bw
|
||||
GPU_VECTOR_FLAGS=$(if $(filter avx512,$(GPU_RUNNER_CPU_FLAGS)),avx512f avx512dq avx512bw) $(filter-out avx512,$(GPU_RUNNER_CPU_FLAGS))
|
||||
|
||||
GPU_COMPILER_CUFLAGS = \
|
||||
$(GPU_COMPILER_FPIC) \
|
||||
$(addprefix -m,$(GPU_RUNNER_CPU_FLAGS)) \
|
||||
$(addprefix -m,$(GPU_VECTOR_FLAGS)) \
|
||||
-mf16c \
|
||||
-mfma \
|
||||
-parallel-jobs=2 \
|
||||
@ -102,7 +105,7 @@ endif
|
||||
include make/gpu.make
|
||||
|
||||
# Adjust the rules from gpu.make to handle the ROCm dependencies properly
|
||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(ROCBLAS_DIST_DEP_MANIFEST)
|
||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(ROCBLAS_DIST_DEP_MANIFEST)
|
||||
$(ROCBLAS_DIST_DEP_MANIFEST):
|
||||
@-mkdir -p $(dir $@)
|
||||
@echo "Copying rocblas library..."
|
||||
|
@ -11,7 +11,7 @@ integration: $(OLLAMA_EXE)
|
||||
lint:
|
||||
cd $(abspath $(SRC_DIR)/..) && golangci-lint run -v
|
||||
|
||||
# Note: in this makefile we error instead of building to allow more fine-grain control if testing flows
|
||||
# Note: in this makefile we error instead of building to allow more fine-grain control of testing flows
|
||||
$(OLLAMA_EXE):
|
||||
@echo ""
|
||||
@echo "ERROR: You must build ollama first - use 'make all' to build the ollama binaries"
|
||||
|
@ -44,9 +44,14 @@ ifneq ($(CCACHE),)
|
||||
endif
|
||||
|
||||
|
||||
# Override in environment space separated to tune GPU runner CPU vector flags
|
||||
# Override in environment to tune CPU vector flags
|
||||
ifeq ($(ARCH),amd64)
|
||||
GPU_RUNNER_CPU_FLAGS ?= avx
|
||||
ifeq ($(origin CUSTOM_CPU_FLAGS),undefined)
|
||||
GPU_RUNNER_CPU_FLAGS=avx
|
||||
GPU_RUNNER_EXTRA_VARIANT=_avx
|
||||
else
|
||||
GPU_RUNNER_CPU_FLAGS=$(subst $(comma),$(space),$(CUSTOM_CPU_FLAGS))
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OS),windows)
|
||||
|
13
llama/make/cuda-v11-defs.make
Normal file
13
llama/make/cuda-v11-defs.make
Normal file
@ -0,0 +1,13 @@
|
||||
# Common definitions for the various Makefiles which set cuda settings
|
||||
# No rules are defined here so this is safe to include at the beginning of other makefiles
|
||||
|
||||
ifeq ($(OS),windows)
|
||||
CUDA_PATH?=$(shell cygpath -m -s "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\" 2>/dev/null)unknown
|
||||
CUDA_BASE_DIR := $(dir $(shell cygpath -m -s "$(CUDA_PATH)\\.." 2>/dev/null))
|
||||
CUDA_11:=$(shell ls -d $(CUDA_BASE_DIR)/v11.? 2>/dev/null)
|
||||
CUDA_11_COMPILER:=$(wildcard $(CUDA_11)/bin/nvcc.exe)
|
||||
else ifeq ($(OS),linux)
|
||||
CUDA_PATH?=/usr/local/cuda
|
||||
CUDA_11:=$(shell ls -d $(CUDA_PATH)-11 2>/dev/null)
|
||||
CUDA_11_COMPILER:=$(wildcard $(CUDA_11)/bin/nvcc)
|
||||
endif
|
13
llama/make/cuda-v12-defs.make
Normal file
13
llama/make/cuda-v12-defs.make
Normal file
@ -0,0 +1,13 @@
|
||||
# Common definitions for the various Makefiles which set cuda settings
|
||||
# No rules are defined here so this is safe to include at the beginning of other makefiles
|
||||
|
||||
ifeq ($(OS),windows)
|
||||
CUDA_PATH?=$(shell cygpath -m -s "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\" 2>/dev/null)unknown
|
||||
CUDA_BASE_DIR := $(dir $(shell cygpath -m -s "$(CUDA_PATH)\\.." 2>/dev/null))
|
||||
CUDA_12:=$(shell ls -d $(CUDA_BASE_DIR)/v12.? 2>/dev/null)
|
||||
CUDA_12_COMPILER:=$(wildcard $(CUDA_12)/bin/nvcc.exe)
|
||||
else ifeq ($(OS),linux)
|
||||
CUDA_PATH?=/usr/local/cuda
|
||||
CUDA_12:=$(shell ls -d $(CUDA_PATH)-12 2>/dev/null)
|
||||
CUDA_12_COMPILER:=$(wildcard $(CUDA_12)/bin/nvcc)
|
||||
endif
|
@ -11,26 +11,38 @@ GPU_RUNNER_GO_TAGS := cuda cuda$(GPU_RUNNER_VARIANT)
|
||||
GPU_RUNNER_DRIVER_LIB_LINK := -lcuda
|
||||
GPU_RUNNER_LIBS_SHORT := cublas cudart cublasLt
|
||||
GPU_LIB_DIR_WIN = $(GPU_PATH_ROOT_WIN)/bin
|
||||
GPU_LIB_DIR_LINUX = $(GPU_PATH_ROOT_LINUX)/lib64
|
||||
ifneq ($(GPU_PATH_ROOT_LINUX),)
|
||||
GPU_LIB_DIR_LINUX=$(strip $(shell ls -d $(GPU_PATH_ROOT_LINUX)/lib64 2>/dev/null || ls -d $(GPU_PATH_ROOT_LINUX)/lib 2>/dev/null))
|
||||
GPU_COMPILER_LINUX = $(GPU_PATH_ROOT_LINUX)/bin/nvcc
|
||||
endif
|
||||
CGO_EXTRA_LDFLAGS_WIN = -L"$(GPU_PATH_ROOT_WIN)/lib/x64"
|
||||
GPU_COMPILER_WIN = $(GPU_PATH_ROOT_WIN)/bin/nvcc
|
||||
GPU_COMPILER_LINUX = $(GPU_PATH_ROOT_LINUX)/bin/nvcc
|
||||
GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) -D_WIN32_WINNT=0x602
|
||||
GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE
|
||||
GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
|
||||
GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE
|
||||
GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT)*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
|
||||
|
||||
ifeq ($(OS),windows)
|
||||
# On windows, nvcc uses msvc which does not support avx512vbmi avx512vnni avx512bf16, but macros can turn them on
|
||||
GPU_VECTOR_FLAGS=$(call uc,$(filter-out avx512bf16,$(filter-out avx512vnni,$(filter-out avx512vbmi,$(GPU_RUNNER_CPU_FLAGS)))))
|
||||
GPU_COMPILER_EXTRA_FLAGS=$(if $(filter avx512vbmi,$(GPU_RUNNER_CPU_FLAGS)),-D__AVX512VBMI__)
|
||||
GPU_COMPILER_EXTRA_FLAGS+=$(if $(filter avx512vnni,$(GPU_RUNNER_CPU_FLAGS)),-D__AVX512VNNI__)
|
||||
GPU_COMPILER_EXTRA_FLAGS+=$(if $(filter avx512bf16,$(GPU_RUNNER_CPU_FLAGS)),-D__AVX512BF16__)
|
||||
GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
|
||||
else ifeq ($(OS),linux)
|
||||
# On linux, nvcc requires avx512 -> -mavx512f -mavx512dq -mavx512bw
|
||||
GPU_VECTOR_FLAGS=$(if $(filter avx512,$(GPU_RUNNER_CPU_FLAGS)),avx512f avx512dq avx512bw) $(filter-out avx512,$(GPU_RUNNER_CPU_FLAGS))
|
||||
CUDA_PATH?=/usr/local/cuda
|
||||
GPU_COMPILER_EXTRA_FLAGS = -fPIC -Wno-unused-function -std=c++11
|
||||
GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
|
||||
endif
|
||||
GPU_DIST_DEPS_LIBS= $(sort $(addprefix $(DIST_GPU_RUNNER_DEPS_DIR)/,$(notdir $(GPU_LIBS))))
|
||||
|
||||
ifeq ($(OS),linux)
|
||||
CUDA_PATH?=/usr/local/cuda
|
||||
GPU_COMPILER_FPIC = -fPIC -Wno-unused-function -std=c++11
|
||||
endif
|
||||
GPU_RUNNER_ARCH_FLAGS := $(foreach arch,$(subst ;,$(space),$(CUDA_ARCHITECTURES)),--generate-code=arch=compute_$(arch)$(comma)code=[compute_$(arch)$(comma)sm_$(arch)]) \
|
||||
-DGGML_CUDA_USE_GRAPHS=1
|
||||
GPU_COMPILER_CUFLAGS = \
|
||||
$(GPU_COMPILER_FPIC) \
|
||||
-Xcompiler "$(addprefix $(CPU_FLAG_PREFIX),$(_OS_GPU_RUNNER_CPU_FLAGS))" \
|
||||
$(GPU_COMPILER_EXTRA_FLAGS) \
|
||||
-Xcompiler "$(addprefix $(CPU_FLAG_PREFIX),$(GPU_VECTOR_FLAGS))" \
|
||||
-t2 \
|
||||
-DGGML_CUDA_DMMV_X=32 \
|
||||
-DGGML_CUDA_MMV_Y=1 \
|
||||
|
@ -25,14 +25,8 @@ GPU_GOFLAGS="-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$(VERS
|
||||
# today, cuda is bundled, but rocm is split out. Should split them each out by runner
|
||||
DIST_GPU_RUNNER_DEPS_DIR = $(DIST_LIB_DIR)
|
||||
|
||||
ifeq ($(OS),windows)
|
||||
_OS_GPU_RUNNER_CPU_FLAGS=$(call uc,$(GPU_RUNNER_CPU_FLAGS))
|
||||
else ifeq ($(OS),linux)
|
||||
_OS_GPU_RUNNER_CPU_FLAGS=$(GPU_RUNNER_CPU_FLAGS)
|
||||
endif
|
||||
|
||||
GPU_RUNNER_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
|
||||
DIST_GPU_RUNNER_LIB_DEPS = $(addprefix $(DIST_GPU_RUNNER_DEPS_DIR)/,$(notdir $(GPU_RUNNER_LIBS)))
|
||||
|
||||
GPU_RUNNER_SRCS := \
|
||||
ggml-cuda.cu \
|
||||
@ -60,11 +54,11 @@ GPU_RUNNER_OBJS := $(GPU_RUNNER_SRCS:.cu=.$(GPU_RUNNER_NAME).$(OBJ_EXT))
|
||||
GPU_RUNNER_OBJS := $(GPU_RUNNER_OBJS:.c=.$(GPU_RUNNER_NAME).$(OBJ_EXT))
|
||||
GPU_RUNNER_OBJS := $(addprefix $(BUILD_DIR)/,$(GPU_RUNNER_OBJS:.cpp=.$(GPU_RUNNER_NAME).$(OBJ_EXT)))
|
||||
|
||||
DIST_RUNNERS = $(addprefix $(RUNNERS_DIST_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)))
|
||||
DIST_RUNNERS = $(addprefix $(RUNNERS_DIST_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)))
|
||||
ifneq ($(OS),windows)
|
||||
PAYLOAD_RUNNERS = $(addprefix $(RUNNERS_PAYLOAD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT).gz,$(GPU_RUNNER_NAME)))
|
||||
PAYLOAD_RUNNERS = $(addprefix $(RUNNERS_PAYLOAD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT).gz,$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)))
|
||||
endif
|
||||
BUILD_RUNNERS = $(addprefix $(RUNNERS_BUILD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)))
|
||||
BUILD_RUNNERS = $(addprefix $(RUNNERS_BUILD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)))
|
||||
|
||||
|
||||
$(GPU_RUNNER_NAME): $(BUILD_RUNNERS)
|
||||
@ -83,11 +77,11 @@ $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c
|
||||
$(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cpp
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CXXFLAGS) -o $@ $<
|
||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/" $(CGO_EXTRA_LDFLAGS)
|
||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
|
||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/" $(CGO_EXTRA_LDFLAGS)
|
||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
|
||||
@-mkdir -p $(dir $@)
|
||||
GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
|
||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
|
||||
GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
|
||||
$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CCACHE) $(GPU_COMPILER) --shared -L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
|
||||
|
||||
@ -95,13 +89,10 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).
|
||||
$(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $< $@
|
||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(GPU_DIST_DEPS_LIBS)
|
||||
$(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
|
||||
$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(GPU_DIST_DEPS_LIBS)
|
||||
$(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $< $@
|
||||
$(DIST_GPU_RUNNER_LIB_DEPS):
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@)
|
||||
$(GPU_DIST_DEPS_LIBS):
|
||||
@-mkdir -p $(dir $@)
|
||||
$(CP) $(dir $(filter %$(notdir $@),$(GPU_LIBS) $(GPU_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@)
|
||||
@ -110,7 +101,7 @@ $(GPU_DIST_DEPS_LIBS):
|
||||
$(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server
|
||||
@-mkdir -p $(dir $@)
|
||||
${GZIP} --best -c $< > $@
|
||||
$(RUNNERS_PAYLOAD_DIR)/$(GPU_RUNNER_NAME)/%.gz: $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/%
|
||||
$(RUNNERS_PAYLOAD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/%.gz: $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/%
|
||||
@-mkdir -p $(dir $@)
|
||||
${GZIP} --best -c $< > $@
|
||||
|
||||
|
@ -1,19 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"github.com/ollama/ollama/llama"
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
func printRequirements(fp *os.File) {
|
||||
attrs := map[string]string{
|
||||
"system_info": llama.PrintSystemInfo(),
|
||||
"version": version.Version,
|
||||
"cpu_features": llama.CpuFeatures,
|
||||
}
|
||||
enc := json.NewEncoder(fp)
|
||||
_ = enc.Encode(attrs)
|
||||
}
|
@ -818,13 +818,8 @@ func main() {
|
||||
mlock := flag.Bool("mlock", false, "force system to keep model in RAM rather than swapping or compressing")
|
||||
tensorSplit := flag.String("tensor-split", "", "fraction of the model to offload to each GPU, comma-separated list of proportions")
|
||||
multiUserCache := flag.Bool("multiuser-cache", false, "optimize input cache algorithm for multiple users")
|
||||
requirements := flag.Bool("requirements", false, "print json requirement information")
|
||||
|
||||
flag.Parse()
|
||||
if *requirements {
|
||||
printRequirements(os.Stdout)
|
||||
return
|
||||
}
|
||||
level := slog.LevelInfo
|
||||
if *verbose {
|
||||
level = slog.LevelDebug
|
||||
|
@ -17,6 +17,7 @@ import (
|
||||
"syscall"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
"golang.org/x/sys/cpu"
|
||||
|
||||
"github.com/ollama/ollama/discover"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
@ -288,7 +289,16 @@ func GetAvailableServers(payloadsDir string) map[string]string {
|
||||
servers := make(map[string]string)
|
||||
for _, file := range files {
|
||||
slog.Debug("availableServers : found", "file", file)
|
||||
servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file)
|
||||
runnerName := filepath.Base(filepath.Dir(file))
|
||||
// Special case for our GPU runners - if compiled with standard AVX flag
|
||||
// detect incompatible system
|
||||
// Custom builds will omit this and its up to the user to ensure compatibility
|
||||
parsed := strings.Split(runnerName, "_")
|
||||
if len(parsed) == 3 && parsed[2] == "avx" && !cpu.X86.HasAVX {
|
||||
slog.Info("GPU runner incompatible with host system, CPU does not have AVX", "runner", runnerName)
|
||||
continue
|
||||
}
|
||||
servers[runnerName] = filepath.Dir(file)
|
||||
}
|
||||
|
||||
return servers
|
||||
|
@ -82,7 +82,7 @@ function buildOllama() {
|
||||
if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) {
|
||||
write-host "Building ollama runners"
|
||||
Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
|
||||
& make -C llama -j 12
|
||||
& make -C llama -j 12 dist
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
} else {
|
||||
write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
|
||||
|
Loading…
x
Reference in New Issue
Block a user