From 0800fef8d09a6d30b48ddb07c9bfac5e43a7dec7 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Wed, 13 Nov 2024 09:02:50 -0800
Subject: [PATCH] Use relative paths

If the user checks out the repo in a path that contains spaces, make gets
really confused, so use relative paths for everything in-repo to avoid breakage.
---
 llama/Makefile              |  8 ++++----
 llama/llama.go              |  8 ++++----
 llama/make/Makefile.ollama  |  9 ++++----
 llama/make/Makefile.rocm    | 25 ++++++++++++++--------
 llama/make/Makefile.sync    | 41 +++++++++++++++++++------------------
 llama/make/Makefile.test    |  6 +++---
 llama/make/common-defs.make | 31 ++++++++++++++++++----------
 llama/make/cuda.make        |  2 +-
 llama/make/gpu.make         | 12 +++++------
 9 files changed, 81 insertions(+), 61 deletions(-)

diff --git a/llama/Makefile b/llama/Makefile
index 6b04312c..6a8ca50d 100644
--- a/llama/Makefile
+++ b/llama/Makefile
@@ -43,7 +43,7 @@ endif
 
 all: runners exe
 
-dist: $(addprefix dist_, $(RUNNER_TARGETS))
+dist: $(addprefix dist_, $(RUNNER_TARGETS)) dist_exe
 
 dist_%:
 	@$(MAKE) --no-print-directory -f make/Makefile.$* dist
@@ -58,8 +58,8 @@ runners: $(RUNNER_TARGETS)
 $(RUNNER_TARGETS):
 	@$(MAKE) --no-print-directory -f make/Makefile.$@
 
-exe:
-	@$(MAKE) --no-print-directory -f make/Makefile.ollama
+exe dist_exe:
+	@$(MAKE) --no-print-directory -f make/Makefile.ollama $@
 
 help-sync apply-patches create-patches sync:
 	@$(MAKE) --no-print-directory -f make/Makefile.sync $@
@@ -68,7 +68,7 @@ test integration lint:
 	@$(MAKE) --no-print-directory -f make/Makefile.test $@
 
 clean: clean-payload
-	rm -rf "$(BUILD_DIR)" "$(DIST_LIB_DIR)"
+	rm -rf $(BUILD_DIR) $(DIST_LIB_DIR)
 	go clean -cache
 
 clean-payload:
diff --git a/llama/llama.go b/llama/llama.go
index 7861b69b..a192e4c4 100644
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -38,10 +38,10 @@ package llama
 #cgo darwin,arm64 LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework Accelerate
 #cgo linux CFLAGS: -D_GNU_SOURCE
 #cgo linux CXXFLAGS: -D_GNU_SOURCE
-#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/Linux/amd64
+#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/linux-amd64
 #cgo linux,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
 #cgo linux,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
-#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/Linux/arm64
+#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/linux-arm64
 #cgo linux,arm64,sve CFLAGS: -march=armv8.6-a+sve
 #cgo linux,arm64,sve CXXFLAGS: -march=armv8.6-a+sve
 #cgo linux,cuda LDFLAGS: -lcuda -lcudart -lcublas -lcublasLt -lpthread -ldl -lrt -lresolv
@@ -52,10 +52,10 @@ package llama
 #cgo windows CFLAGS: -Wno-discarded-qualifiers -D_WIN32_WINNT=0x602
 #cgo windows CXXFLAGS: -D_WIN32_WINNT=0x602
 #cgo windows LDFLAGS: -lmsvcrt -static-libstdc++ -static-libgcc -static
-#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/Windows/amd64
+#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/windows-amd64
 #cgo windows,arm64 CFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
 #cgo windows,arm64 CXXFLAGS: -D__aarch64__ -D__ARM_NEON -D__ARM_FEATURE_FMA
-#cgo windows,arm64 LDFLAGS: -L${SRCDIR}/build/Windows/arm64
+#cgo windows,arm64 LDFLAGS: -L${SRCDIR}/build/windows-arm64
 #cgo windows,cuda LDFLAGS: -lcuda -lcudart -lcublas -lcublasLt
 #cgo windows,rocm LDFLAGS: -lggml_rocm -lhipblas -lamdhip64 -lrocblas
 
diff --git a/llama/make/Makefile.ollama b/llama/make/Makefile.ollama
index e199e31f..0de16a77 100644
--- a/llama/make/Makefile.ollama
+++ b/llama/make/Makefile.ollama
@@ -2,17 +2,18 @@
 
 include make/common-defs.make
 
-ollama: $(OLLAMA_EXE)
+exe ollama: $(OLLAMA_EXE)
+dist_exe dist_ollama: $(DIST_OLLAMA_EXE)
 
 GO_DEPS=$(foreach dir,$(shell go list -deps -f '{{.Dir}}' ../ ),$(wildcard $(dir)/*.go))
 CPU_GOFLAGS="-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$(VERSION)\" \"-X=github.com/ollama/ollama/llama.CpuFeatures=$(subst $(space),$(comma),$(TARGET_CPU_FLAGS))\" $(TARGET_LDFLAGS)"
 PAYLOADS=$(wildcard $(RUNNERS_PAYLOAD_DIR)/*/*.gz)
 
-$(OLLAMA_EXE): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS)
-$(OLLAMA_EXE): $(COMMON_SRCS) $(COMMON_HDRS) $(PAYLOADS) $(GO_DEPS) 
+$(OLLAMA_EXE) $(DIST_OLLAMA_EXE): TARGET_CPU_FLAGS=$(CUSTOM_CPU_FLAGS)
+$(OLLAMA_EXE) $(DIST_OLLAMA_EXE): $(COMMON_SRCS) $(COMMON_HDRS) $(PAYLOADS) $(GO_DEPS) 
 	GOARCH=$(ARCH) go build -buildmode=pie $(CPU_GOFLAGS) -trimpath $(if $(CUSTOM_CPU_FLAGS),-tags $(subst $(space),$(comma),$(CUSTOM_CPU_FLAGS)))  -o $@ ../
 
-.PHONY: ollama
+.PHONY: ollama dist_ollama exe dist_exe
 
 # Handy debugging for make variables
 print-%:
diff --git a/llama/make/Makefile.rocm b/llama/make/Makefile.rocm
index 79b5e7c5..37f0cac7 100644
--- a/llama/make/Makefile.rocm
+++ b/llama/make/Makefile.rocm
@@ -18,8 +18,6 @@ else ifeq ($(OS),linux)
 	GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib
 	GPU_COMPILER_LINUX := $(wildcard $(HIP_PATH)/bin/hipcc)
 	GPU_COMPILER:=$(GPU_COMPILER_LINUX)
-	ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(GPU_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' '  | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
-	GPU_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
 	HIP_ARCHS?=$(HIP_ARCHS_COMMON) $(HIP_ARCHS_LINUX)
 endif
 
@@ -40,13 +38,19 @@ GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE
 GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
 GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE
 
-GPU_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
+# Note: ROCm requires an extra step of discovering and copying the transitive dependencies on linux
 ifeq ($(OS),windows)
-	ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH))/lib/ollama
+	ROCM_DIST_DEPS_DIR = ../dist/$(OS)-$(ARCH)/lib/ollama
+	GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
 else ifeq ($(OS),linux)
-	ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama
+	ROCM_DIST_DEPS_DIR = ../dist/$(OS)-$(ARCH)-rocm/lib/ollama
+	GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
+	ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(GPU_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' '  | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
+	GPU_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
+	FILTERED_GPU_TRANSITIVE_LIBS=$(sort $(filter-out $(addprefix %,$(notdir $(GPU_LIBS))), $(GPU_TRANSITIVE_LIBS)))
+	GPU_DIST_TRANSITIVE_LIB_DEPS = $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(FILTERED_GPU_TRANSITIVE_LIBS))))
 endif
-GPU_DIST_DEPS_LIBS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(GPU_LIBS)) $(notdir $(GPU_TRANSITIVE_LIBS))))
+GPU_DIST_LIB_DEPS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(GPU_LIBS))))
 ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
 
 ifeq ($(OS),linux)
@@ -105,9 +109,14 @@ endif
 include make/gpu.make
 
 # Adjust the rules from gpu.make to handle the ROCm dependencies properly
-$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(ROCBLAS_DIST_DEP_MANIFEST)
+$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(ROCBLAS_DIST_DEP_MANIFEST) $(GPU_DIST_TRANSITIVE_LIB_DEPS)
 $(ROCBLAS_DIST_DEP_MANIFEST):
 	@-mkdir -p $(dir $@)
 	@echo "Copying rocblas library..."
-	cd $(GPU_LIB_DIR)/rocblas/library/ && tar cf - . | (cd $(dir $@) && tar xf - )
+	(cd $(GPU_LIB_DIR)/rocblas/library/ && tar cf - . ) | (cd $(dir $@) && tar xf - )
 	@echo "rocblas library copy complete"
+
+$(GPU_DIST_TRANSITIVE_LIB_DEPS):
+	@-mkdir -p $(dir $@)
+	@echo "transitive dist deps libs rule cp: $@"
+	$(CP) $(dir $(firstword $(filter %/$(notdir $@),$(GPU_TRANSITIVE_LIBS))))$(notdir $@) $(dir $@)
diff --git a/llama/make/Makefile.sync b/llama/make/Makefile.sync
index 62b12405..8933c347 100644
--- a/llama/make/Makefile.sync
+++ b/llama/make/Makefile.sync
@@ -1,13 +1,14 @@
 # Helpers for managing our vendored llama.cpp repo and patch set
 
-REPO_ROOT:=$(dir $(patsubst %/,%,$(dir $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))))))
-DST_DIR:=$(dir $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))))
+REPO_ROOT:=../
+DEST_DIR:=./
 
-include $(REPO_ROOT)llama/vendoring
+include $(DEST_DIR)vendoring
 
-LLAMACPP_REPO := $(REPO_ROOT)llama/vendor/
+LLAMACPP_REPO := ./vendor/
 
-LLAMACPP_PATCH_DIR := $(DST_DIR)patches/
+# Relative to the vendor dir
+VENDOR_RELATIVE_PATCH_DIR := ../patches/
 
 
 help-sync:
@@ -29,7 +30,7 @@ apply-patches: $(LLAMACPP_REPO)
 	@git -C $(LLAMACPP_REPO) checkout -q $(LLAMACPP_BASE_COMMIT) || \
 		git -C $(LLAMACPP_REPO) fetch --all && git -C $(LLAMACPP_REPO) checkout -q $(LLAMACPP_BASE_COMMIT)
 	@echo "Applying ollama patches..."
-	@git -c 'user.name=nobody' -c 'user.email=<>' -C $(LLAMACPP_REPO) am -3 $(LLAMACPP_PATCH_DIR)/*.patch || \
+	@cd $(LLAMACPP_REPO) && git -c 'user.name=nobody' -c 'user.email=<>' am -3 $(VENDOR_RELATIVE_PATCH_DIR)*.patch || \
 		echo "Please resolve the conflicts in $(LLAMACPP_REPO), and run 'git am --continue' to continue applying subsequent patches"
 	@echo ""
 	@echo "The tracking repo $(LLAMACPP_REPO) is now in a detached state with all patches applied."
@@ -44,7 +45,7 @@ create-patches: $(LLAMACPP_REPO)
   		echo "ERROR: Your llama.cpp repo is dirty.  You must commit any pending changes for format-patch to generate patches"; \
   		exit 1; \
 	fi
-	git -C $(LLAMACPP_REPO) format-patch --no-signature --no-numbered --zero-commit -o $(LLAMACPP_PATCH_DIR) $(LLAMACPP_BASE_COMMIT)
+	@cd $(LLAMACPP_REPO) && git format-patch --no-signature --no-numbered --zero-commit -o $(VENDOR_RELATIVE_PATCH_DIR) $(LLAMACPP_BASE_COMMIT)
 
 # Vendoring template logic
 EXCLUDED_FILES=sgemm.cpp sgemm.h sampling_ext.cpp sampling_ext.h stb_image.h json.hpp llama_darwin.c base64.hpp
@@ -86,12 +87,12 @@ LLAMACPP_FILES=\
 	include/llama.h \
 	ggml/src/llamafile/sgemm.cpp \
 	ggml/src/llamafile/sgemm.h
-$(foreach name,$(LLAMACPP_FILES),$(eval $(call vendor_file,$(name),$(DST_DIR))))
+$(foreach name,$(LLAMACPP_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR))))
 
 # llama.cpp files -> llama/llamafile
 LLAMAFILE_FILES= \
 	ggml/src/llamafile/sgemm.h
-$(foreach name,$(LLAMAFILE_FILES),$(eval $(call vendor_file,$(name),$(DST_DIR)llamafile/)))
+$(foreach name,$(LLAMAFILE_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)llamafile/)))
 
 # ggml files -> llama/
 GGML_FILES= \
@@ -115,10 +116,10 @@ GGML_FILES= \
 	ggml/src/ggml-cpu-impl.h \
 	ggml/include/ggml-blas.h \
 	ggml/src/ggml-blas.cpp
-$(foreach name,$(GGML_FILES),$(eval $(call vendor_file,$(name),$(DST_DIR))))
+$(foreach name,$(GGML_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR))))
 
 # TODO generalize renaming pattern if we have more of these
-$(DST_DIR)ggml-metal_darwin_arm64.m : $(LLAMACPP_REPO)ggml/src/ggml-metal.m
+$(DEST_DIR)ggml-metal_darwin_arm64.m : $(LLAMACPP_REPO)ggml/src/ggml-metal.m
 	@echo "vendoring $(subst $(LLAMACPP_REPO),,$<)"; \
 		mkdir -p $(dir $@) && \
 		echo "/**" > $@ && \
@@ -128,20 +129,20 @@ $(DST_DIR)ggml-metal_darwin_arm64.m : $(LLAMACPP_REPO)ggml/src/ggml-metal.m
 		echo " */" >> $@ && \
 		echo "" >> $@ && \
 		cat $< >> $@
-VENDORED_FILES += $(DST_DIR)ggml-metal_darwin_arm64.m
+VENDORED_FILES += $(DEST_DIR)ggml-metal_darwin_arm64.m
 
 # ggml-cuda -> llama/ggml-cuda/
 GGML_CUDA_FILES= ggml/src/ggml-cuda/*.cu ggml/src/ggml-cuda/*.cuh
 GGML_CUDA_FILES_EXPANDED = $(addprefix ggml/src/ggml-cuda/,$(notdir $(wildcard $(addprefix $(LLAMACPP_REPO),$(GGML_CUDA_FILES)))))
-$(foreach name,$(GGML_CUDA_FILES_EXPANDED),$(eval $(call vendor_file,$(name),$(DST_DIR)ggml-cuda/)))
+$(foreach name,$(GGML_CUDA_FILES_EXPANDED),$(eval $(call vendor_file,$(name),$(DEST_DIR)ggml-cuda/)))
 
 GGML_TEMPLATE_FILES= ggml/src/ggml-cuda/template-instances/*.cu
 GGML_TEMPLATE_FILES_EXPANDED = 	$(addprefix ggml/src/ggml-cuda/template-instances/,$(notdir $(wildcard $(addprefix $(LLAMACPP_REPO),$(GGML_TEMPLATE_FILES)))))
-$(foreach name,$(GGML_TEMPLATE_FILES_EXPANDED),$(eval $(call vendor_file,$(name),$(DST_DIR)ggml-cuda/template-instances/)))
+$(foreach name,$(GGML_TEMPLATE_FILES_EXPANDED),$(eval $(call vendor_file,$(name),$(DEST_DIR)ggml-cuda/template-instances/)))
 
 GGML_VENDOR_FILES= ggml/src/ggml-cuda/vendors/*.h
 GGML_VENDOR_FILES_EXPANDED=$(addprefix ggml/src/ggml-cuda/vendors/,$(notdir $(wildcard $(addprefix $(LLAMACPP_REPO),$(GGML_VENDOR_FILES)))))
-$(foreach name,$(GGML_VENDOR_FILES_EXPANDED),$(eval $(call vendor_file,$(name),$(DST_DIR)ggml-cuda/vendors/)))
+$(foreach name,$(GGML_VENDOR_FILES_EXPANDED),$(eval $(call vendor_file,$(name),$(DEST_DIR)ggml-cuda/vendors/)))
 
 # llava -> llama/
 LAVA_FILES= \
@@ -163,23 +164,23 @@ LAVA_FILES+= \
 	common/json-schema-to-grammar.cpp \
 	common/json-schema-to-grammar.h \
 	common/base64.hpp
-$(foreach name,$(LAVA_FILES),$(eval $(call vendor_file,$(name),$(DST_DIR))))
+$(foreach name,$(LAVA_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR))))
 
-$(DST_DIR)build-info.cpp:
+$(DEST_DIR)build-info.cpp:
 	@echo "Generating $@"
 	@echo "int LLAMA_BUILD_NUMBER = 0;" > $@
 	@echo "char const *LLAMA_COMMIT = \"$(LLAMACPP_BASE_COMMIT)\";" >> $@
 	@echo "char const *LLAMA_COMPILER = \"\";" >> $@
 	@echo "char const *LLAMA_BUILD_TARGET = \"\";" >> $@
-VENDORED_FILES += $(DST_DIR)build-info.cpp
+VENDORED_FILES += $(DEST_DIR)build-info.cpp
 
 
 sync: $(LLAMACPP_REPO) .WAIT $(VENDORED_FILES) .WAIT remove-stale-files
 
 PATS=*.c *.h *.cpp *.m *.metal *.cu *.cuh
-NATIVE_DIRS=$(DST_DIR) $(DST_DIR)llamafile/ $(DST_DIR)ggml-cuda/ $(DST_DIR)ggml-cuda/template-instances/ $(DST_DIR)ggml-cuda/vendors/
+NATIVE_DIRS=$(DEST_DIR) $(DEST_DIR)llamafile/ $(DEST_DIR)ggml-cuda/ $(DEST_DIR)ggml-cuda/template-instances/ $(DEST_DIR)ggml-cuda/vendors/
 ALL_NATIVE_FILES=$(foreach dir,$(NATIVE_DIRS),$(wildcard $(addprefix $(dir),$(PATS))))
-EXTRA_NATIVE_FILES=$(filter-out $(VENDORED_FILES) $(addprefix $(DST_DIR),$(OLLAMA_NATIVE_FILES)), $(ALL_NATIVE_FILES))
+EXTRA_NATIVE_FILES=$(filter-out $(VENDORED_FILES) $(addprefix $(DEST_DIR),$(OLLAMA_NATIVE_FILES)), $(ALL_NATIVE_FILES))
 remove-stale-files:
 	@rm -f $(EXTRA_NATIVE_FILES)
 
diff --git a/llama/make/Makefile.test b/llama/make/Makefile.test
index 9b70e934..3b27d0db 100644
--- a/llama/make/Makefile.test
+++ b/llama/make/Makefile.test
@@ -3,13 +3,13 @@
 include make/common-defs.make
 
 test:
-	cd $(abspath $(SRC_DIR)/..) && go test ./... 
+	cd .. && go test ./... 
 
 integration: $(OLLAMA_EXE)
-	cd $(abspath $(SRC_DIR)/..) && go test --tags=integration ./integration -v
+	cd .. && go test --tags=integration ./integration -v
 
 lint:
-	cd $(abspath $(SRC_DIR)/..) && golangci-lint run -v
+	cd .. && golangci-lint run -v
 
 # Note: in this makefile we error instead of building to allow more fine-grain control of testing flows
 $(OLLAMA_EXE):
diff --git a/llama/make/common-defs.make b/llama/make/common-defs.make
index 77f8bca3..eb4e9217 100644
--- a/llama/make/common-defs.make
+++ b/llama/make/common-defs.make
@@ -21,18 +21,28 @@ export CGO_CXXFLAGS_ALLOW = -mfma|-mf16c
 export HIP_PLATFORM = amd
 export CGO_ENABLED=1
 
-SRC_DIR := $(dir $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))))
-BUILD_DIR = $(SRC_DIR)build/$(OS)-$(ARCH)
-DIST_BASE = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH))
+ifneq ($(OS),windows)
+CCACHE:=$(shell command -v ccache 2>/dev/null || echo "")
+# Relative paths are used to avoid tripping over spaces in the checkout path.
+# The working directory must be <repo>/llama/
+BUILD_DIR = ./build/$(OS)-$(ARCH)
+DIST_BASE = ../dist/$(OS)-$(ARCH)
+RUNNERS_PAYLOAD_DIR = ../build/$(OS)/$(ARCH)
+DIST_OLLAMA_EXE=$(DIST_BASE)/bin/ollama$(EXE_EXT)
+else
+# Absolute paths, converted by cygpath to 8.3 short form (no spaces); PWD carries its own quotes
+PWD="$(shell pwd)"
+CYGPWD:=$(shell cygpath -m -s $(PWD))
+BUILD_DIR = $(CYGPWD)/build/$(OS)-$(ARCH)
+DIST_BASE = $(CYGPWD)/../dist/$(OS)-$(ARCH)
+RUNNERS_PAYLOAD_DIR = $(CYGPWD)/../build/$(OS)/$(ARCH)
+DIST_OLLAMA_EXE=$(DIST_BASE)/ollama$(EXE_EXT)
+endif
 DIST_LIB_DIR = $(DIST_BASE)/lib/ollama
 RUNNERS_DIST_DIR = $(DIST_LIB_DIR)/runners
-RUNNERS_PAYLOAD_DIR = $(abspath $(SRC_DIR)/../build/$(OS)/$(ARCH))
 RUNNERS_BUILD_DIR = $(BUILD_DIR)/runners
 DEFAULT_RUNNER := $(if $(and $(filter darwin,$(OS)),$(filter arm64,$(ARCH))),metal,cpu)
 GZIP:=$(shell command -v pigz 2>/dev/null || echo "gzip")
-ifneq ($(OS),windows)
-	CCACHE:=$(shell command -v ccache 2>/dev/null || echo "")
-endif
 VERSION?=$(shell git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")
 
 # Conditionally enable ccache for cgo builds too
@@ -56,7 +66,6 @@ endif
 
 ifeq ($(OS),windows)
 	CP := cp
-	SRC_DIR := $(shell cygpath -m -s "$(SRC_DIR)")
 	OBJ_EXT := obj
 	SHARED_EXT := dll
 	EXE_EXT := .exe
@@ -68,7 +77,7 @@ ifneq ($(HIP_PATH),)
 	export HIP_PATH
 endif
 else ifeq ($(OS),linux)
-	CP := cp -af
+	CP := cp -df
 	OBJ_EXT := o
 	SHARED_EXT := so
 	SHARED_PREFIX := lib
@@ -78,7 +87,7 @@ else
 	OBJ_EXT := o
 	SHARED_EXT := so
 	CPU_FLAG_PREFIX := -m
-	CP := cp -af
+	CP := cp -df
 endif
 
 COMMON_SRCS := \
@@ -88,4 +97,4 @@ COMMON_HDRS := \
 	$(wildcard *.h) \
 	$(wildcard *.hpp)
 
-OLLAMA_EXE=$(abspath $(SRC_DIR)../ollama$(EXE_EXT))
\ No newline at end of file
+OLLAMA_EXE=../ollama$(EXE_EXT)
\ No newline at end of file
diff --git a/llama/make/cuda.make b/llama/make/cuda.make
index aa5eb060..9e396d7c 100644
--- a/llama/make/cuda.make
+++ b/llama/make/cuda.make
@@ -36,7 +36,7 @@ else ifeq ($(OS),linux)
 	GPU_COMPILER_EXTRA_FLAGS = -fPIC -Wno-unused-function -std=c++11
 	GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
 endif
-GPU_DIST_DEPS_LIBS= $(sort $(addprefix $(DIST_GPU_RUNNER_DEPS_DIR)/,$(notdir $(GPU_LIBS))))
+GPU_DIST_LIB_DEPS= $(sort $(addprefix $(DIST_GPU_RUNNER_DEPS_DIR)/,$(notdir $(GPU_LIBS))))
 
 GPU_RUNNER_ARCH_FLAGS := $(foreach arch,$(subst ;,$(space),$(CUDA_ARCHITECTURES)),--generate-code=arch=compute_$(arch)$(comma)code=[compute_$(arch)$(comma)sm_$(arch)]) \
 	-DGGML_CUDA_USE_GRAPHS=1
diff --git a/llama/make/gpu.make b/llama/make/gpu.make
index 39354fb9..2f5a58c0 100644
--- a/llama/make/gpu.make
+++ b/llama/make/gpu.make
@@ -78,10 +78,10 @@ $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cpp
 	@-mkdir -p $(dir $@)
 	$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CXXFLAGS) -o $@ $<
 $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/" $(CGO_EXTRA_LDFLAGS)
-$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
+$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(BUILD_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
 	@-mkdir -p $(dir $@)
 	GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
-$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
+$(BUILD_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
 	@-mkdir -p $(dir $@)
 	$(CCACHE) $(GPU_COMPILER) --shared -L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
 
@@ -89,13 +89,13 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREF
 $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
 	@-mkdir -p $(dir $@)
 	$(CP) $< $@
-$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(GPU_DIST_DEPS_LIBS)
-$(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
+$(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)$(GPU_RUNNER_EXTRA_VARIANT)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(GPU_DIST_LIB_DEPS)
+$(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(BUILD_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
 	@-mkdir -p $(dir $@)
 	$(CP) $< $@
-$(GPU_DIST_DEPS_LIBS): 
+$(GPU_DIST_LIB_DEPS):
 	@-mkdir -p $(dir $@)
-	$(CP) $(dir $(filter %$(notdir $@),$(GPU_LIBS) $(GPU_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@)
+	$(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@)
 
 # Payload targets
 $(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server