If the user checks out the repo in a path that contains spaces, make gets really confused so use relative paths for everything in-repo to avoid breakage.
63 lines
2.9 KiB
Makefile
63 lines
2.9 KiB
Makefile
# Common definitions for all cuda versions
|
|
|
|
ifndef GPU_RUNNER_VARIANT
|
|
dummy:
|
|
$(error This makefile is not meant to build directly, but instead included in other Makefiles that set required variables)
|
|
endif
|
|
|
|
|
|
GPU_RUNNER_NAME := cuda$(GPU_RUNNER_VARIANT)
|
|
GPU_RUNNER_GO_TAGS := cuda cuda$(GPU_RUNNER_VARIANT)
|
|
GPU_RUNNER_DRIVER_LIB_LINK := -lcuda
|
|
GPU_RUNNER_LIBS_SHORT := cublas cudart cublasLt
|
|
GPU_LIB_DIR_WIN = $(GPU_PATH_ROOT_WIN)/bin
|
|
ifneq ($(GPU_PATH_ROOT_LINUX),)
|
|
GPU_LIB_DIR_LINUX=$(strip $(shell ls -d $(GPU_PATH_ROOT_LINUX)/lib64 2>/dev/null || ls -d $(GPU_PATH_ROOT_LINUX)/lib 2>/dev/null))
|
|
GPU_COMPILER_LINUX = $(GPU_PATH_ROOT_LINUX)/bin/nvcc
|
|
endif
|
|
CGO_EXTRA_LDFLAGS_WIN = -L"$(GPU_PATH_ROOT_WIN)/lib/x64"
|
|
GPU_COMPILER_WIN = $(GPU_PATH_ROOT_WIN)/bin/nvcc
|
|
GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) -D_WIN32_WINNT=0x602
|
|
GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE
|
|
GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
|
|
GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -Xcompiler -fPIC -D_GNU_SOURCE
|
|
|
|
ifeq ($(OS),windows)
|
|
# On windows, nvcc uses msvc which does not support avx512vbmi avx512vnni avx512bf16, but macros can turn them on
|
|
GPU_VECTOR_FLAGS=$(call uc,$(filter-out avx512bf16,$(filter-out avx512vnni,$(filter-out avx512vbmi,$(GPU_RUNNER_CPU_FLAGS)))))
|
|
GPU_COMPILER_EXTRA_FLAGS=$(if $(filter avx512vbmi,$(GPU_RUNNER_CPU_FLAGS)),-D__AVX512VBMI__)
|
|
GPU_COMPILER_EXTRA_FLAGS+=$(if $(filter avx512vnni,$(GPU_RUNNER_CPU_FLAGS)),-D__AVX512VNNI__)
|
|
GPU_COMPILER_EXTRA_FLAGS+=$(if $(filter avx512bf16,$(GPU_RUNNER_CPU_FLAGS)),-D__AVX512BF16__)
|
|
GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
|
|
else ifeq ($(OS),linux)
|
|
# On linux, nvcc requires avx512 -> -mavx512f -mavx512dq -mavx512bw
|
|
GPU_VECTOR_FLAGS=$(if $(filter avx512,$(GPU_RUNNER_CPU_FLAGS)),avx512f avx512dq avx512bw) $(filter-out avx512,$(GPU_RUNNER_CPU_FLAGS))
|
|
CUDA_PATH?=/usr/local/cuda
|
|
GPU_COMPILER_EXTRA_FLAGS = -fPIC -Wno-unused-function -std=c++11
|
|
GPU_LIBS = $(sort $(wildcard $(addsuffix *.$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))))
|
|
endif
|
|
GPU_DIST_LIB_DEPS= $(sort $(addprefix $(DIST_GPU_RUNNER_DEPS_DIR)/,$(notdir $(GPU_LIBS))))
|
|
|
|
GPU_RUNNER_ARCH_FLAGS := $(foreach arch,$(subst ;,$(space),$(CUDA_ARCHITECTURES)),--generate-code=arch=compute_$(arch)$(comma)code=[compute_$(arch)$(comma)sm_$(arch)]) \
|
|
-DGGML_CUDA_USE_GRAPHS=1
|
|
GPU_COMPILER_CUFLAGS = \
|
|
$(GPU_COMPILER_EXTRA_FLAGS) \
|
|
-Xcompiler "$(addprefix $(CPU_FLAG_PREFIX),$(GPU_VECTOR_FLAGS))" \
|
|
-t2 \
|
|
-DGGML_CUDA_DMMV_X=32 \
|
|
-DGGML_CUDA_MMV_Y=1 \
|
|
-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
|
|
-DGGML_USE_CUDA=1 \
|
|
-DGGML_SHARED=1 \
|
|
-DGGML_BUILD=1 \
|
|
-DGGML_USE_LLAMAFILE \
|
|
-DK_QUANTS_PER_ITERATION=2 \
|
|
-DNDEBUG \
|
|
-D_GNU_SOURCE \
|
|
-D_XOPEN_SOURCE=600 \
|
|
-Wno-deprecated-gpu-targets \
|
|
--forward-unknown-to-host-compiler \
|
|
-use_fast_math \
|
|
-I. \
|
|
-O3
|