Merge branch 'main' into feature/kv-quant
commit bc3c08c17f
@@ -136,7 +136,7 @@ Type: filesandordirs; Name: "{%TEMP}\ollama*"
 Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
 
 [Messages]
-WizardReady=Ollama Windows Preview
+WizardReady=Ollama
 ReadyLabel1=%nLet's get you up and running with your own large language models.
 SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or finish the other installer, then click OK to continue with this install, or Cancel to exit.
 
@@ -4,6 +4,7 @@
 #include "gpu_info_nvcuda.h"
 
 void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
+  LOG(resp->ch.verbose, "initializing %s\n", nvcuda_lib_path);
   CUresult ret;
   resp->err = NULL;
   resp->num_devices = 0;
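
The added line above logs through the project's verbose-gated LOG macro, whose definition lives in the gpu_info headers rather than in this diff. The sketch below is an assumed minimal stand-in (not the project's actual macro body) showing the gating pattern the new calls rely on:

```c
#include <stdio.h>

/* Assumed stand-in for the project's LOG macro (the real definition is
 * in the gpu_info headers): print to stderr only when the verbose flag
 * passed as the first argument is non-zero. */
#define LOG(verbose, ...)             \
  do {                                \
    if (verbose) {                    \
      fprintf(stderr, __VA_ARGS__);   \
    }                                 \
  } while (0)

int main(void) {
  int verbose = 1;
  LOG(verbose, "initializing %s\n", "/usr/lib/libcuda.so"); /* printed */
  verbose = 0;
  LOG(verbose, "suppressed unless verbose\n"); /* not printed */
  return 0;
}
```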
@@ -57,8 +58,10 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
       resp->cudaErr = -1;
       return;
     }
+    LOG(resp->ch.verbose, "dlsym: %s - %p\n", l[i].s, *l[i].p);
   }
 
+  LOG(resp->ch.verbose, "calling cuInit\n");
   ret = (*resp->ch.cuInit)(0);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
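
The loop this hunk extends resolves each CUDA entry point by name and stores it through a table of {symbol, destination} pairs: `l[i].s` is the symbol name and `l[i].p` the function-pointer slot. The real table is defined elsewhere in gpu_info_nvcuda.c; the following is an assumed, self-contained sketch of the same pattern (build with `cc demo.c -ldl` on Linux):

```c
#include <dlfcn.h>
#include <stdio.h>

/* Assumed reconstruction of the lookup-table pattern: each entry pairs a
 * symbol name with the slot that receives its address from dlsym. */
typedef int (*cu_init_fn)(unsigned int);

int main(void) {
  cu_init_fn cuInit = NULL;
  struct { const char *s; void **p; } l[] = {
      {"cuInit", (void **)&cuInit},
  };
  void *handle = dlopen("libcuda.so.1", RTLD_LAZY | RTLD_GLOBAL);
  if (!handle) {
    fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }
  for (size_t i = 0; i < sizeof(l) / sizeof(l[0]); i++) {
    *l[i].p = dlsym(handle, l[i].s);
    if (*l[i].p == NULL) {
      /* mirrors the resp->cudaErr = -1 early return in the hunk above */
      fprintf(stderr, "missing symbol %s\n", l[i].s);
      dlclose(handle);
      return 1;
    }
    fprintf(stderr, "dlsym: %s - %p\n", l[i].s, *l[i].p);
  }
  dlclose(handle);
  return 0;
}
```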
@@ -75,15 +78,18 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
   resp->ch.driver_minor = 0;
 
   // Report driver version if we're in verbose mode, ignore errors
+  LOG(resp->ch.verbose, "calling cuDriverGetVersion\n");
   ret = (*resp->ch.cuDriverGetVersion)(&version);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
   } else {
+    LOG(resp->ch.verbose, "raw version 0x%x\n", version);
     resp->ch.driver_major = version / 1000;
     resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
     LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor);
   }
 
+  LOG(resp->ch.verbose, "calling cuDeviceGetCount\n");
   ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret);
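
cuDriverGetVersion returns the version packed as major * 1000 + minor * 10 (so 12040 means CUDA 12.4), which is exactly what the two divisions above unpack. A standalone check of that arithmetic, with a sample value in place of the real driver call:

```c
#include <stdio.h>

int main(void) {
  /* Sample packed value; the real one comes from cuDriverGetVersion.
   * Encoding: major * 1000 + minor * 10, e.g. 12040 -> 12.4. */
  int version = 12040;
  int driver_major = version / 1000;
  int driver_minor = (version - (driver_major * 1000)) / 10;
  printf("raw version 0x%x -> CUDA driver version: %d.%d\n",
         version, driver_major, driver_minor);
  return 0;
}
```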
@@ -94,6 +100,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
     resp->cudaErr = ret;
     return;
   }
+  LOG(resp->ch.verbose, "device count %d\n", resp->num_devices);
 }
 
 const int buflen = 256;
@@ -108,7 +108,7 @@ Custom CPU settings are not currently supported in the new Go server build but w
 
 #### Containerized Linux Build
 
-If you have Docker available, you can build linux binaries with `OLLAMA_NEW_RUNNERS=1 ./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting binary is placed in `./dist`
+If you have Docker available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included. The resulting binary is placed in `./dist`
 
 ### Windows
 
@@ -58,6 +58,8 @@ endif
 GPU_COMPILER_CUFLAGS = \
 	$(GPU_COMPILER_FPIC) \
 	$(addprefix -m,$(GPU_RUNNER_CPU_FLAGS)) \
+	-mf16c \
+	-mfma \
 	-parallel-jobs=2 \
 	-c \
 	-O3 \
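
The new -mf16c and -mfma flags let the compiler emit F16C half-precision conversion and fused multiply-add instructions in the host-side code, so the resulting runner assumes a CPU that has those features. A small GCC/Clang-only (x86) sketch for probing them at runtime:

```c
#include <stdio.h>

/* Uses the GCC/Clang __builtin_cpu_supports builtin (x86 only) to report
 * whether the host CPU has the features that -mf16c and -mfma assume. */
int main(void) {
  __builtin_cpu_init();
  printf("f16c: %s\n", __builtin_cpu_supports("f16c") ? "yes" : "no");
  printf("fma:  %s\n", __builtin_cpu_supports("fma") ? "yes" : "no");
  return 0;
}
```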
@@ -77,6 +79,9 @@ GPU_COMPILER_CUFLAGS = \
 	-D_CRT_SECURE_NO_WARNINGS \
 	-D_GNU_SOURCE \
 	-D_XOPEN_SOURCE=600 \
+	-DUSE_PROF_API=1 \
+	-std=gnu++14 \
+	-x hip \
 	-mllvm=-amdgpu-early-inline-all=true \
 	-mllvm=-amdgpu-function-calls=false \
 	-Wno-expansion-to-defined \
@@ -87,6 +92,12 @@ GPU_COMPILER_CUFLAGS = \
 	-Wno-unused-result \
 	-I.
 
+# Workaround buggy P2P copy on some windows multi-GPU setups
+# This workaround breaks linux systems with small system RAM, so only enable on windows
+ifeq ($(OS),windows)
+GPU_COMPILER_CUFLAGS += -DGGML_CUDA_NO_PEER_COPY=1
+endif
+
 include make/gpu.make
 
 # Adjust the rules from gpu.make to handle the ROCm dependencies properly
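
Defining GGML_CUDA_NO_PEER_COPY makes the GGML CUDA/HIP backend avoid direct peer-to-peer copies between GPUs. The sketch below is an assumed illustration of the kind of compile-time guard such a define controls, not ggml's actual code; the fallback shown (cudaMemcpyAsync with cudaMemcpyDefault) is one plausible way to let the runtime stage the transfer rather than issuing a direct peer copy:

```c
#include <cuda_runtime.h>

/* Illustrative only: copy n bytes between devices, either via a direct
 * peer-to-peer copy or, with GGML_CUDA_NO_PEER_COPY defined, via a
 * runtime-staged copy that does not require peer access. */
static cudaError_t copy_between_gpus(void *dst, int dst_dev,
                                     const void *src, int src_dev,
                                     size_t n, cudaStream_t stream) {
#ifdef GGML_CUDA_NO_PEER_COPY
  (void)dst_dev;
  (void)src_dev;
  return cudaMemcpyAsync(dst, src, n, cudaMemcpyDefault, stream);
#else
  return cudaMemcpyPeerAsync(dst, dst_dev, src, src_dev, n, stream);
#endif
}
```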
@@ -85,7 +85,7 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS
 	GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
 $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
 	@-mkdir -p $(dir $@)
-	$(CCACHE) $(GPU_COMPILER) --shared $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
+	$(CCACHE) $(GPU_COMPILER) --shared -L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
 
 # Distribution targets
 $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%