From 28bedcd807291bf97b64570349d7e7533f6bed78 Mon Sep 17 00:00:00 2001
From: jmorganca
Date: Mon, 20 May 2024 15:27:10 -0700
Subject: [PATCH] wip

---
 llama/build_hipblas.sh | 8 ++++++--
 llama/llama.go         | 7 +++++--
 2 files changed, 11 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 llama/build_hipblas.sh

diff --git a/llama/build_hipblas.sh b/llama/build_hipblas.sh
old mode 100644
new mode 100755
index f09106e6..3d374eb8
--- a/llama/build_hipblas.sh
+++ b/llama/build_hipblas.sh
@@ -22,8 +22,11 @@ linux_archs=(
 
 os="$(uname -s)"
 
+additional_flags=""
+
 if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then
     output="ggml-hipblas.dll"
+    additional_flags="-Xclang --dependent-lib=msvcrt -Wl,/subsystem:console"
 else
     output="libggml-hipblas.so"
     archs+=("${linux_archs[@]}")
@@ -37,7 +40,6 @@ done
 hipcc \
     -parallel-jobs=12 \
     -O3 \
-    $offload_arch_flags \
     -DGGML_USE_CUDA \
     -DGGML_BUILD=1 \
     -DGGML_SHARED=1 \
@@ -51,7 +53,9 @@ hipcc \
     -DK_QUANTS_PER_ITERATION=2 \
     -D_CRT_SECURE_NO_WARNINGS \
     -DCMAKE_POSITION_INDEPENDENT_CODE=on \
-    -Xclang --dependent-lib=msvcrt -Wl,/subsystem:console \
+    -D_GNU_SOURCE \
+    $offload_arch_flags \
+    $additional_flags \
     -Wno-expansion-to-defined \
     -Wno-invalid-noreturn \
     -Wno-ignored-attributes \
diff --git a/llama/llama.go b/llama/llama.go
index 0de3e21f..b2897582 100644
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -17,10 +17,13 @@ package llama
 // #cgo avx2 CXXFLAGS: -mavx2 -mfma
 // #cgo cuda CFLAGS: -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
 // #cgo cuda CXXFLAGS: -DGGML_USE_CUDA -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
-// #cgo rocm CXXFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_MULTIPLATFORM -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
+// #cgo rocm CFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
+// #cgo rocm CXXFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
+// #cgo rocm LDFLAGS: -L${SRCDIR} -lggml-hipblas -lhipblas -lamdhip64 -lrocblas
 // #cgo windows,cuda LDFLAGS: -L. -L"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.3/lib/x64" -lggml-cuda -lcuda -lcudart -lcublas -lcublasLt
-// #cgo windows,rocm LDFLAGS: -L. -L"C:/Program Files/AMD/ROCm/5.7/lib" -lggml-hipblas -lhipblas -lamdhip64 -lrocblas
+// #cgo windows,rocm LDFLAGS: -L. -L"C:/Program Files/AMD/ROCm/5.7/lib"
 // #cgo linux,cuda LDFLAGS: -L${SRCDIR} -L/usr/local/cuda/lib64 -lggml-cuda -lcuda -lcudart -lcublas -lcublasLt -lpthread -ldl -lrt
+// #cgo linux,rocm LDFLAGS: -L/opt/rocm/lib
 // #include <stdlib.h>
 // #include "llama.h"
 // #include "clip.h"
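
A note on how these pieces are assumed to fit together (usage sketch, not part of the patch): the `#cgo rocm ...` directives in llama.go are gated on the `rocm` build tag, so they only take effect when that tag is passed to the Go toolchain, and the shared library they link against (`-lggml-hipblas`) must be built first by the script modified above. A minimal sketch, assuming the script and package paths shown in the diff:

    # build the ROCm/hipBLAS shared library that -lggml-hipblas links
    # against (libggml-hipblas.so, or ggml-hipblas.dll on Windows)
    ./llama/build_hipblas.sh

    # enable the rocm-tagged #cgo lines when compiling the Go package
    go build -tags rocm ./llama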