#!/bin/bash
#
# Builds the ggml HIP/hipBLAS backend as a shared library with a single hipcc
# invocation. Source paths are relative, so run this from the directory that
# contains ggml-cuda.cu, ggml-cuda/, ggml.c, etc.
#
# Output (written to the current directory):
#   ggml-hipblas.dll     when `uname -s` reports Windows_NT or MINGW64_NT*
#   libggml-hipblas.so   otherwise
#
# Environment:
#   HIPCC_PARALLEL_JOBS  value passed to hipcc's -parallel-jobs (default: 12)

set -euo pipefail

# GPU targets compiled on every platform.
archs=(
  gfx900
  gfx940
  gfx941
  gfx942
  gfx1010
  gfx1012
  gfx1030
  gfx1100
  gfx1101
  gfx1102
)

# Extra GPU targets that are only added on the non-Windows branch below.
linux_archs=(
  gfx906:xnack-
  gfx908:xnack-
  gfx90a:xnack+
  gfx90a:xnack-
)

# Fail early with a clear message; 127 matches the shell's own
# "command not found" status, so callers see the same exit code as before.
if ! command -v hipcc >/dev/null 2>&1; then
  printf 'error: hipcc not found in PATH\n' >&2
  exit 127
fi

os="$(uname -s)"

# The full hipcc argument vector is accumulated in one array so that every
# argument stays a separate word without relying on unquoted word-splitting.
# Argument order is kept identical to the original command line.
hipcc_args=(
  -v
  "-parallel-jobs=${HIPCC_PARALLEL_JOBS:-12}"
  -O3
  -DGGML_USE_CUDA
  -DGGML_BUILD=1
  -DGGML_SHARED=1
  -DGGML_CUDA_DMMV_X=32
  -DGGML_CUDA_MMV_Y=1
  -DGGML_SCHED_MAX_COPIES=4
  -DGGML_USE_HIPBLAS
  -DGGML_USE_LLAMAFILE
  -DHIP_FAST_MATH
  -DNDEBUG
  -DK_QUANTS_PER_ITERATION=2
  -D_CRT_SECURE_NO_WARNINGS
  # NOTE(review): this looks like a CMake variable name passed as a
  # preprocessor define; kept unchanged from the original command line.
  -DCMAKE_POSITION_INDEPENDENT_CODE=on
  -D_GNU_SOURCE
  -Wno-expansion-to-defined
  -Wno-invalid-noreturn
  -Wno-ignored-attributes
  -Wno-pass-failed
  -Wno-deprecated-declarations
  -Wno-unused-result
  -I.
  -fPIC
  -lhipblas -lamdhip64 -lrocblas
  -shared
)

# NOTE(review): "Windows_NT" is normally the value of the OS environment
# variable rather than something `uname -s` prints — confirm whether ${OS}
# was intended. The comparison is kept as in the original.
if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then
  output="ggml-hipblas.dll"
  hipcc_args+=(-Xclang --dependent-lib=msvcrt -Wl,/subsystem:console)
else
  output="libggml-hipblas.so"
  archs+=("${linux_archs[@]}")
fi

# One --offload-arch flag per selected GPU target.
for arch in "${archs[@]}"; do
  hipcc_args+=("--offload-arch=${arch}")
done

hipcc_args+=(-o "$output")

# Translation units; the glob is expanded by the shell at this point.
hipcc_args+=(
  ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp
)

hipcc "${hipcc_args[@]}"

# Disabled defines retained from the original script:
# -D_DLL \
# -D_MT \
# -D_XOPEN_SOURCE=600 \