#!/bin/bash
#
# Builds the ggml HIP (hipBLAS) backend as a shared library using hipcc.
# This first section only prepares configuration:
#   archs            - GPU targets passed to hipcc via --offload-arch
#   output           - file name of the shared library to produce
#   additional_flags - extra hipcc flags as ONE whitespace-separated string
#                      (it is expanded unquoted where the compiler is invoked)

# GPU targets that are built on every platform.
archs=(
  gfx900
  gfx940
  gfx941
  gfx942
  gfx1010
  gfx1012
  gfx1030
  gfx1100
  gfx1101
  gfx1102
)

# Targets carrying an explicit xnack setting; only added on non-Windows hosts.
linux_archs=(
  gfx906:xnack-
  gfx908:xnack-
  gfx90a:xnack+
  gfx90a:xnack-
)

os="$(uname -s)"

additional_flags=""

# Windows hosts produce a DLL and request msvcrt as a dependent library;
# everything else produces a .so and also builds the xnack-qualified targets.
if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then
  output="ggml-hipblas.dll"
  additional_flags=" -Xclang --dependent-lib=msvcrt"
else
  output="libggml-hipblas.so"
  archs+=("${linux_archs[@]}")
fi

# One --offload-arch flag per selected GPU target.
for arch in "${archs[@]}"; do
  additional_flags+=" --offload-arch=$arch"
done

# Array of all source files to compile.
# The glob patterns are expanded by the shell directly inside the array
# assignment, so each matching file becomes exactly one element. A pattern
# that matches nothing is kept as literal text (bash default without
# nullglob), which makes the compiler report the missing file.
sources=(
  ggml-cuda/template-instances/fattn-wmma*.cu
  ggml-cuda/template-instances/mmq*.cu
  ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu
  ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu
  ggml-cuda/template-instances/fattn-vec*f16-f16.cu
  ggml-cuda.cu
  ggml-cuda/*.cu
  ggml.c
  ggml-backend.c
  ggml-alloc.c
  ggml-quants.c
  sgemm.cpp
)

#######################################
# Compile a single source file into an object file with hipcc.
# Globals:
#   additional_flags (read) - whitespace-separated extra hipcc flags
# Arguments:
#   $1 - path of the source file
# Outputs:
#   Object file next to the source: a trailing ".cu" is replaced by ".o";
#   any other name simply gets ".o" appended (e.g. ggml.c -> ggml.c.o).
# Returns:
#   Exit status of hipcc.
#######################################
compile_source() {
  local src="$1"
  local -a extra_flags=()

  # Split the flag string on whitespace only, without glob expansion.
  # read returns non-zero at end of input when -d '' is used; that is expected.
  read -r -d '' -a extra_flags <<< "$additional_flags" || true

  hipcc -c -O3 -DGGML_USE_CUDA -DGGML_BUILD=1 -DGGML_SHARED=1 -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 \
    -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_HIPBLAS -DGGML_USE_LLAMAFILE -DHIP_FAST_MATH -DNDEBUG \
    -DK_QUANTS_PER_ITERATION=2 -D_CRT_SECURE_NO_WARNINGS -DCMAKE_POSITION_INDEPENDENT_CODE=on \
    -D_GNU_SOURCE -Wno-expansion-to-defined -Wno-invalid-noreturn -Wno-ignored-attributes -Wno-pass-failed \
    -Wno-deprecated-declarations -Wno-unused-result -I. \
    "${extra_flags[@]}" -o "${src%.cu}.o" "$src"
}

#######################################
# Ctrl+C handler: announce the shutdown, then signal the whole process group.
# `kill 0` sends the default signal (SIGTERM) to every process in the current
# process group, which covers the background compile jobs and this script.
# Outputs:
#   One status line on stdout.
#######################################
cleanup() {
  echo "Terminating all background processes..."
  kill 0
}

# Set trap to handle SIGINT (Ctrl+C)
trap cleanup SIGINT

# Limit the number of concurrent jobs to the CPU count. If neither nproc nor
# getconf yields a positive integer, fall back to one job at a time.
max_jobs="$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null)"
if ! [[ "$max_jobs" =~ ^[1-9][0-9]*$ ]]; then
  max_jobs=1
fi
job_count=0
failed_jobs=0

for src in "${sources[@]}"; do
  echo "$src"
  compile_source "$src" &
  job_count=$((job_count + 1))
  if [[ $job_count -ge $max_jobs ]]; then
    # Reap one finished job; wait -n returns that job's exit status.
    wait -n || failed_jobs=$((failed_jobs + 1))
    job_count=$((job_count - 1))
  fi
done

# Drain the remaining jobs one at a time so every exit status is observed
# (a plain `wait` without arguments always returns 0).
while [[ $job_count -gt 0 ]]; do
  wait -n || failed_jobs=$((failed_jobs + 1))
  job_count=$((job_count - 1))
done

# Safety net: make sure no background job is still running.
wait

# Stop here when any compile failed instead of linking a partial object set.
if [[ $failed_jobs -gt 0 ]]; then
  echo "error: $failed_jobs compile job(s) failed; not linking" >&2
  exit 1
fi

# Link all object files into a shared library
echo "Linking object files..."
hipcc -v -shared -o "$output" *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o -lhipblas -lamdhip64 -lrocblas
# Remember the linker result: the cleanup below would otherwise decide the
# script's exit status and hide a failed link.
link_status=$?

# Clean up object files after linking (done whether or not the link succeeded)
rm -f -- *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o

# Report the link result as the script's exit status.
exit "$link_status"