From b1696e308e3b34e8fa4367616220d570ad35c30d Mon Sep 17 00:00:00 2001 From: jmorganca Date: Sat, 18 May 2024 23:07:19 -0400 Subject: [PATCH] Add missing hipcc flags --- llama/.gitignore | 1 + llama/hipblas.sh | 85 +++++++++++------------------------------------- 2 files changed, 20 insertions(+), 66 deletions(-) diff --git a/llama/.gitignore b/llama/.gitignore index 43b6939d..9710205b 100644 --- a/llama/.gitignore +++ b/llama/.gitignore @@ -2,3 +2,4 @@ *.gguf *.lib *.exp +*.dll diff --git a/llama/hipblas.sh b/llama/hipblas.sh index 64deae15..a9a855df 100644 --- a/llama/hipblas.sh +++ b/llama/hipblas.sh @@ -1,7 +1,16 @@ hipcc \ -parallel-jobs=12 \ -O3 \ + --offload-arch=gfx900 \ + --offload-arch=gfx940 \ + --offload-arch=gfx941 \ + --offload-arch=gfx942 \ + --offload-arch=gfx1010 \ + --offload-arch=gfx1012 \ + --offload-arch=gfx1030 \ --offload-arch=gfx1100 \ + --offload-arch=gfx1101 \ + --offload-arch=gfx1102 \ -DGGML_USE_CUDA \ -DGGML_BUILD=1 \ -DGGML_SHARED=1 \ @@ -10,6 +19,8 @@ hipcc \ -DGGML_SCHED_MAX_COPIES=4 \ -DGGML_USE_HIPBLAS \ -DGGML_USE_LLAMAFILE \ + -DHIP_FAST_MATH \ + -DNDEBUG \ -DK_QUANTS_PER_ITERATION=2 \ -D_CRT_SECURE_NO_WARNINGS \ -Xclang --dependent-lib=msvcrt -Wl,/subsystem:console \ @@ -22,70 +33,12 @@ hipcc \ -lhipblas -lamdhip64 -lrocblas \ -shared \ -o ggml-hipblas.dll \ - ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c + ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp -# --offload-arch=gfx900 \ -# --offload-arch=gfx906:xnack- \ -# --offload-arch=gfx908:xnack- \ -# --offload-arch=gfx90a:xnack+ \ -# --offload-arch=gfx90a:xnack- \ -# --offload-arch=gfx940 \ -# --offload-arch=gfx941 \ -# --offload-arch=gfx942 \ -# --offload-arch=gfx1010 \ -# --offload-arch=gfx1012 \ -# --offload-arch=gfx1030 \ -# --offload-arch=gfx1100 \ -# --offload-arch=gfx1101 \ -# --offload-arch=gfx1102 \ - -# hipcc \ -# -v \ -# -parallel-jobs=12 \ -# --offload-arch=gfx900 \ -# --offload-arch=gfx940 \ -# --offload-arch=gfx941 \ -# --offload-arch=gfx942 \ -# --offload-arch=gfx1010 \ -# --offload-arch=gfx1012 \ -# --offload-arch=gfx1030 \ -# --offload-arch=gfx1100 \ -# --offload-arch=gfx1101 \ -# --offload-arch=gfx1102 \ -# -DLLAMA_BUILD \ -# -DLLAMA_SHARED \ -# -DK_QUANTS_PER_ITERATION=2 \ -# -DGGML_USE_CUDA=1 \ -# -DGGML_USE_HIPBLAS \ -# -DGGML_CUDA_DMMV_X=32 \ -# -DGGML_CUDA_MMV_Y=1 \ -# -DGGML_CUDA_FORCE_DMMV \ -# -DNDEBUG \ -# -D_DLL \ -# -D_MT \ -# -D_XOPEN_SOURCE=600 \ -# -D_CRT_SECURE_NO_WARNINGS \ -# -Xclang \ -# --dependent-lib=msvcrt \ -# -std=gnu++14 \ -# -Wmissing-declarations \ -# -Wmissing-noreturn \ -# -Wall \ -# -Wextra \ -# -Wpedantic \ -# -Wcast-qual \ -# -Wno-unused-function \ -# -Wunreachable-code-break \ -# -Wunreachable-code-return \ -# -Wmissing-prototypes \ -# -Wextra-semi \ -# -Wno-cast-qual \ -# -I. \ -# -lhipblas -lamdhip64 -lrocblas \ -# -Wno-expansion-to-defined \ -# -Wno-invalid-noreturn \ -# -Wno-deprecated-declarations \ -# -mllvm \ -# -x hip \ -# -shared -o ggml-cuda.dll \ -# ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c + # --offload-arch='gfx906:xnack-' \ + # --offload-arch='gfx908:xnack-' \ + # --offload-arch='gfx90a:xnack+' \ + # --offload-arch='gfx90a:xnack-' \ + # -D_DLL \ + # -D_MT \ + # -D_XOPEN_SOURCE=600 \