diff --git a/llama/README.md b/llama/README.md
index 00d04c81..d7004086 100644
--- a/llama/README.md
+++ b/llama/README.md
@@ -58,9 +58,9 @@ go build -tags=cuda .
 
 ### ROCm
 
-Install [ROCm 5.7.1](https://rocm.docs.amd.com/en/docs-5.7.1/) and [Strawberry Perl](https://strawberryperl.com/):
+Install [ROCm 5.7.1](https://rocm.docs.amd.com/en/docs-5.7.1/) and [Strawberry Perl](https://strawberryperl.com/).
 
-Build `ggml-hipblas.dll`:
+Then, build `ggml-hipblas.dll`:
 
 ```shell
 ./hipblas.sh
diff --git a/llama/ggml-metal.m b/llama/ggml-metal.m
index b0b16dbf..926e06e9 100644
--- a/llama/ggml-metal.m
+++ b/llama/ggml-metal.m
@@ -1,3 +1,4 @@
+//go:build darwin && arm64
 #import "ggml-metal.h"
 
 #import "ggml-backend-impl.h"
diff --git a/llama/llama.go b/llama/llama.go
index ca78eee9..9fb52022 100644
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -2,8 +2,9 @@ package llama
 
 // #cgo darwin,arm64 CFLAGS: -std=c11 -DGGML_USE_METAL -DGGML_METAL_EMBED_LIBRARY -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 // #cgo darwin,arm64 CXXFLAGS: -std=c++11 -DGGML_USE_METAL -DGGML_METAL_EMBED_LIBRARY -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
-// #cgo darwin,amd64 CXXFLAGS: -std=c++11
 // #cgo darwin,arm64 LDFLAGS: -ld_classic ${SRCDIR}/ggml-metal.o -framework Foundation -framework Metal -framework MetalKit -framework Accelerate
+// #cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
+// #cgo darwin,amd64 CXXFLAGS: -std=c++11 -Wno-incompatible-pointer-types-discards-qualifiers
 // #cgo darwin,amd64 LDFLAGS: -ld_classic -framework Foundation -framework Accelerate
 // #cgo windows LDFLAGS: -lmsvcrt
 // #cgo avx CFLAGS: -mavx
diff --git a/llama/sync.sh b/llama/sync.sh
index 2442855b..5fc00222 100755
--- a/llama/sync.sh
+++ b/llama/sync.sh
@@ -6,51 +6,80 @@ src_dir=$1
 # Set the destination directory (current directory)
 dst_dir="."
 
-# llama.cpp
-cp $src_dir/unicode.cpp $dst_dir/unicode.cpp
-cp $src_dir/unicode.h $dst_dir/unicode.h
-cp $src_dir/unicode-data.cpp $dst_dir/unicode-data.cpp
-cp $src_dir/unicode-data.h $dst_dir/unicode-data.h
-cp $src_dir/llama.cpp $dst_dir/llama.cpp
-cp $src_dir/llama.h $dst_dir/llama.h
-cp $src_dir/sgemm.cpp $dst_dir/sgemm.cpp
-cp $src_dir/sgemm.h $dst_dir/sgemm.h
+# # llama.cpp
+# cp $src_dir/unicode.cpp $dst_dir/unicode.cpp
+# cp $src_dir/unicode.h $dst_dir/unicode.h
+# cp $src_dir/unicode-data.cpp $dst_dir/unicode-data.cpp
+# cp $src_dir/unicode-data.h $dst_dir/unicode-data.h
+# cp $src_dir/llama.cpp $dst_dir/llama.cpp
+# cp $src_dir/llama.h $dst_dir/llama.h
+# cp $src_dir/sgemm.cpp $dst_dir/sgemm.cpp
+# cp $src_dir/sgemm.h $dst_dir/sgemm.h
 
-# ggml
-cp $src_dir/ggml.c $dst_dir/ggml.c
-cp $src_dir/ggml.h $dst_dir/ggml.h
-cp $src_dir/ggml-quants.c $dst_dir/ggml-quants.c
-cp $src_dir/ggml-quants.h $dst_dir/ggml-quants.h
-cp $src_dir/ggml-metal.metal $dst_dir/ggml-metal.metal
-cp $src_dir/ggml-metal.h $dst_dir/ggml-metal.h
-cp $src_dir/ggml-metal.m $dst_dir/ggml-metal.m
-cp $src_dir/ggml-impl.h $dst_dir/ggml-impl.h
-cp $src_dir/ggml-cuda.h $dst_dir/ggml-cuda.h
-cp $src_dir/ggml-cuda.cu $dst_dir/ggml-cuda.cu
-cp $src_dir/ggml-common.h $dst_dir/ggml-common.h
-cp $src_dir/ggml-backend.h $dst_dir/ggml-backend.h
-cp $src_dir/ggml-backend.c $dst_dir/ggml-backend.c
-cp $src_dir/ggml-backend-impl.h $dst_dir/ggml-backend-impl.h
-cp $src_dir/ggml-alloc.h $dst_dir/ggml-alloc.h
-cp $src_dir/ggml-alloc.c $dst_dir/ggml-alloc.c
+# # ggml
+# cp $src_dir/ggml.c $dst_dir/ggml.c
+# cp $src_dir/ggml.h $dst_dir/ggml.h
+# cp $src_dir/ggml-quants.c $dst_dir/ggml-quants.c
+# cp $src_dir/ggml-quants.h $dst_dir/ggml-quants.h
+# cp $src_dir/ggml-metal.metal $dst_dir/ggml-metal.metal
+# cp $src_dir/ggml-metal.h $dst_dir/ggml-metal.h
+# cp $src_dir/ggml-metal.m $dst_dir/ggml-metal.m
+# cp $src_dir/ggml-impl.h $dst_dir/ggml-impl.h
+# cp $src_dir/ggml-cuda.h $dst_dir/ggml-cuda.h
+# cp $src_dir/ggml-cuda.cu $dst_dir/ggml-cuda.cu
+# cp $src_dir/ggml-common.h $dst_dir/ggml-common.h
+# cp $src_dir/ggml-backend.h $dst_dir/ggml-backend.h
+# cp $src_dir/ggml-backend.c $dst_dir/ggml-backend.c
+# cp $src_dir/ggml-backend-impl.h $dst_dir/ggml-backend-impl.h
+# cp $src_dir/ggml-alloc.h $dst_dir/ggml-alloc.h
+# cp $src_dir/ggml-alloc.c $dst_dir/ggml-alloc.c
 
-# ggml-cuda
-mkdir -p $dst_dir/ggml-cuda
-cp $src_dir/ggml-cuda/*.cu $dst_dir/ggml-cuda/
-cp $src_dir/ggml-cuda/*.cuh $dst_dir/ggml-cuda/
+# # ggml-cuda
+# mkdir -p $dst_dir/ggml-cuda
+# cp $src_dir/ggml-cuda/*.cu $dst_dir/ggml-cuda/
+# cp $src_dir/ggml-cuda/*.cuh $dst_dir/ggml-cuda/
 
-sed -i 's/extern "C" GGML_CALL int ggml_backend_cuda_reg_devices();/\/\/ extern "C" GGML_CALL int ggml_backend_cuda_reg_devices();/' ggml-cuda.cu
-sed -i '34iGGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);' ggml-cuda.h
+# sed -i 's/extern "C" GGML_CALL int ggml_backend_cuda_reg_devices();/\/\/ extern "C" GGML_CALL int ggml_backend_cuda_reg_devices();/' ggml-cuda.cu
+# sed -i '34iGGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);' ggml-cuda.h
 
-# ggml-metal
-sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > temp.metal
-TEMP_ASSEMBLY=$(mktemp)
-echo ".section __DATA, __ggml_metallib" > $TEMP_ASSEMBLY
-echo ".globl _ggml_metallib_start" >> $TEMP_ASSEMBLY
-echo "_ggml_metallib_start:" >> $TEMP_ASSEMBLY
-echo ".incbin \"temp.metal\"" >> $TEMP_ASSEMBLY
-echo ".globl _ggml_metallib_end" >> $TEMP_ASSEMBLY
-echo "_ggml_metallib_end:" >> $TEMP_ASSEMBLY
-as -mmacosx-version-min=11.3 $TEMP_ASSEMBLY -o ggml-metal.o
-rm -f $TEMP_ASSEMBLY
-rm -rf temp.metal
+# # ggml-metal
+# sed -i '' '1s;^;//go:build darwin \&\& arm64\n;' ggml-metal.m
+# sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > temp.metal
+# TEMP_ASSEMBLY=$(mktemp)
+# echo ".section __DATA, __ggml_metallib" > $TEMP_ASSEMBLY
+# echo ".globl _ggml_metallib_start" >> $TEMP_ASSEMBLY
+# echo "_ggml_metallib_start:" >> $TEMP_ASSEMBLY
+# echo ".incbin \"temp.metal\"" >> $TEMP_ASSEMBLY
+# echo ".globl _ggml_metallib_end" >> $TEMP_ASSEMBLY
+# echo "_ggml_metallib_end:" >> $TEMP_ASSEMBLY
+# as -mmacosx-version-min=11.3 $TEMP_ASSEMBLY -o ggml-metal.o
+# rm -f $TEMP_ASSEMBLY
+# rm -rf temp.metal
+
+# add license info
+LICENSE=$(mktemp)
+cleanup() {
+    rm -f "$LICENSE"
+}
+trap cleanup 0
+
+# SHA1 may be supplied by the caller; otherwise use the upstream checkout's HEAD
+SHA1=${SHA1:-$(git -C "$src_dir" rev-parse HEAD)}
+
+cat >"$LICENSE" <<EOF
+/**
+ * llama.cpp - git $SHA1
+ *
+$(sed -e 's/^/ * /' -e 's/ *$//' <"$src_dir/LICENSE")
+ */
+
+EOF
+
+for IN in "$dst_dir"/*.{c,h,cpp,m,metal,cu}; do
+    # a pattern that matched nothing is left as a literal string; skip it
+    [ -f "$IN" ] || continue
+    TMP=$(mktemp)
+    echo "updating license $IN"
+    cat "$LICENSE" "$IN" >"$TMP"
+    mv "$TMP" "$IN"
+done
diff --git a/llama/runner/README.md b/runner/README.md
similarity index 100%
rename from llama/runner/README.md
rename to runner/README.md
diff --git a/llama/runner/runner.go b/runner/runner.go
similarity index 100%
rename from llama/runner/runner.go
rename to runner/runner.go
diff --git a/scripts/build_llama_darwin.sh b/scripts/build_llama_darwin.sh
new file mode 100755
index 00000000..8ec48fe3
--- /dev/null
+++ b/scripts/build_llama_darwin.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+set -e
+
+export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
+export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
+
+mkdir -p dist
+
+# amd64 runners
+export CGO_CFLAGS_ALLOW=-mfma
+export CGO_CXXFLAGS_ALLOW=-mfma
+CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -x -ldflags="-s -w" -trimpath -o dist/ollama_llama_runner_darwin_amd64 ./runner &
+pid_cpu=$!
+CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -x -ldflags="-s -w" -tags avx -trimpath -o dist/ollama_llama_runner_darwin_amd64_avx ./runner &
+pid_avx=$!
+CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -x -ldflags="-s -w" -tags avx,avx2 -trimpath -o dist/ollama_llama_runner_darwin_amd64_avx2 ./runner &
+pid_avx2=$!
+
+# a bare `wait` always exits 0; wait on each job so `set -e` aborts on a failed build
+wait $pid_cpu
+wait $pid_avx
+wait $pid_avx2
+
+# amd64 (no explicit -ldflags here: it would override the version flags in GOFLAGS)
+CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -x -o dist/ollama_darwin_amd64 .
+
+# arm64 runner
+CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -ldflags="-s -w" -trimpath -o dist/ollama_llama_runner_darwin_arm64 ./runner
+
+# arm64 (no explicit -ldflags here: it would override the version flags in GOFLAGS)
+CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -trimpath -o dist/ollama_darwin_arm64 .