diff --git a/llama/README.md b/llama/README.md index 46179d48..7e02274a 100644 --- a/llama/README.md +++ b/llama/README.md @@ -12,7 +12,6 @@ Supported: - [x] Linux CUDA - [x] Linux ROCm - [x] Llava -- [x] Parallel Requests Extra build steps are required for CUDA and ROCm on Windows since `nvcc` and `hipcc` both require using msvc as the host compiler. For these small dlls are created: @@ -61,6 +60,8 @@ go build -tags=cuda . ## Windows +Download [w64devkit](https://github.com/skeeto/w64devkit/releases/latest) for a simple MinGW development environment. + ### CUDA Install the [CUDA toolkit v11.3.1](https://developer.nvidia.com/cuda-11-3-1-download-archive) then build the cuda code: @@ -95,9 +96,8 @@ go build -tags=rocm . ## Syncing with llama.cpp -To update this package to the latest llama.cpp code, use the `scripts/sync_llama.sh` script from the root of this repo: +To update this package to the latest llama.cpp code, run the `sync_llama.sh` script from the `llama` directory of this repo: ``` -cd ollama -./scripts/sync_llama.sh ../llama.cpp +./sync_llama.sh ../../llama.cpp ``` diff --git a/llama/runner/README.md b/llama/runner/README.md index e7cf51c0..703b1dd1 100644 --- a/llama/runner/README.md +++ b/llama/runner/README.md @@ -1,11 +1,15 @@ # `runner` -A subprocess runner for loading a model and running inference via a small http web server. +A minimal runner for loading a model and running inference via an HTTP web server. 
``` ./runner -model ``` +### Completion + ``` -curl -X POST -H "Content-Type: application/json" -d '{"prompt": "hi"}' http://localhost:8080/ +curl -X POST -H "Content-Type: application/json" -d '{"prompt": "hi"}' http://localhost:8080/completion ``` + +### Embeddings diff --git a/scripts/sync_llama.sh b/llama/sync_llama.sh similarity index 96% rename from scripts/sync_llama.sh rename to llama/sync_llama.sh index 8f41e0bb..e979facd 100755 --- a/scripts/sync_llama.sh +++ b/llama/sync_llama.sh @@ -11,7 +11,7 @@ if [ -z "$src_dir" ]; then fi # Set the destination directory -dst_dir=./llama +dst_dir=. # llama.cpp cp $src_dir/unicode.cpp $dst_dir/unicode.cpp @@ -106,7 +106,7 @@ for IN in $dst_dir/*.{c,h,cpp,m,metal,cu}; do done # ggml-metal -sed -e '/#include "ggml-common.h"/r llama/ggml-common.h' -e '/#include "ggml-common.h"/d' < $dst_dir/ggml-metal.metal > temp.metal +sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < $dst_dir/ggml-metal.metal > temp.metal TEMP_ASSEMBLY=$(mktemp) echo ".section __DATA, __ggml_metallib" > $TEMP_ASSEMBLY echo ".globl _ggml_metallib_start" >> $TEMP_ASSEMBLY