From fa24e73b8253a554ec840395a5d1dfdb91d3598b Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Thu, 21 Dec 2023 16:54:54 -0800
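Subject: [PATCH] Remove CPU build, fixup linux build script

Drop the separate CPU-only Linux build and tidy up the remaining build
path:

* Delete Dockerfile.cpu and the second "cpubuilder" image build in
  scripts/build_linux.sh. Each architecture now produces a single
  dist/ollama-linux-$TARGETARCH binary from Dockerfile.build; the
  ollama-linux-$TARGETARCH-cpu artifact is no longer produced. The
  image and container are renamed from gpubuilder to builder.
* Dockerfile.build: move the CLBLAST_VER build arg and the CLBlast
  source build out of the final stage (after FROM base-${TARGETARCH})
  and into the earlier stage that installs the CUDA and ROCm packages.
* gen_common.sh: add -DLLAMA_SERVER_VERBOSE=off to the default
  CMAKE_DEFS.
* gen_linux.sh: create gguf/build/lib and its .generated marker only
  after git_module_setup and apply_patches have run, instead of before
  gen_common.sh is sourced.
* 0001-Expose-callable-API-for-server.patch: llama_server_init() now
  calls log_disable() unless SERVER_VERBOSE is 1.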
---
 Dockerfile.build                              | 10 +++---
 Dockerfile.cpu                                | 35 -------------------
 llm/llama.cpp/gen_common.sh                   |  2 +-
 llm/llama.cpp/gen_linux.sh                    |  5 +--
 .../0001-Expose-callable-API-for-server.patch | 13 ++++---
 scripts/build_linux.sh                        | 14 +++-----
 6 files changed, 21 insertions(+), 58 deletions(-)
 delete mode 100644 Dockerfile.cpu

diff --git a/Dockerfile.build b/Dockerfile.build
index 6b7e3c4d..c8170919 100644
--- a/Dockerfile.build
+++ b/Dockerfile.build
@@ -4,6 +4,7 @@ ARG CUDA_VERSION=11.3.1-1
 ARG CMAKE_VERSION=3.22.1
 # ROCm only supports amd64
 ARG ROCM_VERSION=6.0
+ARG CLBLAST_VER=1.6.1
 
 # Note: https://rocm.docs.amd.com/en/latest/release/user_kernel_space_compat_matrix.html
 RUN apt-get update && \
@@ -23,6 +24,10 @@ RUN apt-get update && \
     apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION} rocm-hip-libraries rocm-device-libs rocm-libs rocm-ocl-icd rocm-hip-sdk rocm-hip-libraries rocm-cmake rocm-clang-ocl rocm-dev
 
+# CLBlast
+RUN wget -qO- https://github.com/CNugteren/CLBlast/archive/refs/tags/${CLBLAST_VER}.tar.gz | tar zxv -C /tmp/ && \
+    cd /tmp/CLBlast-${CLBLAST_VER} && mkdir build && cd build && cmake .. && make && make install
+
 ENV ROCM_PATH=/opt/rocm
 
 # Ubuntu 22.04 arm64 dependencies
@@ -45,7 +50,6 @@ FROM base-${TARGETARCH}
 ARG TARGETARCH
 ARG GOFLAGS="'-ldflags -w -s'"
 ARG CGO_CFLAGS
-ARG CLBLAST_VER=1.6.1
 ARG GOLANG_VERSION=1.21.3
 
 # Common toolchain
@@ -53,10 +57,6 @@ RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y gcc-10 g++-10 cpp-10 git ocl-icd-opencl-dev && \
     update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10
 
-# CLBlast
-RUN wget -qO- https://github.com/CNugteren/CLBlast/archive/refs/tags/${CLBLAST_VER}.tar.gz | tar zxv -C /tmp/ && \
-    cd /tmp/CLBlast-${CLBLAST_VER} && mkdir build && cd build && cmake .. && make && make install
-
 # install go
 ADD https://dl.google.com/go/go${GOLANG_VERSION}.linux-$TARGETARCH.tar.gz /tmp/go${GOLANG_VERSION}.tar.gz
 RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go${GOLANG_VERSION}.tar.gz
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
deleted file mode 100644
index dd6926df..00000000
--- a/Dockerfile.cpu
+++ /dev/null
@@ -1,35 +0,0 @@
-# Dockerfile variant to ensure we can build CPU only on linux
-FROM --platform=linux/amd64 ubuntu:20.04 AS base-cpu-amd64
-ENV CMAKE_ARCH "x86_64"
-
-FROM --platform=linux/arm64 ubuntu:20.04 AS base-cpu-arm64
-ENV CMAKE_ARCH "aarch64"
-
-FROM base-cpu-${TARGETARCH} AS cpu-builder
-ARG TARGETARCH
-ARG GOFLAGS
-ARG CGO_CFLAGS
-
-# Common toolchain
-RUN apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y wget make gcc-10 g++-10 cpp-10 git ocl-icd-opencl-dev && \
-    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10
-RUN wget "https://github.com/Kitware/CMake/releases/download/v3.27.6/cmake-3.27.6-linux-${CMAKE_ARCH}.sh" -O /tmp/cmake-installer.sh && \
-    chmod +x /tmp/cmake-installer.sh && /tmp/cmake-installer.sh --skip-license --prefix=/usr
-
-# install go
-ADD https://dl.google.com/go/go1.21.3.linux-$TARGETARCH.tar.gz /tmp/go1.21.3.tar.gz
-RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.3.tar.gz
-
-# build the final binary
-FROM cpu-builder AS cpu-build
-WORKDIR /go/src/github.com/jmorganca/ollama
-COPY . .
-
-ENV GOOS=linux
-ENV GOARCH=$TARGETARCH
-ENV GOFLAGS=$GOFLAGS
-ENV CGO_CFLAGS=${CGO_CFLAGS}
-
-RUN /usr/local/go/bin/go generate ./... && \
-    /usr/local/go/bin/go build .
\ No newline at end of file
diff --git a/llm/llama.cpp/gen_common.sh b/llm/llama.cpp/gen_common.sh
index c6b84f7d..ad5d12e2 100644
--- a/llm/llama.cpp/gen_common.sh
+++ b/llm/llama.cpp/gen_common.sh
@@ -3,7 +3,7 @@
 init_vars() {
     LLAMACPP_DIR=gguf
     PATCHES="0001-Expose-callable-API-for-server.patch"
-    CMAKE_DEFS="-DLLAMA_ACCELERATE=on"
+    CMAKE_DEFS="-DLLAMA_ACCELERATE=on -DLLAMA_SERVER_VERBOSE=off"
     # TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings
     CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
     if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
diff --git a/llm/llama.cpp/gen_linux.sh b/llm/llama.cpp/gen_linux.sh
index e3cb87a8..3d659fff 100755
--- a/llm/llama.cpp/gen_linux.sh
+++ b/llm/llama.cpp/gen_linux.sh
@@ -22,13 +22,14 @@ if [ -z "${CUDACXX}" -a -x /usr/local/cuda/bin/nvcc ]; then
 fi
 COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_ACCELERATE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
 OLLAMA_DYN_LIB_DIR="gguf/build/lib"
-mkdir -p ${OLLAMA_DYN_LIB_DIR}
-touch ${OLLAMA_DYN_LIB_DIR}/.generated
 source $(dirname $0)/gen_common.sh
 init_vars
 git_module_setup
 apply_patches
 
+mkdir -p ${OLLAMA_DYN_LIB_DIR}
+touch ${OLLAMA_DYN_LIB_DIR}/.generated
+
 #
 # CPU first for the default library
 #
diff --git a/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch b/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
index 07e42972..ac3fc12a 100644
--- a/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
+++ b/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
@@ -1,4 +1,4 @@
-From 7184ae16e8fd0e9e91cac4c81daa323057fa992b Mon Sep 17 00:00:00 2001
+From 4c72576c5f6c2217b1ecf7fd8523616acc5526ae Mon Sep 17 00:00:00 2001
 From: Daniel Hiltgen <daniel@ollama.com>
 Date: Mon, 13 Nov 2023 12:25:58 -0800
 Subject: [PATCH] Expose callable API for server
@@ -6,10 +6,10 @@ Subject: [PATCH] Expose callable API for server
 This adds an extern "C" interface within the example server
 ---
  examples/server/CMakeLists.txt |  24 +++
- examples/server/server.cpp     | 276 +++++++++++++++++++++++++++++++++
+ examples/server/server.cpp     | 279 +++++++++++++++++++++++++++++++++
  examples/server/server.h       |  89 +++++++++++
  ggml-cuda.cu                   |   1 +
- 4 files changed, 390 insertions(+)
+ 4 files changed, 393 insertions(+)
  create mode 100644 examples/server/server.h
 
 diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
@@ -46,7 +46,7 @@ index 859cd12..4ea47a7 100644
 +endif()
 \ No newline at end of file
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index 0403853..065420c 100644
+index 0403853..5e78e4d 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
 @@ -5,6 +5,9 @@
@@ -67,7 +67,7 @@ index 0403853..065420c 100644
  int main(int argc, char **argv)
  {
  #if SERVER_VERBOSE != 1
-@@ -3123,3 +3127,275 @@ int main(int argc, char **argv)
+@@ -3123,3 +3127,278 @@ int main(int argc, char **argv)
      llama_backend_free();
      return 0;
  }
@@ -80,6 +80,9 @@ index 0403853..065420c 100644
 +
 +void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err)
 +{
++#if SERVER_VERBOSE != 1
++    log_disable();
++#endif
 +    assert(err != NULL && sparams != NULL);
 +    err->id = 0;
 +    err->msg[0] = '\0';
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
index 836de6ac..06a2ae1c 100755
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -8,14 +8,8 @@ export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version
 mkdir -p dist
 
 for TARGETARCH in amd64 arm64; do
-    docker buildx build --load --progress=plain --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t gpubuilder:$TARGETARCH .
-    docker create --platform linux/$TARGETARCH --name gpubuilder-$TARGETARCH gpubuilder:$TARGETARCH
-    docker cp gpubuilder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
-    docker rm gpubuilder-$TARGETARCH
-
-    docker buildx build --load --progress=plain --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.cpu -t cpubuilder:$TARGETARCH .
-    docker create --platform linux/$TARGETARCH --name cpubuilder-$TARGETARCH cpubuilder:$TARGETARCH
-    docker cp cpubuilder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH-cpu
-    docker rm cpubuilder-$TARGETARCH
-
+    docker buildx build --load --progress=plain --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
+    docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
+    docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
+    docker rm builder-$TARGETARCH
 done