diff --git a/docs/development.md b/docs/development.md
index cf75101d..594787d7 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -14,7 +14,13 @@ Install required tools:
 brew install go cmake gcc
 ```
 
-Get the required libraries:
+Optionally enable debugging and more verbose logging:
+
+```bash
+export CGO_CFLAGS="-g"
+```
+
+Get the required libraries and build the native LLM code:
 
 ```bash
 go generate ./...
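
Taken together, the documentation change describes the debug workflow end to end. A minimal sketch, assuming the standard `go build .` step from the surrounding development docs (it is not shown in this hunk):

```bash
# Putting -g into CGO_CFLAGS switches the native llama.cpp build to
# RelWithDebInfo and compiles verbose server logging in (see the
# gen_common.sh change below).
export CGO_CFLAGS="-g"

# Fetch the vendored llama.cpp sources and build the native LLM code.
go generate ./...

# Build the Go binary against the freshly built native libraries
# (assumed follow-on step from the development docs).
go build .
```
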
diff --git a/llm/llama.cpp/gen_common.sh b/llm/llama.cpp/gen_common.sh
index ad5d12e2..0bfd8d8f 100644
--- a/llm/llama.cpp/gen_common.sh
+++ b/llm/llama.cpp/gen_common.sh
@@ -3,14 +3,14 @@
 init_vars() {
     LLAMACPP_DIR=gguf
     PATCHES="0001-Expose-callable-API-for-server.patch"
-    CMAKE_DEFS="-DLLAMA_ACCELERATE=on -DLLAMA_SERVER_VERBOSE=off"
+    CMAKE_DEFS="-DLLAMA_ACCELERATE=on"
     # TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings
     CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
     if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
-        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on ${CMAKE_DEFS}"
+        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
     else
         # TODO - add additional optimization flags...
-        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release ${CMAKE_DEFS}"
+        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
     fi
 }
 
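The grep test in `init_vars` is a substring match, so any `CGO_CFLAGS` value containing `-g` selects the debug branch, not just the exact string recommended in the docs. An illustration of the matching semantics (flag strings are illustrative):

```bash
# Substring semantics of the -g detection: any flags string containing
# -g enables the RelWithDebInfo + LLAMA_SERVER_VERBOSE=on branch.
echo "-g"     | grep -- '-g' >/dev/null && echo "debug build"    # matches
echo "-g -O0" | grep -- '-g' >/dev/null && echo "debug build"    # matches
echo "-O2"    | grep -- '-g' >/dev/null || echo "release build"  # no match
```

The PowerShell generate script below performs the equivalent check for Windows builds.
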
diff --git a/llm/llama.cpp/gen_windows.ps1 b/llm/llama.cpp/gen_windows.ps1
index 2c77d4ab..2f2f856d 100644
--- a/llm/llama.cpp/gen_windows.ps1
+++ b/llm/llama.cpp/gen_windows.ps1
@@ -7,9 +7,10 @@ function init_vars {
     $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-DLLAMA_K_QUANTS=on", "-DLLAMA_ACCELERATE=on", "-A","x64")
 
-    if ($env:CGO_CFLAGS -contains "-g") {
+    if ($env:CGO_CFLAGS -like "*-g*") {
-        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on")
+        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
         $script:config = "RelWithDebInfo"
     } else {
+        $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
         $script:config = "Release"
     }
 }
diff --git a/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch b/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
index ac3fc12a..e1c1b141 100644
--- a/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
+++ b/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
@@ -1,22 +1,22 @@
-From 4c72576c5f6c2217b1ecf7fd8523616acc5526ae Mon Sep 17 00:00:00 2001
+From 90c332fe2ef61149b38561d02836e66715df214d Mon Sep 17 00:00:00 2001
 From: Daniel Hiltgen <daniel@ollama.com>
 Date: Mon, 13 Nov 2023 12:25:58 -0800
 Subject: [PATCH] Expose callable API for server
 
 This adds an extern "C" interface within the example server
 ---
- examples/server/CMakeLists.txt |  24 +++
- examples/server/server.cpp     | 279 +++++++++++++++++++++++++++++++++
+ examples/server/CMakeLists.txt |  27 ++++
+ examples/server/server.cpp     | 280 +++++++++++++++++++++++++++++++++
  examples/server/server.h       |  89 +++++++++++
  ggml-cuda.cu                   |   1 +
- 4 files changed, 393 insertions(+)
+ 4 files changed, 397 insertions(+)
  create mode 100644 examples/server/server.h
 
 diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
-index 859cd12..4ea47a7 100644
+index 859cd12..da2b9bf 100644
 --- a/examples/server/CMakeLists.txt
 +++ b/examples/server/CMakeLists.txt
-@@ -11,3 +11,27 @@ if (WIN32)
+@@ -11,3 +11,30 @@ if (WIN32)
      TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
  endif()
  target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -29,6 +29,9 @@ index 859cd12..4ea47a7 100644
 +target_compile_features(${TARGET} PRIVATE cxx_std_11)
 +target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1)
 +target_link_libraries(${TARGET} PRIVATE common llama llava ${CMAKE_THREAD_LIBS_INIT})
++target_compile_definitions(${TARGET} PRIVATE
++    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
++)
 +
 +if (BUILD_SHARED_LIBS)
 +    set_target_properties(ext_server PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -46,7 +49,7 @@ index 859cd12..4ea47a7 100644
 +endif()
 \ No newline at end of file
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index 0403853..5e78e4d 100644
+index 0403853..07fb05c 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
 @@ -5,6 +5,9 @@
@@ -67,7 +70,7 @@ index 0403853..5e78e4d 100644
  int main(int argc, char **argv)
  {
  #if SERVER_VERBOSE != 1
-@@ -3123,3 +3127,278 @@ int main(int argc, char **argv)
+@@ -3123,3 +3127,279 @@ int main(int argc, char **argv)
      llama_backend_free();
      return 0;
  }
@@ -81,6 +84,7 @@ index 0403853..5e78e4d 100644
 +void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err)
 +{
 +#if SERVER_VERBOSE != 1
++    LOG_TEE("disabling verbose llm logging\n");
 +    log_disable();
 +#endif
 +    assert(err != NULL && sparams != NULL);
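
The `$<BOOL:${LLAMA_SERVER_VERBOSE}>` generator expression added to the patched CMakeLists normalizes the option to `1` or `0`, which is what the `#if SERVER_VERBOSE != 1` guards in server.cpp test. A sketch of the two resulting configurations (the `gguf` source directory comes from `LLAMACPP_DIR` in gen_common.sh; the build directory name is assumed):

```bash
# Verbose build: $<BOOL:on> evaluates to 1, so SERVER_VERBOSE=1 and the
# log_disable() path is compiled out; the embedded server logs verbosely.
cmake -S gguf -B gguf/build -DLLAMA_SERVER_VERBOSE=on

# Quiet build: $<BOOL:off> evaluates to 0, so llama_server_init() prints
# "disabling verbose llm logging" once via LOG_TEE and then calls
# log_disable().
cmake -S gguf -B gguf/build -DLLAMA_SERVER_VERBOSE=off
```
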
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
index 06a2ae1c..8a5d86a5 100755
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -8,7 +8,7 @@ export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version
 mkdir -p dist
 
 for TARGETARCH in amd64 arm64; do
-    docker buildx build --load --progress=plain --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
+    docker buildx build --load --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
     docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
     docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
     docker rm builder-$TARGETARCH
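
With `--build-arg=CGO_CFLAGS` added (an unvalued `--build-arg=NAME` makes docker buildx read the value from the host environment), a debug Linux build needs no script changes. A usage sketch, assuming Dockerfile.build declares a matching `ARG CGO_CFLAGS`:

```bash
# Export debug flags on the host; the --build-arg=CGO_CFLAGS flag above
# forwards them into the container builds for both architectures.
export CGO_CFLAGS="-g"
./scripts/build_linux.sh
```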