Fix CPU performance on hyperthreaded systems
The default thread count logic was broken: on a hyperthreaded CPU it started twice as many threads as it should have, resulting in thrashing and poor performance.
This commit is contained in:
parent
d9cd3d9667
commit
325d74985b
@ -37,7 +37,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@ -185,11 +184,7 @@ func newExtServer(server extServer, model string, adapters, projectors []string,
|
|||||||
sparams.mmproj = nil
|
sparams.mmproj = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if opts.NumThread > 0 {
|
sparams.n_threads = C.uint(opts.NumThread)
|
||||||
sparams.n_threads = C.uint(opts.NumThread)
|
|
||||||
} else {
|
|
||||||
sparams.n_threads = C.uint(runtime.NumCPU())
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Printf("Initializing internal llama server")
|
log.Printf("Initializing internal llama server")
|
||||||
resp := newExtServerResp(128)
|
resp := newExtServerResp(128)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
From b5e195803e2a989e57eef0010adce778df1e2d01 Mon Sep 17 00:00:00 2001
|
From 7184ae16e8fd0e9e91cac4c81daa323057fa992b Mon Sep 17 00:00:00 2001
|
||||||
From: Daniel Hiltgen <daniel@ollama.com>
|
From: Daniel Hiltgen <daniel@ollama.com>
|
||||||
Date: Mon, 13 Nov 2023 12:25:58 -0800
|
Date: Mon, 13 Nov 2023 12:25:58 -0800
|
||||||
Subject: [PATCH] Expose callable API for server
|
Subject: [PATCH] Expose callable API for server
|
||||||
@ -6,10 +6,10 @@ Subject: [PATCH] Expose callable API for server
|
|||||||
This adds an extern "C" interface within the example server
|
This adds an extern "C" interface within the example server
|
||||||
---
|
---
|
||||||
examples/server/CMakeLists.txt | 24 +++
|
examples/server/CMakeLists.txt | 24 +++
|
||||||
examples/server/server.cpp | 274 +++++++++++++++++++++++++++++++++
|
examples/server/server.cpp | 276 +++++++++++++++++++++++++++++++++
|
||||||
examples/server/server.h | 89 +++++++++++
|
examples/server/server.h | 89 +++++++++++
|
||||||
ggml-cuda.cu | 1 +
|
ggml-cuda.cu | 1 +
|
||||||
4 files changed, 388 insertions(+)
|
4 files changed, 390 insertions(+)
|
||||||
create mode 100644 examples/server/server.h
|
create mode 100644 examples/server/server.h
|
||||||
|
|
||||||
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
|
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
|
||||||
@ -46,7 +46,7 @@ index 859cd12..4ea47a7 100644
|
|||||||
+endif()
|
+endif()
|
||||||
\ No newline at end of file
|
\ No newline at end of file
|
||||||
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
|
||||||
index 0403853..2084fd8 100644
|
index 0403853..065420c 100644
|
||||||
--- a/examples/server/server.cpp
|
--- a/examples/server/server.cpp
|
||||||
+++ b/examples/server/server.cpp
|
+++ b/examples/server/server.cpp
|
||||||
@@ -5,6 +5,9 @@
|
@@ -5,6 +5,9 @@
|
||||||
@ -67,7 +67,7 @@ index 0403853..2084fd8 100644
|
|||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
#if SERVER_VERBOSE != 1
|
#if SERVER_VERBOSE != 1
|
||||||
@@ -3123,3 +3127,273 @@ int main(int argc, char **argv)
|
@@ -3123,3 +3127,275 @@ int main(int argc, char **argv)
|
||||||
llama_backend_free();
|
llama_backend_free();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -89,7 +89,9 @@ index 0403853..2084fd8 100644
|
|||||||
+ gpt_params params;
|
+ gpt_params params;
|
||||||
+ params.n_ctx = sparams->n_ctx;
|
+ params.n_ctx = sparams->n_ctx;
|
||||||
+ params.n_batch = sparams->n_batch;
|
+ params.n_batch = sparams->n_batch;
|
||||||
+ params.n_threads = sparams->n_threads;
|
+ if (sparams->n_threads > 0) {
|
||||||
|
+ params.n_threads = sparams->n_threads;
|
||||||
|
+ }
|
||||||
+ params.n_parallel = sparams->n_parallel;
|
+ params.n_parallel = sparams->n_parallel;
|
||||||
+ params.rope_freq_base = sparams->rope_freq_base;
|
+ params.rope_freq_base = sparams->rope_freq_base;
|
||||||
+ params.rope_freq_scale = sparams->rope_freq_scale;
|
+ params.rope_freq_scale = sparams->rope_freq_scale;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user