diff --git a/llm/llm.go b/llm/llm.go
index b537865e..020e3c2f 100644
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -2,6 +2,7 @@ package llm
 
 import (
 	"fmt"
+	"log"
 	"os"
 
 	"github.com/jmorganca/ollama/api"
@@ -31,6 +32,16 @@ func New(model string, opts api.Options) (LLM, error) {
 		return nil, err
 	}
 
+	switch ggml.FileType {
+	case FileTypeF32, FileTypeF16, FileTypeQ5_0, FileTypeQ5_1, FileTypeQ8_0:
+		if opts.NumGPU != 0 {
+			// Q5_0, Q5_1, and Q8_0 do not support Metal API and will
+			// cause the runner to segmentation fault so disable GPU
+			log.Printf("WARNING: GPU disabled for F32, F16, Q5_0, Q5_1, and Q8_0")
+			opts.NumGPU = 0
+		}
+	}
+
 	switch ggml.ModelFamily {
 	case ModelFamilyLlama:
 		return newLlama(model, opts)
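
For context, the new guard zeroes out `opts.NumGPU` so inference falls back to CPU whenever the GGML file type is one the Metal backend cannot handle, instead of letting the runner segfault. Below is a minimal, self-contained sketch of the same decision; the `fileType` enum and the `gpuLayers` helper are hypothetical names introduced here for illustration and are not part of the ollama codebase.

```go
package main

import "fmt"

// fileType is a stand-in for the GGML file-type enum used by the llm package;
// the values below are assumptions for illustration only.
type fileType int

const (
	fileTypeF32 fileType = iota
	fileTypeF16
	fileTypeQ4_0
	fileTypeQ5_0
	fileTypeQ5_1
	fileTypeQ8_0
)

// gpuLayers mirrors the decision made in llm.New: file types the Metal
// backend cannot handle force the GPU layer count to 0 so inference falls
// back to CPU; otherwise the requested value is kept.
func gpuLayers(ft fileType, requested int) int {
	switch ft {
	case fileTypeF32, fileTypeF16, fileTypeQ5_0, fileTypeQ5_1, fileTypeQ8_0:
		return 0 // unsupported on Metal; avoid crashing the runner
	}
	return requested
}

func main() {
	fmt.Println(gpuLayers(fileTypeQ8_0, 1)) // 0: GPU disabled
	fmt.Println(gpuLayers(fileTypeQ4_0, 1)) // 1: request passed through
}
```

Factoring the check into a helper like this would make it easy to cover with a table-driven test, but the inline `switch` in `New` keeps the actual change minimal.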