diff --git a/llm/llama.go b/llm/llama.go index 4eab751d..7172f91e 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -339,11 +339,14 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers "--model", model, "--ctx-size", fmt.Sprintf("%d", opts.NumCtx), "--batch-size", fmt.Sprintf("%d", opts.NumBatch), - "--main-gpu", fmt.Sprintf("%d", opts.MainGPU), "--n-gpu-layers", fmt.Sprintf("%d", numGPU), "--embedding", } + if opts.MainGPU > 0 { + params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU)) + } + if opts.RopeFrequencyBase > 0 { params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase)) }