Compare commits

...

1 Commit

Author:  Michael Yang
SHA1:    7359c5ea5e
Date:    2024-09-12 21:25:26 -07:00
Message: usage templating

    simplify usage templating by leveraging cobra's annotations
7 changed files with 263 additions and 206 deletions
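
Before this change, appendEnvDocs string-concatenated an "Environment Variables:" block onto each command's usage template by hand; after it, each command simply lists its relevant variables in cobra's Annotations map (built by envconfig.Describe) and one shared, embedded template renders them. A minimal, hypothetical sketch of that mechanism, separate from the committed code:

package main

import (
	"os"

	"github.com/spf13/cobra"
)

// A stripped-down stand-in for cmd/usage.gotmpl: cobra exposes each
// command's Annotations map[string]string to its usage template and
// registers helper functions like rpad for use inside it.
const usageTemplate = `Usage:
  {{ .UseLine }}
{{- if .Annotations }}

Environment Variables:
{{- range $key, $value := .Annotations }}
  {{ rpad $key 24 }} {{ $value }}
{{- end }}
{{- end }}
`

func main() {
	cmd := &cobra.Command{
		Use: "example",
		// envconfig.Describe in this commit returns exactly this shape:
		// a map of variable name to usage string (plus default, if any).
		Annotations: map[string]string{
			"OLLAMA_HOST": "Listen address and port (default: 127.0.0.1:11434)",
		},
	}
	cmd.SetUsageTemplate(usageTemplate)
	cmd.SetOut(os.Stdout)
	_ = cmd.Usage() // prints the Usage and Environment Variables sections
}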

View File

@@ -8,6 +8,7 @@ import (
 	"crypto/ed25519"
 	"crypto/rand"
 	"crypto/sha256"
+	_ "embed"
 	"encoding/pem"
 	"errors"
 	"fmt"
@@ -47,6 +48,9 @@ import (
 	"github.com/ollama/ollama/version"
 )
 
+//go:embed usage.gotmpl
+var usageTemplate string
+
 func CreateHandler(cmd *cobra.Command, args []string) error {
 	filename, _ := cmd.Flags().GetString("file")
 	filename, err := filepath.Abs(filename)
@@ -1254,21 +1258,6 @@ func versionHandler(cmd *cobra.Command, _ []string) {
 	}
 }
 
-func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
-	if len(envs) == 0 {
-		return
-	}
-
-	envUsage := `
-Environment Variables:
-`
-	for _, e := range envs {
-		envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
-	}
-
-	cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
-}
-
 func NewCLI() *cobra.Command {
 	log.SetFlags(log.LstdFlags | log.Lshortfile)
 	cobra.EnableCommandSorting = false
@@ -1298,22 +1287,24 @@ func NewCLI() *cobra.Command {
 	rootCmd.Flags().BoolP("version", "v", false, "Show version information")
 
 	createCmd := &cobra.Command{
 		Use:     "create MODEL",
 		Short:   "Create a model from a Modelfile",
 		Args:    cobra.ExactArgs(1),
 		PreRunE: checkServerHeartbeat,
 		RunE:    CreateHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile")
 	createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")
 
 	showCmd := &cobra.Command{
 		Use:     "show MODEL",
 		Short:   "Show information for a model",
 		Args:    cobra.ExactArgs(1),
 		PreRunE: checkServerHeartbeat,
 		RunE:    ShowHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	showCmd.Flags().Bool("license", false, "Show license of a model")
@@ -1323,11 +1314,12 @@ func NewCLI() *cobra.Command {
 	showCmd.Flags().Bool("system", false, "Show system message of a model")
 
 	runCmd := &cobra.Command{
 		Use:     "run MODEL [PROMPT]",
 		Short:   "Run a model",
 		Args:    cobra.MinimumNArgs(1),
 		PreRunE: checkServerHeartbeat,
 		RunE:    RunHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST", "OLLAMA_NOHISTORY"),
 	}
 
 	runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
@@ -1350,100 +1342,80 @@ func NewCLI() *cobra.Command {
 		Short: "Start ollama",
 		Args:  cobra.ExactArgs(0),
 		RunE:  RunServer,
+		Annotations: envconfig.Describe(
+			"OLLAMA_DEBUG",
+			"OLLAMA_HOST",
+			"OLLAMA_KEEP_ALIVE",
+			"OLLAMA_MAX_LOADED_MODELS",
+			"OLLAMA_MAX_QUEUE",
+			"OLLAMA_MODELS",
+			"OLLAMA_NUM_PARALLEL",
+			"OLLAMA_NOPRUNE",
+			"OLLAMA_ORIGINS",
+			"OLLAMA_SCHED_SPREAD",
+			"OLLAMA_TMPDIR",
+			"OLLAMA_FLASH_ATTENTION",
+			"OLLAMA_LLM_LIBRARY",
+			"OLLAMA_GPU_OVERHEAD",
+			"OLLAMA_LOAD_TIMEOUT",
+		),
 	}
 
 	pullCmd := &cobra.Command{
 		Use:     "pull MODEL",
 		Short:   "Pull a model from a registry",
 		Args:    cobra.ExactArgs(1),
 		PreRunE: checkServerHeartbeat,
 		RunE:    PullHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	pullCmd.Flags().Bool("insecure", false, "Use an insecure registry")
 
 	pushCmd := &cobra.Command{
 		Use:     "push MODEL",
 		Short:   "Push a model to a registry",
 		Args:    cobra.ExactArgs(1),
 		PreRunE: checkServerHeartbeat,
 		RunE:    PushHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	pushCmd.Flags().Bool("insecure", false, "Use an insecure registry")
 
 	listCmd := &cobra.Command{
 		Use:     "list",
 		Aliases: []string{"ls"},
 		Short:   "List models",
 		PreRunE: checkServerHeartbeat,
 		RunE:    ListHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	psCmd := &cobra.Command{
 		Use:     "ps",
 		Short:   "List running models",
 		PreRunE: checkServerHeartbeat,
 		RunE:    ListRunningHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	copyCmd := &cobra.Command{
 		Use:     "cp SOURCE DESTINATION",
 		Short:   "Copy a model",
 		Args:    cobra.ExactArgs(2),
 		PreRunE: checkServerHeartbeat,
 		RunE:    CopyHandler,
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	deleteCmd := &cobra.Command{
 		Use:     "rm MODEL [MODEL...]",
 		Short:   "Remove a model",
 		Args:    cobra.MinimumNArgs(1),
 		PreRunE: checkServerHeartbeat,
 		RunE:    DeleteHandler,
-	}
-
-	envVars := envconfig.AsMap()
-	envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
-	for _, cmd := range []*cobra.Command{
-		createCmd,
-		showCmd,
-		runCmd,
-		stopCmd,
-		pullCmd,
-		pushCmd,
-		listCmd,
-		psCmd,
-		copyCmd,
-		deleteCmd,
-		serveCmd,
-	} {
-		switch cmd {
-		case runCmd:
-			appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
-		case serveCmd:
-			appendEnvDocs(cmd, []envconfig.EnvVar{
-				envVars["OLLAMA_DEBUG"],
-				envVars["OLLAMA_HOST"],
-				envVars["OLLAMA_KEEP_ALIVE"],
-				envVars["OLLAMA_MAX_LOADED_MODELS"],
-				envVars["OLLAMA_MAX_QUEUE"],
-				envVars["OLLAMA_MODELS"],
-				envVars["OLLAMA_NUM_PARALLEL"],
-				envVars["OLLAMA_NOPRUNE"],
-				envVars["OLLAMA_ORIGINS"],
-				envVars["OLLAMA_SCHED_SPREAD"],
-				envVars["OLLAMA_TMPDIR"],
-				envVars["OLLAMA_FLASH_ATTENTION"],
-				envVars["OLLAMA_LLM_LIBRARY"],
-				envVars["OLLAMA_GPU_OVERHEAD"],
-				envVars["OLLAMA_LOAD_TIMEOUT"],
-			})
-		default:
-			appendEnvDocs(cmd, envs)
-		}
+		Annotations: envconfig.Describe("OLLAMA_HOST"),
 	}
 
 	rootCmd.AddCommand(
@@ -1460,5 +1432,7 @@ func NewCLI() *cobra.Command {
 		deleteCmd,
 	)
 
+	rootCmd.SetUsageTemplate(usageTemplate)
+
 	return rootCmd
 }

cmd/usage.gotmpl Normal file (87 lines added)
View File

@@ -0,0 +1,87 @@
Usage:
{{- if .Runnable }} {{ .UseLine }}
{{- end }}
{{- if .HasAvailableSubCommands }} {{ .CommandPath }} [command]
{{- end }}
{{- if gt (len .Aliases) 0}}
Aliases:
{{ .NameAndAliases }}
{{- end }}
{{- if .HasExample }}
Examples:
{{ .Example }}
{{- end }}
{{- if .HasAvailableSubCommands }}
{{- if eq (len .Groups) 0}}
Available Commands:
{{- range .Commands }}
{{- if or .IsAvailableCommand (eq .Name "help") }}
{{ rpad .Name .NamePadding }} {{ .Short }}
{{- end }}
{{- end }}
{{- else }}
{{- range .Groups }}
{{ .Title }}
{{- range $.Commands }}
{{- if and (eq .GroupID .ID) (or .IsAvailableCommand (eq .Name "help")) }}
{{ rpad .Name .NamePadding }} {{ .Short }}
{{- end }}
{{- end }}
{{- end }}
{{- if not .AllChildCommandsHaveGroup }}
Additional Commands:
{{- range $.Commands }}
{{- if and (eq .GroupID "") (or .IsAvailableCommand (eq .Name "help")) }}
{{ rpad .Name .NamePadding }} {{ .Short }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- if .HasAvailableLocalFlags }}
Flags:
{{ .LocalFlags.FlagUsages | trimTrailingWhitespaces }}
{{- end }}
{{- if .HasAvailableInheritedFlags }}
Global Flags:
{{ .InheritedFlags.FlagUsages | trimTrailingWhitespaces }}
{{- end }}
{{- if .Annotations }}
Environment Variables:
{{- range $key, $value := .Annotations }}
{{ rpad $key 24 }} {{ $value | trimTrailingWhitespaces }}
{{- end }}
{{- end }}
{{- if .HasHelpSubCommands }}
Additional help topics:
{{- range .Commands }}
{{- if .IsAdditionalHelpTopicCommand }}
{{ rpad .CommandPath .CommandPathPadding }} {{ .Short }}
{{- end }}
{{- end }}
{{- end }}
{{- if .HasAvailableSubCommands }}
Use "{{ .CommandPath }} [command] --help" for more information about a command.
{{- end }}
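
Two details of this template are easy to miss: Go's text/template ranges over a map in sorted key order, so annotated variables always render alphabetically, and rpad (one of the helper functions cobra registers for usage templates) pads each name out to a fixed 24-column field. For a command annotated with envconfig.Describe("OLLAMA_HOST"), as in the cmd changes above, the annotations block would render roughly as:

Environment Variables:
  OLLAMA_HOST              Listen address and port (default: 127.0.0.1:11434)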

View File

@@ -9,6 +9,7 @@ import (
 	"os"
 	"path/filepath"
 	"runtime"
+	"slices"
 	"strconv"
 	"strings"
 	"time"
@@ -92,45 +93,36 @@ func Models() string {
 	return filepath.Join(home, ".ollama", "models")
 }
 
-// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
-// Negative values are treated as infinite. Zero is treated as no keep alive.
-// Default is 5 minutes.
-func KeepAlive() (keepAlive time.Duration) {
-	keepAlive = 5 * time.Minute
-	if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
-		if d, err := time.ParseDuration(s); err == nil {
-			keepAlive = d
-		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
-			keepAlive = time.Duration(n) * time.Second
-		}
-	}
-
-	if keepAlive < 0 {
-		return time.Duration(math.MaxInt64)
-	}
-
-	return keepAlive
-}
-
-// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
-// Zero or Negative values are treated as infinite.
-// Default is 5 minutes.
-func LoadTimeout() (loadTimeout time.Duration) {
-	loadTimeout = 5 * time.Minute
-	if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
-		if d, err := time.ParseDuration(s); err == nil {
-			loadTimeout = d
-		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
-			loadTimeout = time.Duration(n) * time.Second
-		}
-	}
-
-	if loadTimeout <= 0 {
-		return time.Duration(math.MaxInt64)
-	}
-
-	return loadTimeout
-}
+func Duration(k string, defaultValue time.Duration, zeroIsInfinite bool) func() time.Duration {
+	return func() time.Duration {
+		dur := defaultValue
+		if s := Var(k); s != "" {
+			if d, err := time.ParseDuration(s); err == nil {
+				dur = d
+			} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
+				dur = time.Duration(n) * time.Second
+			}
+		}
+
+		if dur < 0 || (dur == 0 && zeroIsInfinite) {
+			return time.Duration(math.MaxInt64)
+		}
+
+		return dur
+	}
+}
+
+var (
+	// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
+	// Negative values are treated as infinite keep alive. Zero is treated as no keep alive.
+	// Default is 5 minutes.
+	KeepAlive = Duration("OLLAMA_KEEP_ALIVE", 5*time.Minute, false)
+
+	// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
+	// Negative or zero values are treated as infinite timeout.
+	// Default is 5 minutes.
+	LoadTimeout = Duration("OLLAMA_LOAD_TIMEOUT", 5*time.Minute, true)
+)
 
 func Bool(k string) func() bool {
 	return func() bool {
@@ -170,7 +162,7 @@ func String(s string) func() string {
 var (
 	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
-	TmpDir     = String("OLLAMA_TMPDIR")
+	TempDir    = String("OLLAMA_TMPDIR")
 
 	CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
 	HipVisibleDevices  = String("HIP_VISIBLE_DEVICES")
@@ -179,13 +171,14 @@ var (
 	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
 )
 
-func Uint(key string, defaultValue uint) func() uint {
-	return func() uint {
+func Uint[T uint | uint16 | uint32 | uint64](key string, defaultValue T) func() T {
+	return func() T {
 		if s := Var(key); s != "" {
 			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
 				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
 			} else {
-				return uint(n)
+				return T(n)
 			}
 		}
@@ -195,88 +188,91 @@ func Uint(key string, defaultValue uint) func() uint {
 var (
 	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
-	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
+	NumParallel = Uint("OLLAMA_NUM_PARALLEL", uint(0))
 	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
-	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
+	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", uint(0))
 	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
-	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
+	MaxQueue = Uint("OLLAMA_MAX_QUEUE", uint(512))
 	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
-	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
+	MaxVRAM = Uint("OLLAMA_MAX_VRAM", uint(0))
+	// GPUOverhead reserves a portion of VRAM per GPU. GPUOverhead can be configured via the OLLAMA_GPU_OVERHEAD environment variable.
+	GPUOverhead = Uint("OLLAMA_GPU_OVERHEAD", uint64(0))
 )
 
-func Uint64(key string, defaultValue uint64) func() uint64 {
-	return func() uint64 {
-		if s := Var(key); s != "" {
-			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
-				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
-			} else {
-				return n
-			}
-		}
-
-		return defaultValue
-	}
+type desc struct {
+	name         string
+	usage        string
+	value        any
+	defaultValue any
 }
 
-// Set aside VRAM per GPU
-var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
-
-type EnvVar struct {
-	Name        string
-	Value       any
-	Description string
+func (e desc) String() string {
+	return fmt.Sprintf("%s:%v", e.name, e.value)
 }
 
-func AsMap() map[string]EnvVar {
-	ret := map[string]EnvVar{
-		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
-		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
-		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
-		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
-		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
-		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
-		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
-		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
-		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
-		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
-		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
-		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
-		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
-		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
-		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
-		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
+func Vars() []desc {
+	s := []desc{
+		{"OLLAMA_DEBUG", "Enable debug", Debug(), false},
+		{"OLLAMA_FLASH_ATTENTION", "Enabled flash attention", FlashAttention(), false},
+		{"OLLAMA_GPU_OVERHEAD", "Reserve a portion of VRAM per GPU", GPUOverhead(), 0},
+		{"OLLAMA_HOST", "Listen address and port", Host(), "127.0.0.1:11434"},
+		{"OLLAMA_KEEP_ALIVE", "Duration of inactivity before models are unloaded", KeepAlive(), 5 * time.Minute},
+		{"OLLAMA_LLM_LIBRARY", "Set LLM library to bypass autodetection", LLMLibrary(), nil},
+		{"OLLAMA_LOAD_TIMEOUT", "Duration for stall detection during model loads", LoadTimeout(), 5 * time.Minute},
+		{"OLLAMA_MAX_LOADED_MODELS", "Maximum number of loaded models per GPU", MaxRunners(), nil},
+		{"OLLAMA_MAX_QUEUE", "Maximum number of queued requests", MaxQueue(), nil},
+		{"OLLAMA_MAX_VRAM", "Maximum VRAM to consider for model offloading", MaxVRAM(), nil},
+		{"OLLAMA_MODELS", "Path override for models directory", Models(), nil},
+		{"OLLAMA_NOHISTORY", "Disable readline history", NoHistory(), false},
+		{"OLLAMA_NOPRUNE", "Disable unused blob pruning", NoPrune(), false},
+		{"OLLAMA_NUM_PARALLEL", "Maximum number of parallel requests before requests are queued", NumParallel(), nil},
+		{"OLLAMA_ORIGINS", "Additional HTTP Origins to allow", Origins(), nil},
+		{"OLLAMA_SCHED_SPREAD", "Always schedule model across all GPUs", SchedSpread(), false},
+		{"OLLAMA_TMPDIR", "Path override for temporary directory", TempDir(), nil},
 
-		// Informational
-		"HTTP_PROXY":  {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
-		"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
-		"NO_PROXY":    {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
+		// informational
+		{"HTTPS_PROXY", "Proxy for HTTPS requests", os.Getenv("HTTPS_PROXY"), nil},
+		{"HTTP_PROXY", "Proxy for HTTP requests", os.Getenv("HTTP_PROXY"), nil},
+		{"NO_PROXY", "No proxy for these hosts", os.Getenv("NO_PROXY"), nil},
 	}
 
 	if runtime.GOOS != "windows" {
-		// Windows environment variables are case-insensitive so there's no need to duplicate them
-		ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
-		ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
-		ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
+		s = append(
+			s,
+			desc{"https_proxy", "Proxy for HTTPS requests", os.Getenv("https_proxy"), nil},
+			desc{"http_proxy", "Proxy for HTTP requests", os.Getenv("http_proxy"), nil},
+			desc{"no_proxy", "No proxy for these hosts", os.Getenv("no_proxy"), nil},
+		)
 	}
 
 	if runtime.GOOS != "darwin" {
-		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
-		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
-		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
-		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
-		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
-		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
+		s = append(
+			s,
+			desc{"CUDA_VISIBLE_DEVICES", "Set which NVIDIA devices are visible", CudaVisibleDevices(), nil},
+			desc{"HIP_VISIBLE_DEVICES", "Set which AMD devices are visible", HipVisibleDevices(), nil},
+			desc{"ROCR_VISIBLE_DEVICES", "Set which AMD devices are visible", RocrVisibleDevices(), nil},
+			desc{"GPU_DEVICE_ORDINAL", "Set which AMD devices are visible", GpuDeviceOrdinal(), nil},
+			desc{"HSA_OVERRIDE_GFX_VERSION", "Override the gfx used for all detected AMD GPUs", HsaOverrideGfxVersion(), nil},
+			desc{"OLLAMA_INTEL_GPU", "Enable experimental Intel GPU detection", IntelGPU(), nil},
+		)
 	}
 
-	return ret
+	return s
 }
 
-func Values() map[string]string {
-	vals := make(map[string]string)
-	for k, v := range AsMap() {
-		vals[k] = fmt.Sprintf("%v", v.Value)
-	}
-	return vals
+func Describe(s ...string) map[string]string {
+	vars := Vars()
+	m := make(map[string]string, len(s))
+	for _, k := range s {
+		if i := slices.IndexFunc(vars, func(e desc) bool { return e.name == k }); i != -1 {
+			m[k] = vars[i].usage
+			if vars[i].defaultValue != nil {
+				m[k] = fmt.Sprintf("%s (default: %v)", vars[i].usage, vars[i].defaultValue)
+			}
+		}
+	}
+
+	return m
 }
 
 // Var returns an environment variable stripped of leading and trailing quotes or spaces

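Taken together, the envconfig changes read naturally at the call site. A small, hypothetical driver (assuming the post-change exported API shown in this diff):

package main

import (
	"fmt"
	"os"

	"github.com/ollama/ollama/envconfig"
)

func main() {
	// Duration accepts time.ParseDuration syntax or a bare integer
	// number of seconds; the closure re-reads the variable on each call.
	os.Setenv("OLLAMA_KEEP_ALIVE", "10m")
	fmt.Println(envconfig.KeepAlive()) // 10m0s

	os.Setenv("OLLAMA_LOAD_TIMEOUT", "300")
	fmt.Println(envconfig.LoadTimeout()) // 5m0s

	// LoadTimeout is declared with zeroIsInfinite=true, so "0" disables
	// stall detection entirely (math.MaxInt64 nanoseconds).
	os.Setenv("OLLAMA_LOAD_TIMEOUT", "0")
	fmt.Println(envconfig.LoadTimeout()) // 2562047h47m16.854775807s

	// Describe plucks usage strings (and defaults, when declared) out of
	// Vars, in exactly the map[string]string shape cobra Annotations expect.
	fmt.Println(envconfig.Describe("OLLAMA_HOST"))
	// map[OLLAMA_HOST:Listen address and port (default: 127.0.0.1:11434)]
}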
View File

@@ -175,7 +175,7 @@ func TestUint(t *testing.T) {
 	for k, v := range cases {
 		t.Run(k, func(t *testing.T) {
 			t.Setenv("OLLAMA_UINT", k)
-			if i := Uint("OLLAMA_UINT", 11434)(); i != v {
+			if i := Uint("OLLAMA_UINT", uint(11434))(); i != v {
 				t.Errorf("%s: expected %d, got %d", k, v, i)
 			}
 		})

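The diff only updates TestUint for the new generic signature; Duration's semantics could be pinned down in the same table style. A hypothetical sketch, not shown in this diff (assumes the envconfig test file imports "math", "time", and "testing"):

func TestDuration(t *testing.T) {
	cases := map[string]time.Duration{
		"":    5 * time.Minute,              // unset: default
		"10m": 10 * time.Minute,             // time.ParseDuration syntax
		"300": 300 * time.Second,            // bare integers are seconds
		"-1s": time.Duration(math.MaxInt64), // negative: infinite
		"x":   5 * time.Minute,              // unparsable: default
	}

	for k, v := range cases {
		t.Run(k, func(t *testing.T) {
			t.Setenv("OLLAMA_DURATION", k)
			if d := Duration("OLLAMA_DURATION", 5*time.Minute, false)(); d != v {
				t.Errorf("%s: expected %s, got %s", k, v, d)
			}
		})
	}
}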
View File

@@ -95,7 +95,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 	// Overflow that didn't fit into the GPU
 	var overflow uint64
 
-	overhead := envconfig.GpuOverhead()
+	overhead := envconfig.GPUOverhead()
 	availableList := make([]string, len(gpus))
 	for i, gpu := range gpus {
 		availableList[i] = format.HumanBytes2(gpu.FreeMemory)
@@ -322,7 +322,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 }
 
 func (m MemoryEstimate) log() {
-	overhead := envconfig.GpuOverhead()
+	overhead := envconfig.GPUOverhead()
 	slog.Info(
 		"offload to "+m.inferenceLibrary,
 		slog.Group(

View File

@@ -119,7 +119,7 @@ func hasPayloads(payloadFS fs.FS) bool {
 func extractRunners(payloadFS fs.FS) (string, error) {
 	cleanupTmpDirs()
 
-	tmpDir, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
+	tmpDir, err := os.MkdirTemp(envconfig.TempDir(), "ollama")
 	if err != nil {
 		return "", fmt.Errorf("failed to generate tmp dir: %w", err)
 	}
@@ -224,7 +224,7 @@ func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
 
 // Best effort to clean up prior tmpdirs
 func cleanupTmpDirs() {
-	tmpDir := envconfig.TmpDir()
+	tmpDir := envconfig.TempDir()
 	if tmpDir == "" {
 		tmpDir = os.TempDir()
 	}

View File

@@ -1150,7 +1150,7 @@ func Serve(ln net.Listener) error {
 		level = slog.LevelDebug
 	}
 
-	slog.Info("server config", "env", envconfig.Values())
+	slog.Info("server config", "env", envconfig.Vars())
 	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
 		Level:     level,
 		AddSource: true,