usage templating
simplify usage templating by leveraging cobra's annotations
This commit is contained in:
parent
fda0d3be52
commit
7359c5ea5e
178
cmd/cmd.go
178
cmd/cmd.go
@ -8,6 +8,7 @@ import (
|
||||
"crypto/ed25519"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
_ "embed"
|
||||
"encoding/pem"
|
||||
"errors"
|
||||
"fmt"
|
||||
@ -47,6 +48,9 @@ import (
|
||||
"github.com/ollama/ollama/version"
|
||||
)
|
||||
|
||||
//go:embed usage.gotmpl
|
||||
var usageTemplate string
|
||||
|
||||
func CreateHandler(cmd *cobra.Command, args []string) error {
|
||||
filename, _ := cmd.Flags().GetString("file")
|
||||
filename, err := filepath.Abs(filename)
|
||||
@ -1254,21 +1258,6 @@ func versionHandler(cmd *cobra.Command, _ []string) {
|
||||
}
|
||||
}
|
||||
|
||||
func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
|
||||
if len(envs) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
envUsage := `
|
||||
Environment Variables:
|
||||
`
|
||||
for _, e := range envs {
|
||||
envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
|
||||
}
|
||||
|
||||
cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
|
||||
}
|
||||
|
||||
func NewCLI() *cobra.Command {
|
||||
log.SetFlags(log.LstdFlags | log.Lshortfile)
|
||||
cobra.EnableCommandSorting = false
|
||||
@ -1298,22 +1287,24 @@ func NewCLI() *cobra.Command {
|
||||
rootCmd.Flags().BoolP("version", "v", false, "Show version information")
|
||||
|
||||
createCmd := &cobra.Command{
|
||||
Use: "create MODEL",
|
||||
Short: "Create a model from a Modelfile",
|
||||
Args: cobra.ExactArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: CreateHandler,
|
||||
Use: "create MODEL",
|
||||
Short: "Create a model from a Modelfile",
|
||||
Args: cobra.ExactArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: CreateHandler,
|
||||
Annotations: envconfig.Describe("OLLAMA_HOST"),
|
||||
}
|
||||
|
||||
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile")
|
||||
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")
|
||||
|
||||
showCmd := &cobra.Command{
|
||||
Use: "show MODEL",
|
||||
Short: "Show information for a model",
|
||||
Args: cobra.ExactArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: ShowHandler,
|
||||
Use: "show MODEL",
|
||||
Short: "Show information for a model",
|
||||
Args: cobra.ExactArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: ShowHandler,
|
||||
Annotations: envconfig.Describe("OLLAMA_HOST"),
|
||||
}
|
||||
|
||||
showCmd.Flags().Bool("license", false, "Show license of a model")
|
||||
@ -1323,11 +1314,12 @@ func NewCLI() *cobra.Command {
|
||||
showCmd.Flags().Bool("system", false, "Show system message of a model")
|
||||
|
||||
runCmd := &cobra.Command{
|
||||
Use: "run MODEL [PROMPT]",
|
||||
Short: "Run a model",
|
||||
Args: cobra.MinimumNArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: RunHandler,
|
||||
Use: "run MODEL [PROMPT]",
|
||||
Short: "Run a model",
|
||||
Args: cobra.MinimumNArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: RunHandler,
|
||||
Annotations: envconfig.Describe("OLLAMA_HOST", "OLLAMA_NOHISTORY"),
|
||||
}
|
||||
|
||||
runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
|
||||
@ -1350,100 +1342,80 @@ func NewCLI() *cobra.Command {
|
||||
Short: "Start ollama",
|
||||
Args: cobra.ExactArgs(0),
|
||||
RunE: RunServer,
|
||||
Annotations: envconfig.Describe(
|
||||
"OLLAMA_DEBUG",
|
||||
"OLLAMA_HOST",
|
||||
"OLLAMA_KEEP_ALIVE",
|
||||
"OLLAMA_MAX_LOADED_MODELS",
|
||||
"OLLAMA_MAX_QUEUE",
|
||||
"OLLAMA_MODELS",
|
||||
"OLLAMA_NUM_PARALLEL",
|
||||
"OLLAMA_NOPRUNE",
|
||||
"OLLAMA_ORIGINS",
|
||||
"OLLAMA_SCHED_SPREAD",
|
||||
"OLLAMA_TMPDIR",
|
||||
"OLLAMA_FLASH_ATTENTION",
|
||||
"OLLAMA_LLM_LIBRARY",
|
||||
"OLLAMA_GPU_OVERHEAD",
|
||||
"OLLAMA_LOAD_TIMEOUT",
|
||||
),
|
||||
}
|
||||
|
||||
pullCmd := &cobra.Command{
|
||||
Use: "pull MODEL",
|
||||
Short: "Pull a model from a registry",
|
||||
Args: cobra.ExactArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: PullHandler,
|
||||
Use: "pull MODEL",
|
||||
Short: "Pull a model from a registry",
|
||||
Args: cobra.ExactArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: PullHandler,
|
||||
Annotations: envconfig.Describe("OLLAMA_HOST"),
|
||||
}
|
||||
|
||||
pullCmd.Flags().Bool("insecure", false, "Use an insecure registry")
|
||||
|
||||
pushCmd := &cobra.Command{
|
||||
Use: "push MODEL",
|
||||
Short: "Push a model to a registry",
|
||||
Args: cobra.ExactArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: PushHandler,
|
||||
Use: "push MODEL",
|
||||
Short: "Push a model to a registry",
|
||||
Args: cobra.ExactArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: PushHandler,
|
||||
Annotations: envconfig.Describe("OLLAMA_HOST"),
|
||||
}
|
||||
|
||||
pushCmd.Flags().Bool("insecure", false, "Use an insecure registry")
|
||||
|
||||
listCmd := &cobra.Command{
|
||||
Use: "list",
|
||||
Aliases: []string{"ls"},
|
||||
Short: "List models",
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: ListHandler,
|
||||
Use: "list",
|
||||
Aliases: []string{"ls"},
|
||||
Short: "List models",
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: ListHandler,
|
||||
Annotations: envconfig.Describe("OLLAMA_HOST"),
|
||||
}
|
||||
|
||||
psCmd := &cobra.Command{
|
||||
Use: "ps",
|
||||
Short: "List running models",
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: ListRunningHandler,
|
||||
Use: "ps",
|
||||
Short: "List running models",
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: ListRunningHandler,
|
||||
Annotations: envconfig.Describe("OLLAMA_HOST"),
|
||||
}
|
||||
|
||||
copyCmd := &cobra.Command{
|
||||
Use: "cp SOURCE DESTINATION",
|
||||
Short: "Copy a model",
|
||||
Args: cobra.ExactArgs(2),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: CopyHandler,
|
||||
Use: "cp SOURCE DESTINATION",
|
||||
Short: "Copy a model",
|
||||
Args: cobra.ExactArgs(2),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: CopyHandler,
|
||||
Annotations: envconfig.Describe("OLLAMA_HOST"),
|
||||
}
|
||||
|
||||
deleteCmd := &cobra.Command{
|
||||
Use: "rm MODEL [MODEL...]",
|
||||
Short: "Remove a model",
|
||||
Args: cobra.MinimumNArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: DeleteHandler,
|
||||
}
|
||||
|
||||
envVars := envconfig.AsMap()
|
||||
|
||||
envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
|
||||
|
||||
for _, cmd := range []*cobra.Command{
|
||||
createCmd,
|
||||
showCmd,
|
||||
runCmd,
|
||||
stopCmd,
|
||||
pullCmd,
|
||||
pushCmd,
|
||||
listCmd,
|
||||
psCmd,
|
||||
copyCmd,
|
||||
deleteCmd,
|
||||
serveCmd,
|
||||
} {
|
||||
switch cmd {
|
||||
case runCmd:
|
||||
appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
|
||||
case serveCmd:
|
||||
appendEnvDocs(cmd, []envconfig.EnvVar{
|
||||
envVars["OLLAMA_DEBUG"],
|
||||
envVars["OLLAMA_HOST"],
|
||||
envVars["OLLAMA_KEEP_ALIVE"],
|
||||
envVars["OLLAMA_MAX_LOADED_MODELS"],
|
||||
envVars["OLLAMA_MAX_QUEUE"],
|
||||
envVars["OLLAMA_MODELS"],
|
||||
envVars["OLLAMA_NUM_PARALLEL"],
|
||||
envVars["OLLAMA_NOPRUNE"],
|
||||
envVars["OLLAMA_ORIGINS"],
|
||||
envVars["OLLAMA_SCHED_SPREAD"],
|
||||
envVars["OLLAMA_TMPDIR"],
|
||||
envVars["OLLAMA_FLASH_ATTENTION"],
|
||||
envVars["OLLAMA_LLM_LIBRARY"],
|
||||
envVars["OLLAMA_GPU_OVERHEAD"],
|
||||
envVars["OLLAMA_LOAD_TIMEOUT"],
|
||||
})
|
||||
default:
|
||||
appendEnvDocs(cmd, envs)
|
||||
}
|
||||
Use: "rm MODEL [MODEL...]",
|
||||
Short: "Remove a model",
|
||||
Args: cobra.MinimumNArgs(1),
|
||||
PreRunE: checkServerHeartbeat,
|
||||
RunE: DeleteHandler,
|
||||
Annotations: envconfig.Describe("OLLAMA_HOST"),
|
||||
}
|
||||
|
||||
rootCmd.AddCommand(
|
||||
@ -1460,5 +1432,7 @@ func NewCLI() *cobra.Command {
|
||||
deleteCmd,
|
||||
)
|
||||
|
||||
rootCmd.SetUsageTemplate(usageTemplate)
|
||||
|
||||
return rootCmd
|
||||
}
|
||||
|
87
cmd/usage.gotmpl
Normal file
87
cmd/usage.gotmpl
Normal file
@ -0,0 +1,87 @@
|
||||
Usage:
|
||||
{{- if .Runnable }} {{ .UseLine }}
|
||||
{{- end }}
|
||||
{{- if .HasAvailableSubCommands }} {{ .CommandPath }} [command]
|
||||
{{- end }}
|
||||
|
||||
{{- if gt (len .Aliases) 0}}
|
||||
|
||||
Aliases:
|
||||
{{ .NameAndAliases }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .HasExample }}
|
||||
|
||||
Examples:
|
||||
{{ .Example }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .HasAvailableSubCommands }}
|
||||
{{- if eq (len .Groups) 0}}
|
||||
|
||||
Available Commands:
|
||||
{{- range .Commands }}
|
||||
{{- if or .IsAvailableCommand (eq .Name "help") }}
|
||||
{{ rpad .Name .NamePadding }} {{ .Short }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- else }}
|
||||
|
||||
{{- range .Groups }}
|
||||
|
||||
{{ .Title }}
|
||||
|
||||
{{- range $.Commands }}
|
||||
{{- if and (eq .GroupID .ID) (or .IsAvailableCommand (eq .Name "help")) }}
|
||||
{{ rpad .Name .NamePadding }} {{ .Short }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if not .AllChildCommandsHaveGroup }}
|
||||
|
||||
Additional Commands:
|
||||
{{- range $.Commands }}
|
||||
{{- if and (eq .GroupID "") (or .IsAvailableCommand (eq .Name "help")) }}
|
||||
{{ rpad .Name .NamePadding }} {{ .Short }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .HasAvailableLocalFlags }}
|
||||
|
||||
Flags:
|
||||
{{ .LocalFlags.FlagUsages | trimTrailingWhitespaces }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .HasAvailableInheritedFlags }}
|
||||
|
||||
Global Flags:
|
||||
{{ .InheritedFlags.FlagUsages | trimTrailingWhitespaces }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .Annotations }}
|
||||
|
||||
Environment Variables:
|
||||
{{- range $key, $value := .Annotations }}
|
||||
{{ rpad $key 24 }} {{ $value | trimTrailingWhitespaces }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .HasHelpSubCommands }}
|
||||
|
||||
Additional help topics:
|
||||
{{- range .Commands }}
|
||||
{{- if .IsAdditionalHelpTopicCommand }}
|
||||
{{ rpad .CommandPath .CommandPathPadding }} {{ .Short }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .HasAvailableSubCommands }}
|
||||
|
||||
Use "{{ .CommandPath }} [command] --help" for more information about a command.
|
||||
{{- end }}
|
@ -9,6 +9,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@ -92,45 +93,36 @@ func Models() string {
|
||||
return filepath.Join(home, ".ollama", "models")
|
||||
}
|
||||
|
||||
// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
|
||||
// Negative values are treated as infinite. Zero is treated as no keep alive.
|
||||
// Default is 5 minutes.
|
||||
func KeepAlive() (keepAlive time.Duration) {
|
||||
keepAlive = 5 * time.Minute
|
||||
if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
|
||||
if d, err := time.ParseDuration(s); err == nil {
|
||||
keepAlive = d
|
||||
} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
|
||||
keepAlive = time.Duration(n) * time.Second
|
||||
func Duration(k string, defaultValue time.Duration, zeroIsInfinite bool) func() time.Duration {
|
||||
return func() time.Duration {
|
||||
dur := defaultValue
|
||||
if s := Var(k); s != "" {
|
||||
if d, err := time.ParseDuration(s); err == nil {
|
||||
dur = d
|
||||
} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
|
||||
dur = time.Duration(n) * time.Second
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if keepAlive < 0 {
|
||||
return time.Duration(math.MaxInt64)
|
||||
}
|
||||
if dur < 0 || (dur == 0 && zeroIsInfinite) {
|
||||
return time.Duration(math.MaxInt64)
|
||||
}
|
||||
|
||||
return keepAlive
|
||||
return dur
|
||||
}
|
||||
}
|
||||
|
||||
// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
|
||||
// Zero or Negative values are treated as infinite.
|
||||
// Default is 5 minutes.
|
||||
func LoadTimeout() (loadTimeout time.Duration) {
|
||||
loadTimeout = 5 * time.Minute
|
||||
if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
|
||||
if d, err := time.ParseDuration(s); err == nil {
|
||||
loadTimeout = d
|
||||
} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
|
||||
loadTimeout = time.Duration(n) * time.Second
|
||||
}
|
||||
}
|
||||
var (
|
||||
// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
|
||||
// Negative values are treated as infinite keep alive. Zero is treated as no keep alive.
|
||||
// Default is 5 minutes.
|
||||
KeepAlive = Duration("OLLAMA_KEEP_ALIVE", 5*time.Minute, false)
|
||||
|
||||
if loadTimeout <= 0 {
|
||||
return time.Duration(math.MaxInt64)
|
||||
}
|
||||
|
||||
return loadTimeout
|
||||
}
|
||||
// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
|
||||
// Negative or zero values are treated as infinite timeout.
|
||||
// Default is 5 minutes.
|
||||
LoadTimeout = Duration("OLLAMA_LOAD_TIMEOUT", 5*time.Minute, true)
|
||||
)
|
||||
|
||||
func Bool(k string) func() bool {
|
||||
return func() bool {
|
||||
@ -170,7 +162,7 @@ func String(s string) func() string {
|
||||
|
||||
var (
|
||||
LLMLibrary = String("OLLAMA_LLM_LIBRARY")
|
||||
TmpDir = String("OLLAMA_TMPDIR")
|
||||
TempDir = String("OLLAMA_TMPDIR")
|
||||
|
||||
CudaVisibleDevices = String("CUDA_VISIBLE_DEVICES")
|
||||
HipVisibleDevices = String("HIP_VISIBLE_DEVICES")
|
||||
@ -179,13 +171,14 @@ var (
|
||||
HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
|
||||
)
|
||||
|
||||
func Uint(key string, defaultValue uint) func() uint {
|
||||
return func() uint {
|
||||
|
||||
func Uint[T uint | uint16 | uint32 | uint64](key string, defaultValue T) func() T {
|
||||
return func() T {
|
||||
if s := Var(key); s != "" {
|
||||
if n, err := strconv.ParseUint(s, 10, 64); err != nil {
|
||||
slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
|
||||
} else {
|
||||
return uint(n)
|
||||
return T(n)
|
||||
}
|
||||
}
|
||||
|
||||
@ -195,88 +188,91 @@ func Uint(key string, defaultValue uint) func() uint {
|
||||
|
||||
var (
|
||||
// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
|
||||
NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
|
||||
NumParallel = Uint("OLLAMA_NUM_PARALLEL", uint(0))
|
||||
// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
|
||||
MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
|
||||
MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", uint(0))
|
||||
// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
|
||||
MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
|
||||
MaxQueue = Uint("OLLAMA_MAX_QUEUE", uint(512))
|
||||
// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
|
||||
MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
|
||||
MaxVRAM = Uint("OLLAMA_MAX_VRAM", uint(0))
|
||||
// GPUOverhead reserves a portion of VRAM per GPU. GPUOverhead can be configured via the OLLAMA_GPU_OVERHEAD environment variable.
|
||||
GPUOverhead = Uint("OLLAMA_GPU_OVERHEAD", uint64(0))
|
||||
)
|
||||
|
||||
func Uint64(key string, defaultValue uint64) func() uint64 {
|
||||
return func() uint64 {
|
||||
if s := Var(key); s != "" {
|
||||
if n, err := strconv.ParseUint(s, 10, 64); err != nil {
|
||||
slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
|
||||
} else {
|
||||
return n
|
||||
}
|
||||
}
|
||||
|
||||
return defaultValue
|
||||
}
|
||||
type desc struct {
|
||||
name string
|
||||
usage string
|
||||
value any
|
||||
defaultValue any
|
||||
}
|
||||
|
||||
// Set aside VRAM per GPU
|
||||
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
|
||||
|
||||
type EnvVar struct {
|
||||
Name string
|
||||
Value any
|
||||
Description string
|
||||
func (e desc) String() string {
|
||||
return fmt.Sprintf("%s:%v", e.name, e.value)
|
||||
}
|
||||
|
||||
func AsMap() map[string]EnvVar {
|
||||
ret := map[string]EnvVar{
|
||||
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
||||
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
|
||||
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
|
||||
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
||||
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
|
||||
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
|
||||
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
|
||||
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
|
||||
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
|
||||
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
|
||||
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
|
||||
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
|
||||
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
|
||||
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
|
||||
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
|
||||
"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
|
||||
func Vars() []desc {
|
||||
s := []desc{
|
||||
{"OLLAMA_DEBUG", "Enable debug", Debug(), false},
|
||||
{"OLLAMA_FLASH_ATTENTION", "Enabled flash attention", FlashAttention(), false},
|
||||
{"OLLAMA_GPU_OVERHEAD", "Reserve a portion of VRAM per GPU", GPUOverhead(), 0},
|
||||
{"OLLAMA_HOST", "Listen address and port", Host(), "127.0.0.1:11434"},
|
||||
{"OLLAMA_KEEP_ALIVE", "Duration of inactivity before models are unloaded", KeepAlive(), 5 * time.Minute},
|
||||
{"OLLAMA_LLM_LIBRARY", "Set LLM library to bypass autodetection", LLMLibrary(), nil},
|
||||
{"OLLAMA_LOAD_TIMEOUT", "Duration for stall detection during model loads", LoadTimeout(), 5 * time.Minute},
|
||||
{"OLLAMA_MAX_LOADED_MODELS", "Maximum number of loaded models per GPU", MaxRunners(), nil},
|
||||
{"OLLAMA_MAX_QUEUE", "Maximum number of queued requests", MaxQueue(), nil},
|
||||
{"OLLAMA_MAX_VRAM", "Maximum VRAM to consider for model offloading", MaxVRAM(), nil},
|
||||
{"OLLAMA_MODELS", "Path override for models directory", Models(), nil},
|
||||
{"OLLAMA_NOHISTORY", "Disable readline history", NoHistory(), false},
|
||||
{"OLLAMA_NOPRUNE", "Disable unused blob pruning", NoPrune(), false},
|
||||
{"OLLAMA_NUM_PARALLEL", "Maximum number of parallel requests before requests are queued", NumParallel(), nil},
|
||||
{"OLLAMA_ORIGINS", "Additional HTTP Origins to allow", Origins(), nil},
|
||||
{"OLLAMA_SCHED_SPREAD", "Always schedule model across all GPUs", SchedSpread(), false},
|
||||
{"OLLAMA_TMPDIR", "Path override for temporary directory", TempDir(), nil},
|
||||
|
||||
// Informational
|
||||
"HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
|
||||
"HTTPS_PROXY": {"HTTPS_PROXY", String("HTTPS_PROXY")(), "HTTPS proxy"},
|
||||
"NO_PROXY": {"NO_PROXY", String("NO_PROXY")(), "No proxy"},
|
||||
// informational
|
||||
{"HTTPS_PROXY", "Proxy for HTTPS requests", os.Getenv("HTTPS_PROXY"), nil},
|
||||
{"HTTP_PROXY", "Proxy for HTTP requests", os.Getenv("HTTP_PROXY"), nil},
|
||||
{"NO_PROXY", "No proxy for these hosts", os.Getenv("NO_PROXY"), nil},
|
||||
}
|
||||
|
||||
if runtime.GOOS != "windows" {
|
||||
// Windows environment variables are case-insensitive so there's no need to duplicate them
|
||||
ret["http_proxy"] = EnvVar{"http_proxy", String("http_proxy")(), "HTTP proxy"}
|
||||
ret["https_proxy"] = EnvVar{"https_proxy", String("https_proxy")(), "HTTPS proxy"}
|
||||
ret["no_proxy"] = EnvVar{"no_proxy", String("no_proxy")(), "No proxy"}
|
||||
s = append(
|
||||
s,
|
||||
desc{"https_proxy", "Proxy for HTTPS requests", os.Getenv("https_proxy"), nil},
|
||||
desc{"http_proxy", "Proxy for HTTP requests", os.Getenv("http_proxy"), nil},
|
||||
desc{"no_proxy", "No proxy for these hosts", os.Getenv("no_proxy"), nil},
|
||||
)
|
||||
}
|
||||
|
||||
if runtime.GOOS != "darwin" {
|
||||
ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
|
||||
ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
|
||||
ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
|
||||
ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
|
||||
ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
|
||||
ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
|
||||
s = append(
|
||||
s,
|
||||
desc{"CUDA_VISIBLE_DEVICES", "Set which NVIDIA devices are visible", CudaVisibleDevices(), nil},
|
||||
desc{"HIP_VISIBLE_DEVICES", "Set which AMD devices are visible", HipVisibleDevices(), nil},
|
||||
desc{"ROCR_VISIBLE_DEVICES", "Set which AMD devices are visible", RocrVisibleDevices(), nil},
|
||||
desc{"GPU_DEVICE_ORDINAL", "Set which AMD devices are visible", GpuDeviceOrdinal(), nil},
|
||||
desc{"HSA_OVERRIDE_GFX_VERSION", "Override the gfx used for all detected AMD GPUs", HsaOverrideGfxVersion(), nil},
|
||||
desc{"OLLAMA_INTEL_GPU", "Enable experimental Intel GPU detection", IntelGPU(), nil},
|
||||
)
|
||||
}
|
||||
|
||||
return ret
|
||||
return s
|
||||
}
|
||||
|
||||
func Values() map[string]string {
|
||||
vals := make(map[string]string)
|
||||
for k, v := range AsMap() {
|
||||
vals[k] = fmt.Sprintf("%v", v.Value)
|
||||
func Describe(s ...string) map[string]string {
|
||||
vars := Vars()
|
||||
m := make(map[string]string, len(s))
|
||||
for _, k := range s {
|
||||
if i := slices.IndexFunc(vars, func(e desc) bool { return e.name == k }); i != -1 {
|
||||
m[k] = vars[i].usage
|
||||
if vars[i].defaultValue != nil {
|
||||
m[k] = fmt.Sprintf("%s (default: %v)", vars[i].usage, vars[i].defaultValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
return vals
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
// Var returns an environment variable stripped of leading and trailing quotes or spaces
|
||||
|
@ -175,7 +175,7 @@ func TestUint(t *testing.T) {
|
||||
for k, v := range cases {
|
||||
t.Run(k, func(t *testing.T) {
|
||||
t.Setenv("OLLAMA_UINT", k)
|
||||
if i := Uint("OLLAMA_UINT", 11434)(); i != v {
|
||||
if i := Uint("OLLAMA_UINT", uint(11434))(); i != v {
|
||||
t.Errorf("%s: expected %d, got %d", k, v, i)
|
||||
}
|
||||
})
|
||||
|
@ -95,7 +95,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||
// Overflow that didn't fit into the GPU
|
||||
var overflow uint64
|
||||
|
||||
overhead := envconfig.GpuOverhead()
|
||||
overhead := envconfig.GPUOverhead()
|
||||
availableList := make([]string, len(gpus))
|
||||
for i, gpu := range gpus {
|
||||
availableList[i] = format.HumanBytes2(gpu.FreeMemory)
|
||||
@ -322,7 +322,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
||||
}
|
||||
|
||||
func (m MemoryEstimate) log() {
|
||||
overhead := envconfig.GpuOverhead()
|
||||
overhead := envconfig.GPUOverhead()
|
||||
slog.Info(
|
||||
"offload to "+m.inferenceLibrary,
|
||||
slog.Group(
|
||||
|
@ -119,7 +119,7 @@ func hasPayloads(payloadFS fs.FS) bool {
|
||||
|
||||
func extractRunners(payloadFS fs.FS) (string, error) {
|
||||
cleanupTmpDirs()
|
||||
tmpDir, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
|
||||
tmpDir, err := os.MkdirTemp(envconfig.TempDir(), "ollama")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to generate tmp dir: %w", err)
|
||||
}
|
||||
@ -224,7 +224,7 @@ func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
|
||||
|
||||
// Best effort to clean up prior tmpdirs
|
||||
func cleanupTmpDirs() {
|
||||
tmpDir := envconfig.TmpDir()
|
||||
tmpDir := envconfig.TempDir()
|
||||
if tmpDir == "" {
|
||||
tmpDir = os.TempDir()
|
||||
}
|
||||
|
@ -1150,7 +1150,7 @@ func Serve(ln net.Listener) error {
|
||||
level = slog.LevelDebug
|
||||
}
|
||||
|
||||
slog.Info("server config", "env", envconfig.Values())
|
||||
slog.Info("server config", "env", envconfig.Vars())
|
||||
handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||
Level: level,
|
||||
AddSource: true,
|
||||
|
Loading…
x
Reference in New Issue
Block a user