Compare commits

..

2 Commits

Author SHA1 Message Date
1401b24c79
Remove mem check 2024-11-14 13:26:13 +01:00
Blake Mizerany
67691e410d
cmd: preserve exact bytes when displaying template/system layers (#7586) 2024-11-13 23:53:30 -08:00
3 changed files with 10 additions and 19 deletions

View File

@@ -800,9 +800,9 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
case "parameters":
fmt.Println(resp.Parameters)
case "system":
fmt.Println(resp.System)
fmt.Print(resp.System)
case "template":
fmt.Println(resp.Template)
fmt.Print(resp.Template)
}
return nil

View File

@@ -161,13 +161,15 @@ func (s *Server) inputs(prompt string, images []ImageData) ([]input, error) {
for i, part := range parts {
// text - tokenize
tokens, err := s.lc.Model().Tokenize(part, i == 0, true)
if err != nil {
return nil, err
}
if strings.TrimSpace(part) != "" {
tokens, err := s.lc.Model().Tokenize(part, i == 0, true)
if err != nil {
return nil, err
}
for _, t := range tokens {
inputs = append(inputs, input{token: t})
for _, t := range tokens {
inputs = append(inputs, input{token: t})
}
}
// image - generate image embedding

View File

@@ -128,17 +128,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
}
}
// On linux and windows, over-allocating CPU memory will almost always result in an error
// Darwin has fully dynamic swap so has no direct concept of free swap space
if runtime.GOOS != "darwin" {
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
available := systemFreeMemory + systemSwapFreeMemory
if systemMemoryRequired > available {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory))
return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available))
}
}
estimate.log()
// Loop through potential servers