Compare commits

1 Commit

Jesse Gross
e84cdc2b75 runner.go: Don't trim whitespace from inputs
It's possible to get prompts that consist entirely of whitespace -
this is most likely to happen when generating embeddings. Currently,
we will trim this away, leaving an empty prompt, which will then
generate an error.

Generating embeddings from whitespace should not trigger an error,
as this may break pipelines. It's better to just leave the whitespace
in place and process what we are given. This is consistent with
past versions of Ollama.

Bug #7578
2024-11-13 17:00:38 -08:00
3 changed files with 19 additions and 10 deletions
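
For context on the failure mode described in the commit message: an embeddings request whose prompt is nothing but whitespace previously had that whitespace trimmed away by the runner, leaving an empty prompt and producing an error. The sketch below shows the shape of such a request against a local server's /api/embeddings endpoint; the model name, the whitespace content, and the default port are illustrative assumptions, not details taken from the commit.

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
)

func main() {
    // A prompt consisting entirely of whitespace, as described in the
    // commit message; before this change the runner trimmed it to "".
    payload, err := json.Marshal(map[string]string{
        "model":  "llama3.2", // illustrative model name
        "prompt": " \n\t ",   // whitespace-only input
    })
    if err != nil {
        panic(err)
    }

    resp, err := http.Post("http://localhost:11434/api/embeddings",
        "application/json", bytes.NewReader(payload))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(body))
}

With this change, such a request is expected to return an embedding rather than an error, matching the behavior of past versions of Ollama.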

@@ -800,9 +800,9 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
     case "parameters":
         fmt.Println(resp.Parameters)
     case "system":
-        fmt.Print(resp.System)
+        fmt.Println(resp.System)
     case "template":
-        fmt.Print(resp.Template)
+        fmt.Println(resp.Template)
     }
 
     return nil

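The change above only affects how the value is written to the terminal: fmt.Print writes its argument exactly as given, while fmt.Println appends a trailing newline, so output whose value lacks its own newline no longer runs into whatever is printed next. A trivial standalone illustration:

package main

import "fmt"

func main() {
    value := "a system prompt with no trailing newline"

    fmt.Print(value)   // writes the value as-is; no newline follows
    fmt.Println()      // added here only so the demo output stays readable
    fmt.Println(value) // writes the value followed by a newline
}
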
@@ -161,15 +161,13 @@ func (s *Server) inputs(prompt string, images []ImageData) ([]input, error) {
     for i, part := range parts {
         // text - tokenize
-        if strings.TrimSpace(part) != "" {
-            tokens, err := s.lc.Model().Tokenize(part, i == 0, true)
-            if err != nil {
-                return nil, err
-            }
+        tokens, err := s.lc.Model().Tokenize(part, i == 0, true)
+        if err != nil {
+            return nil, err
+        }
 
-            for _, t := range tokens {
-                inputs = append(inputs, input{token: t})
-            }
-        }
+        for _, t := range tokens {
+            inputs = append(inputs, input{token: t})
+        }
 
         // image - generate image embedding

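In the runner, the effect of the change above is that a whitespace-only part is now handed to the tokenizer instead of being skipped by the TrimSpace guard. The standalone sketch below shows that difference with a stand-in tokenizer; the tokenize function here is hypothetical and simply emits one token per rune, it is not the llama.cpp binding the runner actually calls.

package main

import (
    "fmt"
    "strings"
)

// tokenize stands in for s.lc.Model().Tokenize in runner.go; it is a
// hypothetical placeholder that returns one token per rune.
func tokenize(s string) []int {
    tokens := make([]int, 0, len(s))
    for range s {
        tokens = append(tokens, 1)
    }
    return tokens
}

func main() {
    part := " \n\t " // a prompt part that is entirely whitespace

    // Old behavior: whitespace-only parts were dropped, so a prompt made
    // only of whitespace produced no inputs and later caused an error.
    if strings.TrimSpace(part) != "" {
        fmt.Println("old: tokens:", len(tokenize(part)))
    } else {
        fmt.Println("old: part skipped entirely")
    }

    // New behavior: every part is tokenized, whitespace included.
    fmt.Println("new: tokens:", len(tokenize(part)))
}
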
@@ -128,6 +128,17 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
         }
     }
 
+    // On linux and windows, over-allocating CPU memory will almost always result in an error
+    // Darwin has fully dynamic swap so has no direct concept of free swap space
+    if runtime.GOOS != "darwin" {
+        systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
+        available := systemFreeMemory + systemSwapFreeMemory
+        if systemMemoryRequired > available {
+            slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory))
+            return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available))
+        }
+    }
+
     estimate.log()
 
     // Loop through potential servers
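
As a worked example of the check added above, with illustrative sizes rather than real measurements: a model estimated at 12 GiB in total with 8 GiB placed in VRAM needs roughly 4 GiB of system memory; if free RAM plus free swap comes to only 3 GiB, NewLlamaServer now fails up front on Linux and Windows with a clear error instead of letting the load over-allocate. A minimal sketch of the same comparison:

package main

import "fmt"

const GiB = uint64(1) << 30

func main() {
    // Illustrative numbers standing in for the estimate and the system
    // memory probes used by NewLlamaServer.
    totalSize := 12 * GiB // estimate.TotalSize
    vramSize := 8 * GiB   // estimate.VRAMSize
    freeRAM := 2 * GiB    // systemFreeMemory
    freeSwap := 1 * GiB   // systemSwapFreeMemory

    required := totalSize - vramSize
    available := freeRAM + freeSwap

    if required > available {
        fmt.Printf("model requires more system memory (%d GiB) than is available (%d GiB)\n",
            required/GiB, available/GiB)
        return
    }
    fmt.Println("enough system memory; continuing to load the model")
}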