diff --git a/README.md b/README.md index 553e0c08..84996372 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,8 @@ curl https://ollama.ai/install.sh | sh ### Docker -See the official [Docker image](https://hub.docker.com/r/ollama/ollama). +The official [Ollama Docker image `ollama/ollama`](https://hub.docker.com/r/ollama/ollama) +is available on Docker Hub. ## Quickstart @@ -88,7 +89,7 @@ See the [guide](docs/import.md) on importing models for more information. ### Customize a prompt -Models from the Ollama library can be customized with a prompt. The example +Models from the Ollama library can be customized with a prompt. For example, to customize the `llama2` model: ``` ollama pull llama2 @@ -178,8 +179,7 @@ ollama list Install `cmake` and `go`: ``` -brew install cmake -brew install go +brew install cmake go ``` Then generate dependencies and build: @@ -203,9 +203,8 @@ Finally, in a separate shell, run a model: ## REST API -See the [API documentation](docs/api.md) for all endpoints. - -Ollama has an API for running and managing models. For example to generate text from a model: +Ollama has a REST API for running and managing models. +For example, to generate text from a model: ``` curl -X POST http://localhost:11434/api/generate -d '{ @@ -214,6 +213,8 @@ curl -X POST http://localhost:11434/api/generate -d '{ }' ``` +See the [API documentation](./docs/api.md) for all endpoints. 
+ ## Community Integrations - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa) diff --git a/api/client.go b/api/client.go index 3cf55a25..c89ff38f 100644 --- a/api/client.go +++ b/api/client.go @@ -18,10 +18,6 @@ import ( "github.com/jmorganca/ollama/version" ) -const DefaultHost = "127.0.0.1:11434" - -var envHost = os.Getenv("OLLAMA_HOST") - type Client struct { base *url.URL http http.Client @@ -44,14 +40,24 @@ func checkError(resp *http.Response, body []byte) error { } func ClientFromEnvironment() (*Client, error) { + defaultPort := "11434" + scheme, hostport, ok := strings.Cut(os.Getenv("OLLAMA_HOST"), "://") - if !ok { + switch { + case !ok: scheme, hostport = "http", os.Getenv("OLLAMA_HOST") + case scheme == "http": + defaultPort = "80" + case scheme == "https": + defaultPort = "443" } + // trim trailing slashes + hostport = strings.TrimRight(hostport, "/") + host, port, err := net.SplitHostPort(hostport) if err != nil { - host, port = "127.0.0.1", "11434" + host, port = "127.0.0.1", defaultPort if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil { host = ip.String() } else if hostport != "" { diff --git a/api/client_test.go b/api/client_test.go new file mode 100644 index 00000000..0eafedca --- /dev/null +++ b/api/client_test.go @@ -0,0 +1,43 @@ +package api + +import "testing" + +func TestClientFromEnvironment(t *testing.T) { + type testCase struct { + value string + expect string + err error + } + + testCases := map[string]*testCase{ + "empty": {value: "", expect: "http://127.0.0.1:11434"}, + "only address": {value: "1.2.3.4", expect: "http://1.2.3.4:11434"}, + "only port": {value: ":1234", expect: "http://:1234"}, + "address and port": {value: "1.2.3.4:1234", expect: "http://1.2.3.4:1234"}, + "scheme http and address": {value: 
"http://1.2.3.4", expect: "http://1.2.3.4:80"}, + "scheme https and address": {value: "https://1.2.3.4", expect: "https://1.2.3.4:443"}, + "scheme, address, and port": {value: "https://1.2.3.4:1234", expect: "https://1.2.3.4:1234"}, + "hostname": {value: "example.com", expect: "http://example.com:11434"}, + "hostname and port": {value: "example.com:1234", expect: "http://example.com:1234"}, + "scheme http and hostname": {value: "http://example.com", expect: "http://example.com:80"}, + "scheme https and hostname": {value: "https://example.com", expect: "https://example.com:443"}, + "scheme, hostname, and port": {value: "https://example.com:1234", expect: "https://example.com:1234"}, + "trailing slash": {value: "example.com/", expect: "http://example.com:11434"}, + "trailing slash port": {value: "example.com:1234/", expect: "http://example.com:1234"}, + } + + for k, v := range testCases { + t.Run(k, func(t *testing.T) { + t.Setenv("OLLAMA_HOST", v.value) + + client, err := ClientFromEnvironment() + if err != v.err { + t.Fatalf("expected %s, got %s", v.err, err) + } + + if client.base.String() != v.expect { + t.Fatalf("expected %s, got %s", v.expect, client.base.String()) + } + }) + } +} diff --git a/cmd/cmd.go b/cmd/cmd.go index d334d431..b0f9681c 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -22,7 +22,6 @@ import ( "github.com/dustin/go-humanize" "github.com/olekukonko/tablewriter" - "github.com/pdevine/readline" "github.com/spf13/cobra" "golang.org/x/crypto/ssh" "golang.org/x/term" @@ -30,30 +29,11 @@ import ( "github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/format" "github.com/jmorganca/ollama/progressbar" + "github.com/jmorganca/ollama/readline" "github.com/jmorganca/ollama/server" "github.com/jmorganca/ollama/version" ) -type Painter struct { - IsMultiLine bool -} - -func (p Painter) Paint(line []rune, _ int) []rune { - termType := os.Getenv("TERM") - if termType == "xterm-256color" && len(line) == 0 { - var prompt string - if p.IsMultiLine { - 
prompt = "Use \"\"\" to end multi-line input" - } else { - prompt = "Send a message (/? for help)" - } - return []rune(fmt.Sprintf("\033[38;5;245m%s\033[%dD\033[0m", prompt, len(prompt))) - } - // add a space and a backspace to prevent the cursor from walking up the screen - line = append(line, []rune(" \b")...) - return line -} - func CreateHandler(cmd *cobra.Command, args []string) error { filename, _ := cmd.Flags().GetString("file") filename, err := filepath.Abs(filename) @@ -508,38 +488,11 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error { } func generateInteractive(cmd *cobra.Command, model string) error { - home, err := os.UserHomeDir() - if err != nil { - return err - } - // load the model if err := generate(cmd, model, "", false); err != nil { return err } - completer := readline.NewPrefixCompleter( - readline.PcItem("/help"), - readline.PcItem("/list"), - readline.PcItem("/set", - readline.PcItem("history"), - readline.PcItem("nohistory"), - readline.PcItem("wordwrap"), - readline.PcItem("nowordwrap"), - readline.PcItem("verbose"), - readline.PcItem("quiet"), - ), - readline.PcItem("/show", - readline.PcItem("license"), - readline.PcItem("modelfile"), - readline.PcItem("parameters"), - readline.PcItem("system"), - readline.PcItem("template"), - ), - readline.PcItem("/exit"), - readline.PcItem("/bye"), - ) - usage := func() { fmt.Fprintln(os.Stderr, "Available Commands:") fmt.Fprintln(os.Stderr, " /set Set session variables") @@ -572,20 +525,17 @@ func generateInteractive(cmd *cobra.Command, model string) error { fmt.Fprintln(os.Stderr, "") } - var painter Painter - - config := readline.Config{ - Painter: &painter, - Prompt: ">>> ", - HistoryFile: filepath.Join(home, ".ollama", "history"), - AutoComplete: completer, + prompt := readline.Prompt{ + Prompt: ">>> ", + AltPrompt: "... ", + Placeholder: "Send a message (/? 
for help)", + AltPlaceholder: `Use """ to end multi-line input`, } - scanner, err := readline.NewEx(&config) + scanner, err := readline.New(prompt) if err != nil { return err } - defer scanner.Close() var wordWrap bool termType := os.Getenv("TERM") @@ -602,17 +552,20 @@ func generateInteractive(cmd *cobra.Command, model string) error { wordWrap = false } + fmt.Print(readline.StartBracketedPaste) + defer fmt.Printf(readline.EndBracketedPaste) + var multiLineBuffer string - var isMultiLine bool for { line, err := scanner.Readline() switch { case errors.Is(err, io.EOF): + fmt.Println() return nil case errors.Is(err, readline.ErrInterrupt): if line == "" { - fmt.Println("Use Ctrl-D or /bye to exit.") + fmt.Println("\nUse Ctrl-D or /bye to exit.") } continue @@ -623,23 +576,19 @@ func generateInteractive(cmd *cobra.Command, model string) error { line = strings.TrimSpace(line) switch { - case isMultiLine: + case scanner.Prompt.UseAlt: if strings.HasSuffix(line, `"""`) { - isMultiLine = false - painter.IsMultiLine = isMultiLine + scanner.Prompt.UseAlt = false multiLineBuffer += strings.TrimSuffix(line, `"""`) line = multiLineBuffer multiLineBuffer = "" - scanner.SetPrompt(">>> ") } else { multiLineBuffer += line + " " continue } case strings.HasPrefix(line, `"""`): - isMultiLine = true - painter.IsMultiLine = isMultiLine + scanner.Prompt.UseAlt = true multiLineBuffer = strings.TrimPrefix(line, `"""`) + " " - scanner.SetPrompt("... ") continue case strings.HasPrefix(line, "/list"): args := strings.Fields(line) @@ -666,19 +615,6 @@ func generateInteractive(cmd *cobra.Command, model string) error { case "quiet": cmd.Flags().Set("verbose", "false") fmt.Println("Set 'quiet' mode.") - case "mode": - if len(args) > 2 { - switch args[2] { - case "vim": - scanner.SetVimMode(true) - case "emacs", "default": - scanner.SetVimMode(false) - default: - usage() - } - } else { - usage() - } default: fmt.Printf("Unknown command '/set %s'. Type /? 
for help\n", args[1]) } diff --git a/docs/faq.md b/docs/faq.md index 2c7711d5..6f91e882 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -16,25 +16,19 @@ journalctl -u ollama If you're running `ollama serve` directly, the logs will be printed to the console. -## How can I expose the Ollama server? +## How can I expose Ollama on my network? Ollama binds to 127.0.0.1 port 11434 by default. Change the bind address with the `OLLAMA_HOST` environment variable. -Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0` by default. Add additional origins with the `OLLAMA_ORIGINS` environment variable: - On macOS: ```bash OLLAMA_HOST=0.0.0.0:11435 ollama serve ``` -```bash -OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve -``` - On Linux: -Create a `systemd` drop-in directory and set `Environment=OLLAMA_HOST` and/or `Environment=OLLAMA_ORIGINS` +Create a `systemd` drop-in directory and set `Environment=OLLAMA_HOST` ```bash mkdir -p /etc/systemd/system/ollama.service.d @@ -45,11 +39,30 @@ echo "[Service]" >>/etc/systemd/system/ollama.service.d/environment.conf echo "Environment=OLLAMA_HOST=0.0.0.0:11434" >>/etc/systemd/system/ollama.service.d/environment.conf ``` +Reload `systemd` and restart Ollama: + +```bash +systemctl daemon-reload +systemctl restart ollama +``` + +## How can I allow additional web origins to access Ollama? + +Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0` by default. Add additional origins with the `OLLAMA_ORIGINS` environment variable: + +On macOS: + +```bash +OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve +``` + +On Linux: + ```bash echo "Environment=OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com" >>/etc/systemd/system/ollama.service.d/environment.conf ``` -Reload `systemd` and restart Ollama. +Reload `systemd` and restart Ollama: ```bash systemctl daemon-reload @@ -60,3 +73,7 @@ systemctl restart ollama - macOS: Raw model data is stored under `~/.ollama/models`. 
- Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models` + +### How can I change where Ollama stores models? + +To modify where models are stored, you can use the `OLLAMA_MODELS` environment variable. Note that on Linux this means defining `OLLAMA_MODELS` in a drop-in `/etc/systemd/system/ollama.service.d` service file, reloading systemd, and restarting the ollama service. diff --git a/docs/linux.md b/docs/linux.md index 2433b582..45d0f38f 100644 --- a/docs/linux.md +++ b/docs/linux.md @@ -1,12 +1,16 @@ -# Installing Ollama on Linux +# Ollama on Linux -> Note: A one line installer for Ollama is available by running: +## Install + +Install Ollama by running this one-liner: > -> ```bash -> curl https://ollama.ai/install.sh | sh -> ``` +```bash +curl https://ollama.ai/install.sh | sh +``` -## Download the `ollama` binary +## Manual install + +### Download the `ollama` binary Ollama is distributed as a self-contained binary. Download it to a directory in your PATH: @@ -15,31 +19,7 @@ sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama sudo chmod +x /usr/bin/ollama ``` -## Start Ollama - -Start Ollama by running `ollama serve`: - -```bash -ollama serve -``` - -Once Ollama is running, run a model in another terminal session: - -```bash -ollama run llama2 -``` - -## Install CUDA drivers (optional – for Nvidia GPUs) - -[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA. 
- -Verify that the drivers are installed by running the following command, which should print details about your GPU: - -```bash -nvidia-smi -``` - -## Adding Ollama as a startup service (optional) +### Adding Ollama as a startup service (recommended) Create a user for Ollama: @@ -60,7 +40,6 @@ User=ollama Group=ollama Restart=always RestartSec=3 -Environment="HOME=/usr/share/ollama" [Install] WantedBy=default.target @@ -73,7 +52,40 @@ sudo systemctl daemon-reload sudo systemctl enable ollama ``` -### Viewing logs +### Install CUDA drivers (optional – for Nvidia GPUs) + +[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA. + +Verify that the drivers are installed by running the following command, which should print details about your GPU: + +```bash +nvidia-smi +``` + +### Start Ollama + +Start Ollama using `systemd`: + +```bash +sudo systemctl start ollama +``` + +## Update + +Update ollama by running the install script again: + +```bash +curl https://ollama.ai/install.sh | sh +``` + +Or by downloading the ollama binary: + +```bash +sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama +sudo chmod +x /usr/bin/ollama +``` + +## Viewing logs To view logs of Ollama running as a startup service, run: @@ -84,19 +96,21 @@ journalctl -u ollama ## Uninstall Remove the ollama service: + ```bash -systemctl stop ollama -systemctl disable ollama -rm /etc/systemd/system/ollama.service +sudo systemctl stop ollama +sudo systemctl disable ollama +sudo rm /etc/systemd/system/ollama.service ``` Remove the ollama binary from your bin directory (either `/usr/local/bin`, `/usr/bin`, or `/bin`): + ```bash -rm /usr/local/bin/ollama +sudo rm $(which ollama) ``` Remove the downloaded models and Ollama service user: ```bash -rm /usr/share/ollama -userdel ollama +sudo rm -r /usr/share/ollama +sudo userdel ollama ``` diff --git a/go.mod b/go.mod index 83046b88..1f8860cc 100644 --- a/go.mod +++ b/go.mod @@ -4,13 +4,14 @@ go 1.20 require ( 
github.com/dustin/go-humanize v1.0.1 + github.com/emirpasic/gods v1.18.1 github.com/gin-gonic/gin v1.9.1 github.com/mattn/go-runewidth v0.0.14 github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db github.com/olekukonko/tablewriter v0.0.5 - github.com/pdevine/readline v1.5.2 github.com/spf13/cobra v1.7.0 golang.org/x/sync v0.3.0 + gonum.org/v1/gonum v0.14.0 ) require github.com/rivo/uniseg v0.2.0 // indirect @@ -39,12 +40,12 @@ require ( github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect golang.org/x/arch v0.3.0 // indirect - golang.org/x/crypto v0.10.0 + golang.org/x/crypto v0.14.0 golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 - golang.org/x/net v0.10.0 // indirect - golang.org/x/sys v0.11.0 // indirect - golang.org/x/term v0.10.0 - golang.org/x/text v0.10.0 // indirect + golang.org/x/net v0.17.0 // indirect + golang.org/x/sys v0.13.0 // indirect + golang.org/x/term v0.13.0 + golang.org/x/text v0.13.0 // indirect google.golang.org/protobuf v1.30.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 550e88ca..b12628a1 100644 --- a/go.sum +++ b/go.sum @@ -4,10 +4,6 @@ github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZX github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= -github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= -github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= -github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= -github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= github.com/cpuguy83/go-md2man/v2 
v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -15,6 +11,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= +github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g= @@ -78,8 +76,6 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= -github.com/pdevine/readline v1.5.2 h1:oz6Y5GdTmhPG+08hhxcAvtHitSANWuA2100Sppb38xI= -github.com/pdevine/readline v1.5.2/go.mod h1:na/LbuE5PYwxI7GyopWdIs3U8HVe89lYlNTFTXH3wOw= github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= @@ -118,33 +114,34 @@ golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod 
h1:5om86z9Hs0C8fWVUu golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM= -golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= +golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ= golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= +golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= -golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= -golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= +golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= +golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= -golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0= +gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= diff --git 
a/llm/llama.cpp/generate_linux.go b/llm/llama.cpp/generate_linux.go index 23920265..2e8264ce 100644 --- a/llm/llama.cpp/generate_linux.go +++ b/llm/llama.cpp/generate_linux.go @@ -14,13 +14,13 @@ package llm //go:generate git submodule update --force gguf //go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch //go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch -//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on +//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off //go:generate cmake --build gguf/build/cpu --target server --config Release //go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner //go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on //go:generate cmake --build ggml/build/cuda --target server --config Release //go:generate mv ggml/build/cuda/bin/server ggml/build/cuda/bin/ollama-runner -//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on +//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off //go:generate cmake --build gguf/build/cuda --target server --config Release //go:generate mv gguf/build/cuda/bin/server gguf/build/cuda/bin/ollama-runner diff --git a/llm/llama.cpp/generate_windows.go b/llm/llama.cpp/generate_windows.go index 3db1a3e0..326e83d5 100644 --- a/llm/llama.cpp/generate_windows.go +++ b/llm/llama.cpp/generate_windows.go @@ -11,6 +11,6 @@ package llm //go:generate git submodule update --force gguf //go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch -//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on +//go:generate cmake -S gguf -B 
gguf/build/cpu -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off //go:generate cmake --build gguf/build/cpu --target server --config Release //go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe diff --git a/llm/llama.go b/llm/llama.go index 61288b52..8e2050c6 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -212,6 +212,10 @@ func CheckVRAM() (int64, error) { scanner := bufio.NewScanner(&stdout) for scanner.Scan() { line := scanner.Text() + if strings.Contains(line, "[Insufficient Permissions]") { + return 0, fmt.Errorf("GPU support may not enabled, check you have installed GPU drivers and have the necessary permissions to run nvidia-smi") + } + vram, err := strconv.ParseInt(strings.TrimSpace(line), 10, 64) if err != nil { return 0, fmt.Errorf("failed to parse available VRAM: %v", err) @@ -243,12 +247,15 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int { return 0 } - // Calculate bytes per layer - // TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size + /* + Calculate bytes per layer, this will roughly be the size of the model file divided by the number of layers. + We can store the model weights and the kv cache in vram, + to enable kv cache vram storage add two additional layers to the number of layers retrieved from the model file. 
+ */ bytesPerLayer := fileSizeBytes / numLayer - // max number of layers we can fit in VRAM, subtract 8% to prevent consuming all available VRAM and running out of memory - layers := int(freeBytes/bytesPerLayer) * 92 / 100 + // 75% of the absolute max number of layers we can fit in available VRAM, off-loading too many layers to the GPU can cause OOM errors + layers := int(freeBytes/bytesPerLayer) * 3 / 4 log.Printf("%d MB VRAM available, loading up to %d GPU layers", freeBytes/(1024*1024), layers) return layers diff --git a/readline/buffer.go b/readline/buffer.go new file mode 100644 index 00000000..8b680282 --- /dev/null +++ b/readline/buffer.go @@ -0,0 +1,370 @@ +package readline + +import ( + "fmt" + + "github.com/emirpasic/gods/lists/arraylist" + "golang.org/x/term" +) + +type Buffer struct { + Pos int + Buf *arraylist.List + Prompt *Prompt + LineWidth int + Width int + Height int +} + +func NewBuffer(prompt *Prompt) (*Buffer, error) { + width, height, err := term.GetSize(0) + if err != nil { + fmt.Println("Error getting size:", err) + return nil, err + } + + lwidth := width - len(prompt.Prompt) + if prompt.UseAlt { + lwidth = width - len(prompt.AltPrompt) + } + + b := &Buffer{ + Pos: 0, + Buf: arraylist.New(), + Prompt: prompt, + Width: width, + Height: height, + LineWidth: lwidth, + } + + return b, nil +} + +func (b *Buffer) MoveLeft() { + if b.Pos > 0 { + if b.Pos%b.LineWidth == 0 { + fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width)) + } else { + fmt.Print(CursorLeft) + } + b.Pos -= 1 + } +} + +func (b *Buffer) MoveLeftWord() { + if b.Pos > 0 { + var foundNonspace bool + for { + v, _ := b.Buf.Get(b.Pos - 1) + if v == ' ' { + if foundNonspace { + break + } + } else { + foundNonspace = true + } + b.MoveLeft() + + if b.Pos == 0 { + break + } + } + } +} + +func (b *Buffer) MoveRight() { + if b.Pos < b.Size() { + b.Pos += 1 + if b.Pos%b.LineWidth == 0 { + fmt.Printf(CursorDown + CursorBOL + cursorRightN(b.PromptSize())) + } else { + fmt.Print(CursorRight) 
+		}
+	}
+}
+
+// MoveRightWord moves the cursor right one character at a time until the
+// character under the cursor is a space or the end of the buffer is reached.
+func (b *Buffer) MoveRightWord() {
+	if b.Pos < b.Size() {
+		for {
+			b.MoveRight()
+			v, _ := b.Buf.Get(b.Pos)
+			if v == ' ' {
+				break
+			}
+
+			if b.Pos == b.Size() {
+				break
+			}
+		}
+	}
+}
+
+// MoveToStart moves the cursor to the first character of the buffer: up one
+// row for every full line before the cursor, then to the column just after
+// the prompt.
+func (b *Buffer) MoveToStart() {
+	if b.Pos > 0 {
+		currLine := b.Pos / b.LineWidth
+		if currLine > 0 {
+			for cnt := 0; cnt < currLine; cnt++ {
+				fmt.Print(CursorUp)
+			}
+		}
+		// Print, not Printf: the argument is output text, not a format string
+		fmt.Print(CursorBOL + cursorRightN(b.PromptSize()))
+		b.Pos = 0
+	}
+}
+
+// MoveToEnd moves the cursor just past the last character of the buffer,
+// moving down one row for every line between the cursor and the last line.
+func (b *Buffer) MoveToEnd() {
+	if b.Pos < b.Size() {
+		currLine := b.Pos / b.LineWidth
+		totalLines := b.Size() / b.LineWidth
+		if currLine < totalLines {
+			for cnt := 0; cnt < totalLines-currLine; cnt++ {
+				fmt.Print(CursorDown)
+			}
+			remainder := b.Size() % b.LineWidth
+			fmt.Print(CursorBOL + cursorRightN(b.PromptSize()+remainder))
+		} else {
+			fmt.Print(cursorRightN(b.Size() - b.Pos))
+		}
+
+		b.Pos = b.Size()
+	}
+}
+
+// Size returns the number of characters in the buffer.
+func (b *Buffer) Size() int {
+	return b.Buf.Size()
+}
+
+// min returns the smaller of n and m.
+func min(n, m int) int {
+	if n > m {
+		return m
+	}
+	return n
+}
+
+// PromptSize returns the length in bytes of the prompt currently in use:
+// AltPrompt when UseAlt is set, Prompt otherwise.
+func (b *Buffer) PromptSize() int {
+	if b.Prompt.UseAlt {
+		return len(b.Prompt.AltPrompt)
+	}
+	return len(b.Prompt.Prompt)
+}
+
+// Add echoes r, inserts it at the cursor and advances the cursor. When the
+// cursor lands on a line-width boundary a new line starting with AltPrompt is
+// printed; when r was inserted before the end, the text after it is redrawn.
+func (b *Buffer) Add(r rune) {
+	atEnd := b.Pos == b.Buf.Size()
+
+	fmt.Printf("%c", r)
+	if atEnd {
+		b.Buf.Add(r)
+	} else {
+		b.Buf.Insert(b.Pos, r)
+	}
+	b.Pos += 1
+	if b.Pos > 0 && b.Pos%b.LineWidth == 0 {
+		fmt.Printf("\n%s", b.Prompt.AltPrompt)
+	}
+
+	if !atEnd {
+		b.drawRemaining()
+	}
+}
+
+// drawRemaining redraws the text from the cursor to the end of the buffer and
+// then moves the cursor back. The cursor is hidden while drawing.
+func (b *Buffer) drawRemaining() {
+	var place int
+	// work in runes: Pos and LineWidth count characters, so slicing the
+	// string by bytes would split multi-byte characters
+	remaining := []rune(b.StringN(b.Pos))
+	if b.Pos > 0 {
+		place = b.Pos % b.LineWidth
+	}
+	fmt.Print(CursorHide)
+
+	// render the rest of the current line
+	currLine := remaining[:min(b.LineWidth-place, len(remaining))]
+	if len(currLine) > 0 {
+		// Print, not Printf: currLine is user-typed text and may contain '%'
+		fmt.Print(ClearToEOL + string(currLine))
+		fmt.Print(cursorLeftN(len(currLine)))
+	} else {
+		fmt.Print(ClearToEOL)
+	}
+
+	// render the other lines
+	if len(remaining) > len(currLine) {
+		var totalLines int
+		for i, c := range remaining[len(currLine):] {
+			if i%b.LineWidth == 0 {
+				fmt.Printf("\n%s", b.Prompt.AltPrompt)
+				totalLines += 1
+			}
+			fmt.Printf("%c", c)
+		}
+		fmt.Print(ClearToEOL)
+		fmt.Print(cursorUpN(totalLines))
+		fmt.Print(CursorBOL + cursorRightN(b.Width-len(currLine)))
+	}
+
+	fmt.Print(CursorShow)
+}
+
+// Remove deletes the character before the cursor (backspace) and redraws any
+// text that follows it.
+func (b *Buffer) Remove() {
+	if b.Buf.Size() > 0 && b.Pos > 0 {
+		if b.Pos%b.LineWidth == 0 {
+			// if the user backspaces over the word boundary, do this magic to clear the line
+			// and move to the end of the previous line
+			fmt.Print(CursorBOL + ClearToEOL)
+			fmt.Print(CursorUp + CursorBOL + cursorRightN(b.Width) + " " + CursorLeft)
+		} else {
+			fmt.Print(CursorLeft + " " + CursorLeft)
+		}
+
+		var eraseExtraLine bool
+		if (b.Size()-1)%b.LineWidth == 0 {
+			eraseExtraLine = true
+		}
+
+		b.Pos -= 1
+		b.Buf.Remove(b.Pos)
+
+		if b.Pos < b.Size() {
+			b.drawRemaining()
+			// this erases a line which is left over when backspacing in the middle of a line and there
+			// are trailing characters which go over the line width boundary
+			if eraseExtraLine {
+				remainingLines := (b.Size() - b.Pos) / b.LineWidth
+				fmt.Print(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
+				place := b.Pos % b.LineWidth
+				fmt.Print(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.Prompt)))
+			}
+		}
+	}
+}
+
+// Delete deletes the character under the cursor and redraws the text after
+// it, erasing the last screen line when the buffer no longer reaches it.
+func (b *Buffer) Delete() {
+	if b.Size() > 0 && b.Pos < b.Size() {
+		b.Buf.Remove(b.Pos)
+		b.drawRemaining()
+		if b.Size()%b.LineWidth == 0 {
+			if b.Pos != b.Size() {
+				remainingLines := (b.Size() - b.Pos) / b.LineWidth
+				fmt.Print(cursorDownN(remainingLines) + CursorBOL + ClearToEOL)
+				place := b.Pos % b.LineWidth
+				fmt.Print(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.Prompt)))
+			}
+		}
+	}
+}
+
+// DeleteBefore deletes every character before the cursor by calling Remove
+// once per character.
+func (b *Buffer) DeleteBefore() {
+	if b.Pos > 0 {
+		for cnt := b.Pos - 1; cnt >= 0; cnt-- {
+			b.Remove()
+		}
+	}
+}
+
+// DeleteRemaining deletes every character from the cursor to the end of the
+// buffer by calling Delete once per character.
+func (b *Buffer) DeleteRemaining() {
+	if b.Size() > 0 && b.Pos < b.Size() {
+		charsToDel := b.Size() - b.Pos
+		for cnt := 0; cnt < charsToDel; cnt++ {
+			b.Delete()
+		}
+	}
+}
+
+// DeleteWord deletes backwards from the cursor: first any spaces directly
+// before it, then the run of non-space characters before those.
+func (b *Buffer) DeleteWord() {
+	if b.Buf.Size() > 0 && b.Pos > 0 {
+		var foundNonspace bool
+		for {
+			v, _ := b.Buf.Get(b.Pos - 1)
+			if v == ' ' {
+				if !foundNonspace {
+					b.Remove()
+				} else {
+					break
+				}
+			} else {
+				foundNonspace = true
+				b.Remove()
+			}
+
+			if b.Pos == 0 {
+				break
+			}
+		}
+	}
+}
+
+// ClearScreen clears the terminal, reprints the prompt followed by either the
+// placeholder (empty buffer) or the buffer contents, and then moves the cursor
+// back to its position in the buffer.
+func (b *Buffer) ClearScreen() {
+	// Print, not Printf: prompt and placeholder are caller-supplied text
+	fmt.Print(ClearScreen + CursorReset + b.Prompt.Prompt)
+	if b.IsEmpty() {
+		ph := b.Prompt.Placeholder
+		fmt.Print(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault)
+	} else {
+		currPos := b.Pos
+		b.Pos = 0
+		b.drawRemaining()
+		fmt.Print(CursorReset + cursorRightN(len(b.Prompt.Prompt)))
+		if currPos > 0 {
+			targetLine := currPos / b.LineWidth
+			if targetLine > 0 {
+				for cnt := 0; cnt < targetLine; cnt++ {
+					fmt.Print(CursorDown)
+				}
+			}
+			remainder := currPos % b.LineWidth
+			if remainder > 0 {
+				fmt.Print(cursorRightN(remainder))
+			}
+			if currPos%b.LineWidth == 0 {
+				fmt.Print(CursorBOL + b.Prompt.AltPrompt)
+			}
+		}
+		b.Pos = currPos
+	}
+}
+
+// IsEmpty reports whether the buffer holds no characters.
+func (b *Buffer) IsEmpty() bool {
+	return b.Buf.Empty()
+}
+
+// Replace discards the buffer contents, clears the current terminal line,
+// reprints the prompt and adds the characters of r one by one.
+func (b *Buffer) Replace(r []rune) {
+	b.Pos = 0
+	b.Buf.Clear()
+	fmt.Print(ClearLine + CursorBOL + b.Prompt.Prompt)
+	for _, c := range r {
+		b.Add(c)
+	}
+}
+
+// String returns the whole buffer as a string.
+func (b *Buffer) String() string {
+	return b.StringN(0)
+}
+
+// StringN returns the buffer contents from index n to the end.
+func (b *Buffer) StringN(n int) string {
+	return b.StringNM(n, 0)
+}
+
+// StringNM returns the characters at indexes [n, m). An m of 0 means "to the
+// end of the buffer". Indexes with no rune stored are skipped.
+func (b *Buffer) StringNM(n, m int) string {
+	if m == 0 {
+		m = b.Size()
+	}
+	// collect runes and convert once instead of growing a string per character
+	var rs []rune
+	for cnt := n; cnt < m; cnt++ {
+		c, _ := b.Buf.Get(cnt)
+		if r, ok := c.(rune); ok {
+			rs = append(rs, r)
+		}
+	}
+	return string(rs)
+}
+
+// cursorLeftN returns the escape sequence that moves the cursor n columns left.
+func cursorLeftN(n int) string {
+	return fmt.Sprintf(CursorLeftN, n)
+}
+
+// cursorRightN returns the escape sequence that moves the cursor n columns right.
+func cursorRightN(n int) string {
+	return fmt.Sprintf(CursorRightN, n)
+}
+
+// cursorUpN returns the escape sequence that moves the cursor n rows up.
+func cursorUpN(n int) string {
+	return fmt.Sprintf(CursorUpN, n)
+}
+
+func cursorDownN(n int) string {
+	return 
fmt.Sprintf(CursorDownN, n)
+}
diff --git a/readline/errors.go b/readline/errors.go
new file mode 100644
index 00000000..40e40cb7
--- /dev/null
+++ b/readline/errors.go
@@ -0,0 +1,22 @@
+package readline
+
+import (
+	"errors"
+)
+
+var (
+	// ErrInterrupt is returned by Instance.Readline when it reads CharInterrupt.
+	ErrInterrupt = errors.New("Interrupt")
+)
+
+// InterruptError is an error whose message is always "Interrupted".
+// NOTE(review): Line presumably carries the input typed before the
+// interrupt; nothing in this file populates it.
+type InterruptError struct {
+	Line []rune
+}
+
+// Error implements the error interface.
+func (*InterruptError) Error() string {
+	return "Interrupted"
+}
diff --git a/readline/history.go b/readline/history.go
new file mode 100644
index 00000000..5a7c34e2
--- /dev/null
+++ b/readline/history.go
@@ -0,0 +1,179 @@
+package readline
+
+import (
+	"bufio"
+	"errors"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/emirpasic/gods/lists/arraylist"
+)
+
+// History is a bounded list of previously entered lines that Save writes to
+// Filename.
+type History struct {
+	Buf      *arraylist.List // entries, oldest first; each element is a []rune
+	Autosave bool            // call Save after every Add
+	Pos      int             // cursor used by Prev/Next; Size() means "past the newest entry"
+	Limit    int             // maximum number of entries kept by Compact
+	Filename string          // path of the history file, set by Init
+	Enabled  bool            // when false, Save is a no-op
+}
+
+// NewHistory returns a History limited to 100 entries, with Autosave and
+// Enabled set, after loading it with Init.
+func NewHistory() (*History, error) {
+	h := &History{
+		Buf:      arraylist.New(),
+		Limit:    100, //resizeme
+		Autosave: true,
+		Enabled:  true,
+	}
+
+	err := h.Init()
+	if err != nil {
+		return nil, err
+	}
+
+	return h, nil
+}
+
+// Init sets Filename to ~/.ollama/history and loads the non-blank lines stored
+// there. An os.ErrNotExist from opening the file is not treated as an error.
+func (h *History) Init() error {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return err
+	}
+
+	path := filepath.Join(home, ".ollama", "history")
+	h.Filename = path
+
+	//todo check if the file exists
+	f, err := os.OpenFile(path, os.O_CREATE|os.O_RDONLY, 0600)
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			return nil
+		}
+		return err
+	}
+	defer f.Close()
+
+	r := bufio.NewReader(f)
+	for {
+		line, err := r.ReadString('\n')
+		if err != nil && err != io.EOF {
+			return err
+		}
+
+		// handle the text before checking for EOF so a final line
+		// without a trailing newline is not dropped
+		if line = strings.TrimSpace(line); len(line) > 0 {
+			// append directly rather than calling Add, which would
+			// rewrite the history file once per loaded line
+			h.Buf.Add([]rune(line))
+		}
+
+		if err == io.EOF {
+			break
+		}
+	}
+
+	h.Compact()
+	h.Pos = h.Size()
+
+	return nil
+}
+
+// Add appends l to the history, drops the oldest entries beyond Limit, moves
+// Pos past the newest entry and, when Autosave is set, calls Save.
+func (h *History) Add(l []rune) {
+	h.Buf.Add(l)
+	h.Compact()
+	h.Pos = h.Size()
+	if h.Autosave {
+		// best effort: Add has no error return, and a failed save must not
+		// stop the caller from using the in-memory history
+		_ = h.Save()
+	}
+}
+
+// Compact removes the oldest entries until at most Limit remain.
+func (h *History) Compact() {
+	s := h.Buf.Size()
+	if s > h.Limit {
+		for cnt := 0; cnt < s-h.Limit; cnt++ {
+			h.Buf.Remove(0)
+		}
+	}
+}
+
+// Clear removes all entries from memory; it does not touch Pos or the file.
+func (h *History) Clear() {
+	h.Buf.Clear()
+}
+
+// Prev moves Pos one entry back (stopping at 0) and returns the entry there,
+// or nil if there is none.
+func (h *History) Prev() []rune {
+	var line []rune
+	if h.Pos > 0 {
+		h.Pos -= 1
+	}
+	v, _ := h.Buf.Get(h.Pos)
+	line, _ = v.([]rune)
+	return line
+}
+
+// Next moves Pos one entry forward and returns the entry there. It returns
+// nil once Pos has moved past the newest entry.
+func (h *History) Next() []rune {
+	var line []rune
+	if h.Pos < h.Buf.Size() {
+		h.Pos += 1
+		v, _ := h.Buf.Get(h.Pos)
+		line, _ = v.([]rune)
+	}
+	return line
+}
+
+// Size returns the number of entries.
+func (h *History) Size() int {
+	return h.Buf.Size()
+}
+
+// Save writes every entry, one per line, to Filename+".tmp" and then renames
+// that file over Filename. It does nothing when Enabled is false.
+func (h *History) Save() error {
+	if !h.Enabled {
+		return nil
+	}
+
+	tmpFile := h.Filename + ".tmp"
+
+	// 0600 matches the mode Init uses for the history file
+	f, err := os.OpenFile(tmpFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600)
+	if err != nil {
+		return err
+	}
+
+	buf := bufio.NewWriter(f)
+	for cnt := 0; cnt < h.Size(); cnt++ {
+		v, _ := h.Buf.Get(cnt)
+		line, _ := v.([]rune)
+		// write errors are sticky in bufio.Writer and surface from Flush
+		buf.WriteString(string(line) + "\n")
+	}
+	if err := buf.Flush(); err != nil {
+		f.Close()
+		return err
+	}
+	// close before renaming, and never rename a file that failed to write
+	if err := f.Close(); err != nil {
+		return err
+	}
+
+	return os.Rename(tmpFile, h.Filename)
+}
diff --git a/readline/readline.go b/readline/readline.go
new file mode 100644
index 00000000..d1fe2c8d
--- /dev/null
+++ b/readline/readline.go
@@ -0,0 +1,276 @@
+package readline
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+	"syscall"
+)
+
+// Prompt holds the prompt and placeholder text. When UseAlt is set, Readline
+// shows AltPrompt and AltPlaceholder instead of Prompt and Placeholder.
+type Prompt struct {
+	Prompt         string
+	AltPrompt      string
+	Placeholder    string
+	AltPlaceholder string
+	UseAlt         bool
+}
+
+// Terminal delivers the runes that a background goroutine reads from stdin.
+type Terminal struct {
+	outchan chan rune
+}
+
+// Instance is a line editor made of a prompt, a terminal and a history.
+type Instance struct {
+	Prompt   *Prompt
+	Terminal *Terminal
+	History  *History
+}
+
+// New starts a Terminal, loads the History and returns an Instance that uses
+// a copy of prompt.
+func New(prompt Prompt) (*Instance, error) {
+	term, err := NewTerminal()
+	if err != nil {
+		return nil, err
+	}
+
+	history, err := NewHistory()
+	if err != nil {
+		return nil, err
+	}
+
+	return &Instance{
+		Prompt:   &prompt,
+		Terminal: term,
+		History:  history,
+	}, nil
+}
+
+// Readline prints the prompt, puts stdin into raw mode and edits one line
+// interactively, returning it when CharEnter is read. It returns ErrInterrupt
+// on CharInterrupt, and io.EOF on CharDelete with an empty buffer or when the
+// terminal can no longer be read. Raw mode is undone before returning.
+func (i *Instance) Readline() (string, error) {
+	prompt := i.Prompt.Prompt
+	if i.Prompt.UseAlt {
+		prompt = i.Prompt.AltPrompt
+	}
+	fmt.Print(prompt)
+
+	termios, err := SetRawMode(syscall.Stdin)
+	if err != nil {
+		return "", err
+	}
+	defer UnsetRawMode(syscall.Stdin, termios)
+
+	// NOTE(review): the second result of NewBuffer is discarded; confirm it
+	// cannot fail here.
+	buf, _ := NewBuffer(i.Prompt)
+
+	var esc bool
+	var escex bool
+	var metaDel bool
+	var pasteMode PasteMode
+
+	var currentLineBuf []rune
+
+	for {
+		if buf.IsEmpty() {
+			ph := i.Prompt.Placeholder
+			if i.Prompt.UseAlt {
+				ph = i.Prompt.AltPlaceholder
+			}
+			// Print, not Printf: the placeholder is caller-supplied text and
+			// must not be interpreted as a format string
+			fmt.Print(ColorGrey + ph + fmt.Sprintf(CursorLeftN, len(ph)) + ColorDefault)
+		}
+
+		r, err := i.Terminal.Read()
+
+		if buf.IsEmpty() {
+			fmt.Print(ClearToEOL)
+		}
+
+		if err != nil {
+			return "", io.EOF
+		}
+
+		if escex {
+			escex = false
+
+			switch r {
+			case KeyUp:
+				if i.History.Pos > 0 {
+					if i.History.Pos == i.History.Size() {
+						currentLineBuf = []rune(buf.String())
+					}
+					buf.Replace(i.History.Prev())
+				}
+			case KeyDown:
+				if i.History.Pos < i.History.Size() {
+					buf.Replace(i.History.Next())
+					if i.History.Pos == i.History.Size() {
+						buf.Replace(currentLineBuf)
+					}
+				}
+			case KeyLeft:
+				buf.MoveLeft()
+			case KeyRight:
+				buf.MoveRight()
+			case CharBracketedPaste:
+				var code string
+				for cnt := 0; cnt < 3; cnt++ {
+					r, err = i.Terminal.Read()
+					if err != nil {
+						return "", io.EOF
+					}
+
+					code += string(r)
+				}
+				if code == CharBracketedPasteStart {
+					pasteMode = PasteModeStart
+				} else if code == CharBracketedPasteEnd {
+					pasteMode = PasteModeEnd
+				}
+			case KeyDel:
+				if buf.Size() > 0 {
+					buf.Delete()
+				}
+				metaDel = true
+			case MetaStart:
+				buf.MoveToStart()
+			case MetaEnd:
+				buf.MoveToEnd()
+			default:
+				// skip any keys we don't know about
+				continue
+			}
+			continue
+		} else if esc {
+			esc = false
+
+			switch r {
+			case 'b':
+				buf.MoveLeftWord()
+			case 'f':
+				buf.MoveRightWord()
+			case CharEscapeEx:
+				escex = true
+			}
+			continue
+		}
+
+		switch r {
+		case CharNull:
+			continue
+		case CharEsc:
+			esc = true
+		case CharInterrupt:
+			return "", ErrInterrupt
+		case CharLineStart:
+			buf.MoveToStart()
+		case CharLineEnd:
+			buf.MoveToEnd()
+		case CharBackward:
+			buf.MoveLeft()
+		case CharForward:
+			buf.MoveRight()
+		case CharBackspace, CharCtrlH:
+			buf.Remove()
+		case CharTab:
+			// todo: convert back to real tabs
+			for cnt := 0; cnt < 8; cnt++ {
+				buf.Add(' ')
+			}
+		case CharDelete:
+			if buf.Size() > 0 {
+				buf.Delete()
+			} else {
+				return "", io.EOF
+			}
+		case CharKill:
+			buf.DeleteRemaining()
+		case CharCtrlU:
+			buf.DeleteBefore()
+		case CharCtrlL:
+			buf.ClearScreen()
+		case CharCtrlW:
+			buf.DeleteWord()
+		case CharEnter:
+			output := buf.String()
+			if output != "" {
+				i.History.Add([]rune(output))
+			}
+			buf.MoveToEnd()
+			fmt.Println()
+			switch pasteMode {
+			case PasteModeStart:
+				output = `"""` + output
+			case PasteModeEnd:
+				output = output + `"""`
+			}
+			return output, nil
+		default:
+			if metaDel {
+				metaDel = false
+				continue
+			}
+			// CharEnter never reaches this branch; it has its own case above
+			if r >= CharSpace {
+				buf.Add(r)
+			}
+		}
+	}
+}
+
+// HistoryEnable turns saving of the history to disk back on.
+func (i *Instance) HistoryEnable() {
+	i.History.Enabled = true
+}
+
+// HistoryDisable stops the history from being saved to disk; entries are
+// still kept in memory.
+func (i *Instance) HistoryDisable() {
+	i.History.Enabled = false
+}
+
+// NewTerminal creates a Terminal and starts the goroutine that reads stdin.
+// The returned error is always nil.
+func NewTerminal() (*Terminal, error) {
+	t := &Terminal{
+		outchan: make(chan rune),
+	}
+
+	go t.ioloop()
+
+	return t, nil
+}
+
+// ioloop forwards runes from stdin to outchan and closes the channel on the
+// first read error (including EOF).
+func (t *Terminal) ioloop() {
+	buf := bufio.NewReader(os.Stdin)
+
+	for {
+		r, _, err := buf.ReadRune()
+		if err != nil {
+			close(t.outchan)
+			break
+		}
+		t.outchan <- r
+	}
+}
+
+// Read blocks for the next rune and returns io.EOF once outchan is closed.
+func (t *Terminal) Read() (rune, error) {
+	r, ok := <-t.outchan
+	if !ok {
+		return 0, io.EOF
+	}
+
+	return r, nil
+}
diff --git a/readline/term.go b/readline/term.go
new file mode 100644
index 00000000..45757e6a
--- /dev/null
+++ b/readline/term.go
@@ -0,0 +1,41 @@
+//go:build aix || darwin || dragonfly || freebsd || (linux && !appengine) || netbsd || openbsd || os400 || solaris
+
+package readline
+
+import (
+	"syscall"
+)
+
+// Termios is the terminal attribute structure read and written by getTermios/setTermios.
+type Termios syscall.Termios
+
+// SetRawMode switches the terminal on fd to raw mode (echo, canonical line
+// input and signal generation are turned off, among other flags) and returns
+// the attributes that were in effect before, for use with UnsetRawMode.
+func SetRawMode(fd int) (*Termios, error) {
+	termios, err := getTermios(fd)
+	if err != nil {
+		return nil, err
+	}
+
+	newTermios := *termios
+	newTermios.Iflag &^= syscall.IGNBRK | syscall.BRKINT | syscall.PARMRK | syscall.ISTRIP | syscall.INLCR | syscall.IGNCR | syscall.ICRNL | syscall.IXON
+	newTermios.Lflag &^= syscall.ECHO | syscall.ECHONL | syscall.ICANON | syscall.ISIG | syscall.IEXTEN
+	newTermios.Cflag &^= syscall.CSIZE | syscall.PARENB
+	newTermios.Cflag |= syscall.CS8
+	newTermios.Cc[syscall.VMIN] = 1
+	newTermios.Cc[syscall.VTIME] = 0
+
+	return termios, setTermios(fd, &newTermios)
+}
+
+// UnsetRawMode applies termios, normally the value returned by SetRawMode, to fd.
+func UnsetRawMode(fd int, termios *Termios) error {
+	return setTermios(fd, termios)
+}
+
+// IsTerminal returns true if the given file descriptor is a terminal.
+func IsTerminal(fd int) bool {
+	_, err := getTermios(fd)
+	return err == nil
+}
diff --git a/readline/term_bsd.go b/readline/term_bsd.go
new file mode 100644
index 00000000..6dcf42fc
--- /dev/null
+++ b/readline/term_bsd.go
@@ -0,0 +1,27 @@
+//go:build darwin || freebsd || netbsd || openbsd
+
+package readline
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// getTermios reads the terminal attributes of fd with the TIOCGETA ioctl.
+func getTermios(fd int) (*Termios, error) {
+	termios := new(Termios)
+	_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCGETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
+	if err != 0 {
+		return nil, err
+	}
+	return termios, nil
+}
+
+// setTermios writes the terminal attributes of fd with the TIOCSETA ioctl.
+func setTermios(fd int, termios *Termios) error {
+	_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCSETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
+	if err != 0 {
+		return err
+	}
+	return nil
+}
diff --git a/readline/term_linux.go b/readline/term_linux.go
new file mode 100644
index 00000000..9028f52e
--- /dev/null
+++ b/readline/term_linux.go
@@ -0,0 +1,33 @@
+//go:build linux || solaris
+
+package readline
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// ioctl request numbers used to read and write the termios structure.
+// NOTE(review): 0x5401/0x5402 are TCGETS/TCSETS on common Linux
+// architectures; confirm they are right for every target this file builds on.
+const tcgets = 0x5401
+const tcsets = 0x5402
+
+// getTermios reads the terminal attributes of fd with the tcgets ioctl.
+func getTermios(fd int) (*Termios, error) {
+	termios := new(Termios)
+	_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcgets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
+	if err != 0 {
+		return nil, err
+	}
+	return termios, nil
+}
+
+// setTermios writes the terminal attributes of fd with the tcsets ioctl.
+func setTermios(fd int, termios *Termios) error {
+	_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcsets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
+	if err != 0 {
+		return err
+	}
+	return nil
+}
+diff 
--git a/readline/types.go b/readline/types.go
new file mode 100644
index 00000000..03fa526d
--- /dev/null
+++ b/readline/types.go
@@ -0,0 +1,98 @@
+package readline
+
+// Character codes that Readline compares against the runes it reads.
+const (
+	CharNull      = 0
+	CharLineStart = 1
+	CharBackward  = 2
+	CharInterrupt = 3
+	CharDelete    = 4
+	CharLineEnd   = 5
+	CharForward   = 6
+	CharBell      = 7
+	CharCtrlH     = 8
+	CharTab       = 9
+	CharCtrlJ     = 10
+	CharKill      = 11
+	CharCtrlL     = 12
+	CharEnter     = 13
+	CharNext      = 14
+	CharPrev      = 16
+	CharBckSearch = 18
+	CharFwdSearch = 19
+	CharTranspose = 20
+	CharCtrlU     = 21
+	CharCtrlW     = 23
+	CharCtrlY     = 25
+	CharCtrlZ     = 26
+	CharEsc       = 27
+	CharSpace     = 32
+	CharEscapeEx  = 91
+	CharBackspace = 127
+)
+
+// Codes of the character that follows CharEsc, CharEscapeEx in a key escape sequence.
+const (
+	KeyDel    = 51
+	KeyUp     = 65
+	KeyDown   = 66
+	KeyRight  = 67
+	KeyLeft   = 68
+	MetaEnd   = 70
+	MetaStart = 72
+)
+
+// Escape sequences written to the terminal. The *N variants are fmt format
+// strings that take the count as an integer.
+const (
+	CursorUp    = "\033[1A"
+	CursorDown  = "\033[1B"
+	CursorRight = "\033[1C"
+	CursorLeft  = "\033[1D"
+
+	CursorSave    = "\033[s"
+	CursorRestore = "\033[u"
+
+	CursorUpN    = "\033[%dA"
+	CursorDownN  = "\033[%dB"
+	CursorRightN = "\033[%dC"
+	CursorLeftN  = "\033[%dD"
+
+	CursorEOL  = "\033[E"
+	CursorBOL  = "\033[1G"
+	CursorHide = "\033[?25l"
+	CursorShow = "\033[?25h"
+
+	ClearToEOL  = "\033[K"
+	ClearLine   = "\033[2K"
+	ClearScreen = "\033[2J"
+	CursorReset = "\033[0;0f"
+
+	ColorGrey    = "\033[38;5;245m"
+	ColorDefault = "\033[0m"
+
+	StartBracketedPaste = "\033[?2004h"
+	EndBracketedPaste   = "\033[?2004l"
+)
+
+// Bracketed-paste markers as read by Readline: CharBracketedPaste after
+// CharEsc, CharEscapeEx, followed by three characters equal to one of the codes.
+const (
+	CharBracketedPaste      = 50
+	CharBracketedPasteStart = "00~"
+	CharBracketedPasteEnd   = "01~"
+)
+
+// PasteMode records the last bracketed-paste marker seen by Readline.
+type PasteMode int
+
+const (
+	PasteModeOff = iota
+	PasteModeStart
+	PasteModeEnd
+)
+
+// PastModeOff is the original, misspelled name of PasteModeOff.
+//
+// Deprecated: use PasteModeOff.
+const PastModeOff = PasteModeOff
diff --git a/scripts/install.sh b/scripts/install.sh
index 93bd1eaf..1fc0c139 100644
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -89,7 +89,6 @@ User=ollama
 Group=ollama
 Restart=always
 RestartSec=3
-Environment="HOME=/usr/share/ollama"
 Environment="PATH=$PATH"
 
 [Install]
diff --git a/server/download.go b/server/download.go
index acfa8e52..fa559f0c 100644
--- a/server/download.go
+++ 
b/server/download.go @@ -15,6 +15,7 @@ import ( "strings" "sync" "sync/atomic" + "syscall" "time" "golang.org/x/sync/errgroup" @@ -158,7 +159,8 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis w := io.NewOffsetWriter(file, part.StartsAt()) err := b.downloadChunk(inner, requestURL, w, part, opts) switch { - case errors.Is(err, context.Canceled): + case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC): + // return immediately if the context is canceled or the device is out of space return err case err != nil: log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], i, try, err) diff --git a/server/images.go b/server/images.go index 5ae8d008..e249f8f9 100644 --- a/server/images.go +++ b/server/images.go @@ -131,7 +131,7 @@ func (m *ManifestV2) GetTotalSize() (total int64) { } func GetManifest(mp ModelPath) (*ManifestV2, string, error) { - fp, err := mp.GetManifestPath(false) + fp, err := mp.GetManifestPath() if err != nil { return nil, "", err } @@ -595,10 +595,13 @@ func CreateManifest(name string, cfg *LayerReader, layers []*Layer) error { return err } - fp, err := mp.GetManifestPath(true) + fp, err := mp.GetManifestPath() if err != nil { return err } + if err := os.MkdirAll(filepath.Dir(fp), 0o755); err != nil { + return err + } return os.WriteFile(fp, manifestJSON, 0o644) } @@ -710,16 +713,19 @@ func CreateLayer(f io.ReadSeeker) (*LayerReader, error) { func CopyModel(src, dest string) error { srcModelPath := ParseModelPath(src) - srcPath, err := srcModelPath.GetManifestPath(false) + srcPath, err := srcModelPath.GetManifestPath() if err != nil { return err } destModelPath := ParseModelPath(dest) - destPath, err := destModelPath.GetManifestPath(true) + destPath, err := destModelPath.GetManifestPath() if err != nil { return err } + if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil { + return err + } // copy the file input, err := os.ReadFile(srcPath) @@ -882,7 +888,7 @@ func 
DeleteModel(name string) error {
 		return err
 	}
 
-	fp, err := mp.GetManifestPath(false)
+	fp, err := mp.GetManifestPath()
 	if err != nil {
 		return err
 	}
@@ -1121,10 +1127,13 @@ func PullModel(ctx context.Context, name string, regOpts *RegistryOptions, fn fu
 		return err
 	}
 
-	fp, err := mp.GetManifestPath(true)
+	fp, err := mp.GetManifestPath()
 	if err != nil {
 		return err
 	}
+	if err := os.MkdirAll(filepath.Dir(fp), 0o755); err != nil {
+		return err
+	}
 
 	err = os.WriteFile(fp, manifestJSON, 0o644)
 	if err != nil {
diff --git a/server/modelpath.go b/server/modelpath.go
index c6798a02..13b26eeb 100644
--- a/server/modelpath.go
+++ b/server/modelpath.go
@@ -85,20 +85,27 @@ func (mp ModelPath) GetShortTagname() string {
 	return fmt.Sprintf("%s/%s/%s:%s", mp.Registry, mp.Namespace, mp.Repository, mp.Tag)
 }
 
-func (mp ModelPath) GetManifestPath(createDir bool) (string, error) {
+// modelsDir returns the directory where Ollama stores its model files and manifests: the value of the
+// OLLAMA_MODELS environment variable when it is set and non-empty, otherwise ~/.ollama/models.
+func modelsDir() (string, error) {
+	if models := os.Getenv("OLLAMA_MODELS"); models != "" {
+		return models, nil
+	}
 	home, err := os.UserHomeDir()
 	if err != nil {
 		return "", err
 	}
+	return filepath.Join(home, ".ollama", "models"), nil
+}
 
-	path := filepath.Join(home, ".ollama", "models", "manifests", mp.Registry, mp.Namespace, mp.Repository, mp.Tag)
-	if createDir {
-		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
-			return "", err
-		}
+// GetManifestPath returns the path to the manifest file for the given model path. It is up to the caller to create the directory if it does not exist.
+func (mp ModelPath) GetManifestPath() (string, error) {
+	dir, err := modelsDir()
+	if err != nil {
+		return "", err
 	}
 
-	return path, nil
+	return filepath.Join(dir, "manifests", mp.Registry, mp.Namespace, mp.Repository, mp.Tag), nil
 }
 
 func (mp ModelPath) BaseURL() *url.URL {
@@ -109,12 +116,12 @@ func (mp ModelPath) BaseURL() *url.URL {
 }
 
 func GetManifestPath() (string, error) {
-	home, err := os.UserHomeDir()
+	dir, err := modelsDir()
 	if err != nil {
 		return "", err
 	}
 
-	path := filepath.Join(home, ".ollama", "models", "manifests")
+	path := filepath.Join(dir, "manifests")
 	if err := os.MkdirAll(path, 0o755); err != nil {
 		return "", err
 	}
@@ -123,7 +130,7 @@ func GetManifestPath() (string, error) {
 }
 
 func GetBlobsPath(digest string) (string, error) {
-	home, err := os.UserHomeDir()
+	dir, err := modelsDir()
 	if err != nil {
 		return "", err
 	}
@@ -132,7 +139,7 @@ func GetBlobsPath(digest string) (string, error) {
 		digest = strings.ReplaceAll(digest, ":", "-")
 	}
 
-	path := filepath.Join(home, ".ollama", "models", "blobs", digest)
+	path := filepath.Join(dir, "blobs", digest)
 	dirPath := filepath.Dir(path)
 	if digest == "" {
 		dirPath = path