From 55ea963c9e9033d01c7c20a54c5ede5babb6878e Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Wed, 25 Sep 2024 11:11:22 -0700 Subject: [PATCH] update default model to llama3.2 (#6959) --- README.md | 30 +++++---- app/ollama.iss | 2 +- app/ollama_welcome.ps1 | 2 +- docs/api.md | 64 +++++++++---------- docs/docker.md | 2 +- docs/faq.md | 10 +-- docs/modelfile.md | 8 +-- docs/openai.md | 22 +++---- docs/template.md | 2 +- docs/tutorials/langchainjs.md | 4 +- docs/windows.md | 2 +- examples/go-chat/main.go | 2 +- .../langchain-python-rag-document/README.md | 4 +- .../langchain-python-rag-document/main.py | 2 +- .../langchain-python-rag-websummary/README.md | 4 +- .../langchain-python-rag-websummary/main.py | 2 +- examples/langchain-python-simple/README.md | 4 +- examples/langchain-python-simple/main.py | 2 +- examples/modelfile-mario/Modelfile | 2 +- examples/modelfile-mario/readme.md | 6 +- .../README.md | 6 +- .../main.py | 2 +- .../predefinedschema.py | 2 +- .../randomaddresses.py | 2 +- examples/python-json-datagenerator/readme.md | 4 +- examples/python-simplechat/client.py | 2 +- examples/python-simplechat/readme.md | 4 +- examples/typescript-simplechat/client.ts | 2 +- macapp/src/app.tsx | 2 +- 29 files changed, 102 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index 65d93db5..d422b8cb 100644 --- a/README.md +++ b/README.md @@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla ## Quickstart -To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1): +To run and chat with [Llama 3.2](https://ollama.com/library/llama3.2): ``` -ollama run llama3.1 +ollama run llama3.2 ``` ## Model library @@ -49,6 +49,8 @@ Here are some example models that can be downloaded: | Model | Parameters | Size | Download | | ------------------ | ---------- | ----- | ------------------------------ | +| Llama 3.2 | 3B | 2.0GB | `ollama run llama3.2` | +| Llama 3.2 | 1B | 1.3GB | `ollama run llama3.1:1b` | | Llama 3.1 | 8B | 4.7GB | `ollama run llama3.1` | | Llama 3.1 | 70B | 40GB | `ollama run llama3.1:70b` | | Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` | @@ -99,16 +101,16 @@ See the [guide](docs/import.md) on importing models for more information. ### Customize a prompt -Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model: +Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.2` model: ``` -ollama pull llama3.1 +ollama pull llama3.2 ``` Create a `Modelfile`: ``` -FROM llama3.1 +FROM llama3.2 # set the temperature to 1 [higher is more creative, lower is more coherent] PARAMETER temperature 1 @@ -143,7 +145,7 @@ ollama create mymodel -f ./Modelfile ### Pull a model ``` -ollama pull llama3.1 +ollama pull llama3.2 ``` > This command can also be used to update a local model. Only the diff will be pulled. @@ -151,13 +153,13 @@ ollama pull llama3.1 ### Remove a model ``` -ollama rm llama3.1 +ollama rm llama3.2 ``` ### Copy a model ``` -ollama cp llama3.1 my-model +ollama cp llama3.2 my-model ``` ### Multiline input @@ -181,14 +183,14 @@ The image features a yellow smiley face, which is likely the central focus of th ### Pass the prompt as an argument ``` -$ ollama run llama3.1 "Summarize this file: $(cat README.md)" +$ ollama run llama3.2 "Summarize this file: $(cat README.md)" Ollama is a lightweight, extensible framework for building and running language models on the local machine. 
It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications. ``` ### Show model information ``` -ollama show llama3.1 +ollama show llama3.2 ``` ### List models on your computer @@ -206,7 +208,7 @@ ollama ps ### Stop a model which is currently running ``` -ollama stop llama3.1 +ollama stop llama3.2 ``` ### Start Ollama @@ -228,7 +230,7 @@ Next, start the server: Finally, in a separate shell, run a model: ``` -./ollama run llama3.1 +./ollama run llama3.2 ``` ## REST API @@ -239,7 +241,7 @@ Ollama has a REST API for running and managing models. ``` curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt":"Why is the sky blue?" }' ``` @@ -248,7 +250,7 @@ curl http://localhost:11434/api/generate -d '{ ``` curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", "content": "why is the sky blue?" } ] diff --git a/app/ollama.iss b/app/ollama.iss index 63b5bdb0..4038815a 100644 --- a/app/ollama.iss +++ b/app/ollama.iss @@ -142,7 +142,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi ;FinishedHeadingLabel=Run your first model -;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3.1 +;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3.2 ;ClickFinish=%n [Registry] diff --git a/app/ollama_welcome.ps1 b/app/ollama_welcome.ps1 index 46777a3a..e9695748 100644 --- a/app/ollama_welcome.ps1 +++ b/app/ollama_welcome.ps1 @@ -4,5 +4,5 @@ write-host "Welcome to Ollama!" write-host "" write-host "Run your first model:" write-host "" -write-host "`tollama run llama3.1" +write-host "`tollama run llama3.2" write-host "" \ No newline at end of file diff --git a/docs/api.md b/docs/api.md index 95e79e00..fe2eb82c 100644 --- a/docs/api.md +++ b/docs/api.md @@ -69,7 +69,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Why is the sky blue?" }' ``` @@ -80,7 +80,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T08:52:19.385406455-07:00", "response": "The", "done": false @@ -102,7 +102,7 @@ To calculate how fast the response is generated in tokens per second (token/s), ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "response": "", "done": true, @@ -124,7 +124,7 @@ A response can be received in one reply when streaming is off. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Why is the sky blue?", "stream": false }' @@ -136,7 +136,7 @@ If `stream` is set to `false`, the response will be a single JSON object: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -194,7 +194,7 @@ curl http://localhost:11434/api/generate -d '{ ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "What color is the sky at different times of the day? 
Respond using JSON", "format": "json", "stream": false @@ -205,7 +205,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-11-09T21:07:55.186497Z", "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n", "done": true, @@ -327,7 +327,7 @@ If you want to set custom options for the model at runtime rather than in the Mo ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Why is the sky blue?", "stream": false, "options": { @@ -368,7 +368,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -390,7 +390,7 @@ If an empty prompt is provided, the model will be loaded into memory. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1" + "model": "llama3.2" }' ``` @@ -400,7 +400,7 @@ A single JSON object is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-12-18T19:52:07.071755Z", "response": "", "done": true @@ -415,7 +415,7 @@ If an empty prompt is provided and the `keep_alive` parameter is set to `0`, a m ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "keep_alive": 0 }' ``` @@ -426,7 +426,7 @@ A single JSON object is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2024-09-12T03:54:03.516566Z", "response": "", "done": true, @@ -472,7 +472,7 @@ Send a chat message with a streaming response. ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -488,7 +488,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -503,7 +503,7 @@ Final response: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 4883583458, @@ -521,7 +521,7 @@ Final response: ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -536,7 +536,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -560,7 +560,7 @@ Send a chat message with a conversation history. 
You can use this same approach ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -584,7 +584,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -598,7 +598,7 @@ Final response: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 8113331500, @@ -656,7 +656,7 @@ curl http://localhost:11434/api/chat -d '{ ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -674,7 +674,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -696,7 +696,7 @@ curl http://localhost:11434/api/chat -d '{ ``` curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -735,7 +735,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2024-07-22T20:33:28.123648Z", "message": { "role": "assistant", @@ -771,7 +771,7 @@ If the messages array is empty, the model will be loaded into memory. ``` curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [] }' ``` @@ -779,7 +779,7 @@ curl http://localhost:11434/api/chat -d '{ ##### Response ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at":"2024-09-12T21:17:29.110811Z", "message": { "role": "assistant", @@ -798,7 +798,7 @@ If the messages array is empty and the `keep_alive` parameter is set to `0`, a m ``` curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [], "keep_alive": 0 }' @@ -810,7 +810,7 @@ A single JSON object is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at":"2024-09-12T21:33:17.547535Z", "message": { "role": "assistant", @@ -989,7 +989,7 @@ Show information about a model including details, modelfile, template, parameter ```shell curl http://localhost:11434/api/show -d '{ - "name": "llama3.1" + "name": "llama3.2" }' ``` @@ -1050,7 +1050,7 @@ Copy a model. Creates a model with another name from an existing model. ```shell curl http://localhost:11434/api/copy -d '{ - "source": "llama3.1", + "source": "llama3.2", "destination": "llama3-backup" }' ``` @@ -1105,7 +1105,7 @@ Download a model from the ollama library. 
Cancelled pulls are resumed from where ```shell curl http://localhost:11434/api/pull -d '{ - "name": "llama3.1" + "name": "llama3.2" }' ``` diff --git a/docs/docker.md b/docs/docker.md index 314666b2..9c758c38 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -63,7 +63,7 @@ docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 114 Now you can run a model: ``` -docker exec -it ollama ollama run llama3.1 +docker exec -it ollama ollama run llama3.2 ``` ### Try different models diff --git a/docs/faq.md b/docs/faq.md index b2b1ca30..0dbbb3ff 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter: ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Why is the sky blue?", "options": { "num_ctx": 4096 @@ -232,7 +232,7 @@ curl http://localhost:11434/api/chat -d '{"model": "mistral"}' To preload a model using the CLI, use the command: ```shell -ollama run llama3.1 "" +ollama run llama3.2 "" ``` ## How do I keep a model loaded in memory or make it unload immediately? @@ -240,7 +240,7 @@ ollama run llama3.1 "" By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you're making numerous requests to the LLM. If you want to immediately unload a model from memory, use the `ollama stop` command: ```shell -ollama stop llama3.1 +ollama stop llama3.2 ``` If you're using the API, use the `keep_alive` parameter with the `/api/generate` and `/api/chat` endpoints to set the amount of time that a model stays in memory. The `keep_alive` parameter can be set to: @@ -251,12 +251,12 @@ If you're using the API, use the `keep_alive` parameter with the `/api/generate` For example, to preload a model and leave it in memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": -1}' +curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": -1}' ``` To unload the model and free up memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": 0}' +curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": 0}' ``` Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to the section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable. diff --git a/docs/modelfile.md b/docs/modelfile.md index a33f180b..aa2849e7 100644 --- a/docs/modelfile.md +++ b/docs/modelfile.md @@ -50,7 +50,7 @@ INSTRUCTION arguments An example of a `Modelfile` creating a mario blueprint: ```modelfile -FROM llama3.1 +FROM llama3.2 # sets the temperature to 1 [higher is more creative, lower is more coherent] PARAMETER temperature 1 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token @@ -72,10 +72,10 @@ More examples are available in the [examples directory](../examples). To view the Modelfile of a given model, use the `ollama show --modelfile` command. 
```bash - > ollama show --modelfile llama3.1 + > ollama show --modelfile llama3.2 # Modelfile generated by "ollama show" # To build a new Modelfile based on this one, replace the FROM line with: - # FROM llama3.1:latest + # FROM llama3.2:latest FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29 TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|> @@ -103,7 +103,7 @@ FROM : #### Build from existing model ```modelfile -FROM llama3.1 +FROM llama3.2 ``` A list of available base models: diff --git a/docs/openai.md b/docs/openai.md index c6df0fec..e13842c0 100644 --- a/docs/openai.md +++ b/docs/openai.md @@ -25,7 +25,7 @@ chat_completion = client.chat.completions.create( 'content': 'Say this is a test', } ], - model='llama3.1', + model='llama3.2', ) response = client.chat.completions.create( @@ -46,13 +46,13 @@ response = client.chat.completions.create( ) completion = client.completions.create( - model="llama3.1", + model="llama3.2", prompt="Say this is a test", ) list_completion = client.models.list() -model = client.models.retrieve("llama3.1") +model = client.models.retrieve("llama3.2") embeddings = client.embeddings.create( model="all-minilm", @@ -74,7 +74,7 @@ const openai = new OpenAI({ const chatCompletion = await openai.chat.completions.create({ messages: [{ role: 'user', content: 'Say this is a test' }], - model: 'llama3.1', + model: 'llama3.2', }) const response = await openai.chat.completions.create({ @@ -94,13 +94,13 @@ const response = await openai.chat.completions.create({ }) const completion = await openai.completions.create({ - model: "llama3.1", + model: "llama3.2", prompt: "Say this is a test.", }) const listCompletion = await openai.models.list() -const model = await openai.models.retrieve("llama3.1") +const model = await openai.models.retrieve("llama3.2") const embedding = await openai.embeddings.create({ model: "all-minilm", @@ -114,7 +114,7 @@ const embedding = await openai.embeddings.create({ curl http://localhost:11434/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "system", @@ -154,13 +154,13 @@ curl http://localhost:11434/v1/chat/completions \ curl http://localhost:11434/v1/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Say this is a test" }' curl http://localhost:11434/v1/models -curl http://localhost:11434/v1/models/llama3.1 +curl http://localhost:11434/v1/models/llama3.2 curl http://localhost:11434/v1/embeddings \ -H "Content-Type: application/json" \ @@ -274,7 +274,7 @@ curl http://localhost:11434/v1/embeddings \ Before using a model, pull it locally `ollama pull`: ```shell -ollama pull llama3.1 +ollama pull llama3.2 ``` ### Default model names @@ -282,7 +282,7 @@ ollama pull llama3.1 For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name: ``` -ollama cp llama3.1 gpt-3.5-turbo +ollama cp llama3.2 gpt-3.5-turbo ``` Afterwards, this new model name can be specified the `model` field: diff --git a/docs/template.md b/docs/template.md index 192d878d..bd367e91 100644 --- a/docs/template.md +++ b/docs/template.md @@ -33,7 +33,7 @@ Omitting a template in these models puts the responsibility of correctly templat To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3. 
```dockerfile -FROM llama3.1 +FROM llama3.2 TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|> diff --git a/docs/tutorials/langchainjs.md b/docs/tutorials/langchainjs.md index f925869b..86f895ae 100644 --- a/docs/tutorials/langchainjs.md +++ b/docs/tutorials/langchainjs.md @@ -15,7 +15,7 @@ import { Ollama } from "@langchain/community/llms/ollama"; const ollama = new Ollama({ baseUrl: "http://localhost:11434", - model: "llama3.1", + model: "llama3.2", }); const answer = await ollama.invoke(`why is the sky blue?`); @@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`); console.log(answer); ``` -That will get us the same thing as if we ran `ollama run llama3.1 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app. +That will get us the same thing as if we ran `ollama run llama3.2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app. ```bash npm install cheerio diff --git a/docs/windows.md b/docs/windows.md index 372a35aa..5f196756 100644 --- a/docs/windows.md +++ b/docs/windows.md @@ -29,7 +29,7 @@ Ollama uses unicode characters for progress indication, which may render as unkn Here's a quick example showing API access from `powershell` ```powershell -(Invoke-WebRequest -method POST -Body '{"model":"llama3.1", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json +(Invoke-WebRequest -method POST -Body '{"model":"llama3.2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json ``` ## Troubleshooting diff --git a/examples/go-chat/main.go b/examples/go-chat/main.go index 7663fb8f..07430305 100644 --- a/examples/go-chat/main.go +++ b/examples/go-chat/main.go @@ -35,7 +35,7 @@ func main() { ctx := context.Background() req := &api.ChatRequest{ - Model: "llama3.1", + Model: "llama3.2", Messages: messages, } diff --git a/examples/langchain-python-rag-document/README.md b/examples/langchain-python-rag-document/README.md index e2f3bc02..d37afc9d 100644 --- a/examples/langchain-python-rag-document/README.md +++ b/examples/langchain-python-rag-document/README.md @@ -4,10 +4,10 @@ This example provides an interface for asking questions to a PDF document. ## Setup -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ``` -ollama pull llama3.1 +ollama pull llama3.2 ``` 2. Install the Python Requirements. 
diff --git a/examples/langchain-python-rag-document/main.py b/examples/langchain-python-rag-document/main.py index 6f7cec9b..4871a042 100644 --- a/examples/langchain-python-rag-document/main.py +++ b/examples/langchain-python-rag-document/main.py @@ -51,7 +51,7 @@ while True: template=template, ) - llm = Ollama(model="llama3.1", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])) + llm = Ollama(model="llama3.2", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])) qa_chain = RetrievalQA.from_chain_type( llm, retriever=vectorstore.as_retriever(), diff --git a/examples/langchain-python-rag-websummary/README.md b/examples/langchain-python-rag-websummary/README.md index 29c706a3..746c47ab 100644 --- a/examples/langchain-python-rag-websummary/README.md +++ b/examples/langchain-python-rag-websummary/README.md @@ -4,10 +4,10 @@ This example summarizes the website, [https://ollama.com/blog/run-llama2-uncenso ## Running the Example -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ```bash - ollama pull llama3.1 + ollama pull llama3.2 ``` 2. Install the Python Requirements. diff --git a/examples/langchain-python-rag-websummary/main.py b/examples/langchain-python-rag-websummary/main.py index 77b09fbb..56f8bd24 100644 --- a/examples/langchain-python-rag-websummary/main.py +++ b/examples/langchain-python-rag-websummary/main.py @@ -5,7 +5,7 @@ from langchain.chains.summarize import load_summarize_chain loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally") docs = loader.load() -llm = Ollama(model="llama3.1") +llm = Ollama(model="llama3.2") chain = load_summarize_chain(llm, chain_type="stuff") result = chain.invoke(docs) diff --git a/examples/langchain-python-simple/README.md b/examples/langchain-python-simple/README.md index 60db2c8c..680ab560 100644 --- a/examples/langchain-python-simple/README.md +++ b/examples/langchain-python-simple/README.md @@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama. ## Running the Example -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ```bash - ollama pull llama3.1 + ollama pull llama3.2 ``` 2. Install the Python Requirements. diff --git a/examples/langchain-python-simple/main.py b/examples/langchain-python-simple/main.py index a7ed81d6..8d6989c8 100644 --- a/examples/langchain-python-simple/main.py +++ b/examples/langchain-python-simple/main.py @@ -1,6 +1,6 @@ from langchain.llms import Ollama input = input("What is your question?") -llm = Ollama(model="llama3.1") +llm = Ollama(model="llama3.2") res = llm.predict(input) print (res) diff --git a/examples/modelfile-mario/Modelfile b/examples/modelfile-mario/Modelfile index a3747086..b8e49667 100644 --- a/examples/modelfile-mario/Modelfile +++ b/examples/modelfile-mario/Modelfile @@ -1,4 +1,4 @@ -FROM llama3.1 +FROM llama3.2 PARAMETER temperature 1 SYSTEM """ You are Mario from super mario bros, acting as an assistant. diff --git a/examples/modelfile-mario/readme.md b/examples/modelfile-mario/readme.md index c3f34197..882023ad 100644 --- a/examples/modelfile-mario/readme.md +++ b/examples/modelfile-mario/readme.md @@ -2,12 +2,12 @@ # Example character: Mario -This example shows how to create a basic character using Llama3.1 as the base model. +This example shows how to create a basic character using Llama 3.2 as the base model. To run this example: 1. Download the Modelfile -2. 
`ollama pull llama3.1` to get the base model used in the model file. +2. `ollama pull llama3.2` to get the base model used in the model file. 3. `ollama create NAME -f ./Modelfile` 4. `ollama run NAME` @@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?" What the model file looks like: ``` -FROM llama3.1 +FROM llama3.2 PARAMETER temperature 1 SYSTEM """ You are Mario from Super Mario Bros, acting as an assistant. diff --git a/examples/python-grounded-factuality-rag-check/README.md b/examples/python-grounded-factuality-rag-check/README.md index 5c981752..cd72071c 100644 --- a/examples/python-grounded-factuality-rag-check/README.md +++ b/examples/python-grounded-factuality-rag-check/README.md @@ -1,14 +1,14 @@ # RAG Hallucination Checker using Bespoke-Minicheck -This example allows the user to ask questions related to a document, which can be specified via an article url. Relevant chunks are retreived from the document and given to `llama3.1` as context to answer the question. Then each sentence in the answer is checked against the retrieved chunks using `bespoke-minicheck` to ensure that the answer does not contain hallucinations. +This example allows the user to ask questions related to a document, which can be specified via an article url. Relevant chunks are retreived from the document and given to `llama3.2` as context to answer the question. Then each sentence in the answer is checked against the retrieved chunks using `bespoke-minicheck` to ensure that the answer does not contain hallucinations. ## Running the Example -1. Ensure `all-minilm` (embedding) `llama3.1` (chat) and `bespoke-minicheck` (check) models installed: +1. Ensure `all-minilm` (embedding) `llama3.2` (chat) and `bespoke-minicheck` (check) models installed: ```bash ollama pull all-minilm - ollama pull llama3.1 + ollama pull llama3.2 ollama pull bespoke-minicheck ``` diff --git a/examples/python-grounded-factuality-rag-check/main.py b/examples/python-grounded-factuality-rag-check/main.py index f4d562d5..eab0b670 100644 --- a/examples/python-grounded-factuality-rag-check/main.py +++ b/examples/python-grounded-factuality-rag-check/main.py @@ -119,7 +119,7 @@ if __name__ == "__main__": system_prompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}" ollama_response = ollama.generate( - model="llama3.1", + model="llama3.2", prompt=question, system=system_prompt, options={"stream": False}, diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py index 68090ad7..91463760 100644 --- a/examples/python-json-datagenerator/predefinedschema.py +++ b/examples/python-json-datagenerator/predefinedschema.py @@ -2,7 +2,7 @@ import requests import json import random -model = "llama3.1" +model = "llama3.2" template = { "firstName": "", "lastName": "", diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py index 878c9803..3df59d32 100644 --- a/examples/python-json-datagenerator/randomaddresses.py +++ b/examples/python-json-datagenerator/randomaddresses.py @@ -12,7 +12,7 @@ countries = [ "France", ] country = random.choice(countries) -model = "llama3.1" +model = "llama3.2" prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. 
Key names should have no backslashes, values should use plain ascii with no special characters." diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md index 5b444dff..a551e1dd 100644 --- a/examples/python-json-datagenerator/readme.md +++ b/examples/python-json-datagenerator/readme.md @@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran ## Running the Example -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ```bash - ollama pull llama3.1 + ollama pull llama3.2 ``` 2. Install the Python Requirements. diff --git a/examples/python-simplechat/client.py b/examples/python-simplechat/client.py index 85043d5f..6ef14ffc 100644 --- a/examples/python-simplechat/client.py +++ b/examples/python-simplechat/client.py @@ -2,7 +2,7 @@ import json import requests # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve` -model = "llama3.1" # TODO: update this for whatever model you wish to use +model = "llama3.2" # TODO: update this for whatever model you wish to use def chat(messages): diff --git a/examples/python-simplechat/readme.md b/examples/python-simplechat/readme.md index 4c2ded4d..a4a2dfc1 100644 --- a/examples/python-simplechat/readme.md +++ b/examples/python-simplechat/readme.md @@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam ## Running the Example -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ```bash - ollama pull llama3.1 + ollama pull llama3.2 ``` 2. Install the Python Requirements. diff --git a/examples/typescript-simplechat/client.ts b/examples/typescript-simplechat/client.ts index 8ad113b1..d8faaa1b 100644 --- a/examples/typescript-simplechat/client.ts +++ b/examples/typescript-simplechat/client.ts @@ -1,6 +1,6 @@ import * as readline from "readline"; -const model = "llama3.1"; +const model = "llama3.2"; type Message = { role: "assistant" | "user" | "system"; content: string; diff --git a/macapp/src/app.tsx b/macapp/src/app.tsx index a627e63d..449fc851 100644 --- a/macapp/src/app.tsx +++ b/macapp/src/app.tsx @@ -19,7 +19,7 @@ export default function () { const [step, setStep] = useState(Step.WELCOME) const [commandCopied, setCommandCopied] = useState(false) - const command = 'ollama run llama3.1' + const command = 'ollama run llama3.2' return (
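As a quick end-to-end check of the updated default, the commands below pull the new model and exercise it from both the CLI and the REST API. This is a minimal sketch that assumes a local Ollama server is running on the default port 11434; the prompt text is illustrative only.

```shell
# Pull the updated default model
ollama pull llama3.2

# Chat with it interactively from the CLI
ollama run llama3.2

# Or query the REST API directly (single JSON response, no streaming)
curl http://localhost:11434/api/generate -d '{
  "model": "llama3.2",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```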