From 55ea963c9e9033d01c7c20a54c5ede5babb6878e Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Wed, 25 Sep 2024 11:11:22 -0700 Subject: [PATCH] update default model to llama3.2 (#6959) --- README.md | 30 +++++---- app/ollama.iss | 2 +- app/ollama_welcome.ps1 | 2 +- docs/api.md | 64 +++++++++---------- docs/docker.md | 2 +- docs/faq.md | 10 +-- docs/modelfile.md | 8 +-- docs/openai.md | 22 +++---- docs/template.md | 2 +- docs/tutorials/langchainjs.md | 4 +- docs/windows.md | 2 +- examples/go-chat/main.go | 2 +- .../langchain-python-rag-document/README.md | 4 +- .../langchain-python-rag-document/main.py | 2 +- .../langchain-python-rag-websummary/README.md | 4 +- .../langchain-python-rag-websummary/main.py | 2 +- examples/langchain-python-simple/README.md | 4 +- examples/langchain-python-simple/main.py | 2 +- examples/modelfile-mario/Modelfile | 2 +- examples/modelfile-mario/readme.md | 6 +- .../README.md | 6 +- .../main.py | 2 +- .../predefinedschema.py | 2 +- .../randomaddresses.py | 2 +- examples/python-json-datagenerator/readme.md | 4 +- examples/python-simplechat/client.py | 2 +- examples/python-simplechat/readme.md | 4 +- examples/typescript-simplechat/client.ts | 2 +- macapp/src/app.tsx | 2 +- 29 files changed, 102 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index 65d93db5..d422b8cb 100644 --- a/README.md +++ b/README.md @@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla ## Quickstart -To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1): +To run and chat with [Llama 3.2](https://ollama.com/library/llama3.2): ``` -ollama run llama3.1 +ollama run llama3.2 ``` ## Model library @@ -49,6 +49,8 @@ Here are some example models that can be downloaded: | Model | Parameters | Size | Download | | ------------------ | ---------- | ----- | ------------------------------ | +| Llama 3.2 | 3B | 2.0GB | `ollama run llama3.2` | +| Llama 3.2 | 1B | 1.3GB | `ollama run llama3.1:1b` | | Llama 3.1 | 8B | 4.7GB | `ollama run llama3.1` | | Llama 3.1 | 70B | 40GB | `ollama run llama3.1:70b` | | Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` | @@ -99,16 +101,16 @@ See the [guide](docs/import.md) on importing models for more information. ### Customize a prompt -Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model: +Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.2` model: ``` -ollama pull llama3.1 +ollama pull llama3.2 ``` Create a `Modelfile`: ``` -FROM llama3.1 +FROM llama3.2 # set the temperature to 1 [higher is more creative, lower is more coherent] PARAMETER temperature 1 @@ -143,7 +145,7 @@ ollama create mymodel -f ./Modelfile ### Pull a model ``` -ollama pull llama3.1 +ollama pull llama3.2 ``` > This command can also be used to update a local model. Only the diff will be pulled. @@ -151,13 +153,13 @@ ollama pull llama3.1 ### Remove a model ``` -ollama rm llama3.1 +ollama rm llama3.2 ``` ### Copy a model ``` -ollama cp llama3.1 my-model +ollama cp llama3.2 my-model ``` ### Multiline input @@ -181,14 +183,14 @@ The image features a yellow smiley face, which is likely the central focus of th ### Pass the prompt as an argument ``` -$ ollama run llama3.1 "Summarize this file: $(cat README.md)" +$ ollama run llama3.2 "Summarize this file: $(cat README.md)" Ollama is a lightweight, extensible framework for building and running language models on the local machine. 
It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications. ``` ### Show model information ``` -ollama show llama3.1 +ollama show llama3.2 ``` ### List models on your computer @@ -206,7 +208,7 @@ ollama ps ### Stop a model which is currently running ``` -ollama stop llama3.1 +ollama stop llama3.2 ``` ### Start Ollama @@ -228,7 +230,7 @@ Next, start the server: Finally, in a separate shell, run a model: ``` -./ollama run llama3.1 +./ollama run llama3.2 ``` ## REST API @@ -239,7 +241,7 @@ Ollama has a REST API for running and managing models. ``` curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt":"Why is the sky blue?" }' ``` @@ -248,7 +250,7 @@ curl http://localhost:11434/api/generate -d '{ ``` curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", "content": "why is the sky blue?" } ] diff --git a/app/ollama.iss b/app/ollama.iss index 63b5bdb0..4038815a 100644 --- a/app/ollama.iss +++ b/app/ollama.iss @@ -142,7 +142,7 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi ;FinishedHeadingLabel=Run your first model -;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3.1 +;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n ollama run llama3.2 ;ClickFinish=%n [Registry] diff --git a/app/ollama_welcome.ps1 b/app/ollama_welcome.ps1 index 46777a3a..e9695748 100644 --- a/app/ollama_welcome.ps1 +++ b/app/ollama_welcome.ps1 @@ -4,5 +4,5 @@ write-host "Welcome to Ollama!" write-host "" write-host "Run your first model:" write-host "" -write-host "`tollama run llama3.1" +write-host "`tollama run llama3.2" write-host "" \ No newline at end of file diff --git a/docs/api.md b/docs/api.md index 95e79e00..fe2eb82c 100644 --- a/docs/api.md +++ b/docs/api.md @@ -69,7 +69,7 @@ Enable JSON mode by setting the `format` parameter to `json`. This will structur ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Why is the sky blue?" }' ``` @@ -80,7 +80,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T08:52:19.385406455-07:00", "response": "The", "done": false @@ -102,7 +102,7 @@ To calculate how fast the response is generated in tokens per second (token/s), ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "response": "", "done": true, @@ -124,7 +124,7 @@ A response can be received in one reply when streaming is off. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Why is the sky blue?", "stream": false }' @@ -136,7 +136,7 @@ If `stream` is set to `false`, the response will be a single JSON object: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -194,7 +194,7 @@ curl http://localhost:11434/api/generate -d '{ ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "What color is the sky at different times of the day? 
Respond using JSON", "format": "json", "stream": false @@ -205,7 +205,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-11-09T21:07:55.186497Z", "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n", "done": true, @@ -327,7 +327,7 @@ If you want to set custom options for the model at runtime rather than in the Mo ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Why is the sky blue?", "stream": false, "options": { @@ -368,7 +368,7 @@ curl http://localhost:11434/api/generate -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "response": "The sky is blue because it is the color of the sky.", "done": true, @@ -390,7 +390,7 @@ If an empty prompt is provided, the model will be loaded into memory. ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1" + "model": "llama3.2" }' ``` @@ -400,7 +400,7 @@ A single JSON object is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-12-18T19:52:07.071755Z", "response": "", "done": true @@ -415,7 +415,7 @@ If an empty prompt is provided and the `keep_alive` parameter is set to `0`, a m ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "keep_alive": 0 }' ``` @@ -426,7 +426,7 @@ A single JSON object is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2024-09-12T03:54:03.516566Z", "response": "", "done": true, @@ -472,7 +472,7 @@ Send a chat message with a streaming response. ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -488,7 +488,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -503,7 +503,7 @@ Final response: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 4883583458, @@ -521,7 +521,7 @@ Final response: ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -536,7 +536,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -560,7 +560,7 @@ Send a chat message with a conversation history. 
You can use this same approach ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -584,7 +584,7 @@ A stream of JSON objects is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T08:52:19.385406455-07:00", "message": { "role": "assistant", @@ -598,7 +598,7 @@ Final response: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-08-04T19:22:45.499127Z", "done": true, "total_duration": 8113331500, @@ -656,7 +656,7 @@ curl http://localhost:11434/api/chat -d '{ ```shell curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -674,7 +674,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2023-12-12T14:13:43.416799Z", "message": { "role": "assistant", @@ -696,7 +696,7 @@ curl http://localhost:11434/api/chat -d '{ ``` curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "user", @@ -735,7 +735,7 @@ curl http://localhost:11434/api/chat -d '{ ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at": "2024-07-22T20:33:28.123648Z", "message": { "role": "assistant", @@ -771,7 +771,7 @@ If the messages array is empty, the model will be loaded into memory. ``` curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [] }' ``` @@ -779,7 +779,7 @@ curl http://localhost:11434/api/chat -d '{ ##### Response ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at":"2024-09-12T21:17:29.110811Z", "message": { "role": "assistant", @@ -798,7 +798,7 @@ If the messages array is empty and the `keep_alive` parameter is set to `0`, a m ``` curl http://localhost:11434/api/chat -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [], "keep_alive": 0 }' @@ -810,7 +810,7 @@ A single JSON object is returned: ```json { - "model": "llama3.1", + "model": "llama3.2", "created_at":"2024-09-12T21:33:17.547535Z", "message": { "role": "assistant", @@ -989,7 +989,7 @@ Show information about a model including details, modelfile, template, parameter ```shell curl http://localhost:11434/api/show -d '{ - "name": "llama3.1" + "name": "llama3.2" }' ``` @@ -1050,7 +1050,7 @@ Copy a model. Creates a model with another name from an existing model. ```shell curl http://localhost:11434/api/copy -d '{ - "source": "llama3.1", + "source": "llama3.2", "destination": "llama3-backup" }' ``` @@ -1105,7 +1105,7 @@ Download a model from the ollama library. 
Cancelled pulls are resumed from where ```shell curl http://localhost:11434/api/pull -d '{ - "name": "llama3.1" + "name": "llama3.2" }' ``` diff --git a/docs/docker.md b/docs/docker.md index 314666b2..9c758c38 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -63,7 +63,7 @@ docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 114 Now you can run a model: ``` -docker exec -it ollama ollama run llama3.1 +docker exec -it ollama ollama run llama3.2 ``` ### Try different models diff --git a/docs/faq.md b/docs/faq.md index b2b1ca30..0dbbb3ff 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -32,7 +32,7 @@ When using the API, specify the `num_ctx` parameter: ```shell curl http://localhost:11434/api/generate -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Why is the sky blue?", "options": { "num_ctx": 4096 @@ -232,7 +232,7 @@ curl http://localhost:11434/api/chat -d '{"model": "mistral"}' To preload a model using the CLI, use the command: ```shell -ollama run llama3.1 "" +ollama run llama3.2 "" ``` ## How do I keep a model loaded in memory or make it unload immediately? @@ -240,7 +240,7 @@ ollama run llama3.1 "" By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you're making numerous requests to the LLM. If you want to immediately unload a model from memory, use the `ollama stop` command: ```shell -ollama stop llama3.1 +ollama stop llama3.2 ``` If you're using the API, use the `keep_alive` parameter with the `/api/generate` and `/api/chat` endpoints to set the amount of time that a model stays in memory. The `keep_alive` parameter can be set to: @@ -251,12 +251,12 @@ If you're using the API, use the `keep_alive` parameter with the `/api/generate` For example, to preload a model and leave it in memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": -1}' +curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": -1}' ``` To unload the model and free up memory use: ```shell -curl http://localhost:11434/api/generate -d '{"model": "llama3.1", "keep_alive": 0}' +curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "keep_alive": 0}' ``` Alternatively, you can change the amount of time all models are loaded into memory by setting the `OLLAMA_KEEP_ALIVE` environment variable when starting the Ollama server. The `OLLAMA_KEEP_ALIVE` variable uses the same parameter types as the `keep_alive` parameter types mentioned above. Refer to the section explaining [how to configure the Ollama server](#how-do-i-configure-ollama-server) to correctly set the environment variable. diff --git a/docs/modelfile.md b/docs/modelfile.md index a33f180b..aa2849e7 100644 --- a/docs/modelfile.md +++ b/docs/modelfile.md @@ -50,7 +50,7 @@ INSTRUCTION arguments An example of a `Modelfile` creating a mario blueprint: ```modelfile -FROM llama3.1 +FROM llama3.2 # sets the temperature to 1 [higher is more creative, lower is more coherent] PARAMETER temperature 1 # sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token @@ -72,10 +72,10 @@ More examples are available in the [examples directory](../examples). To view the Modelfile of a given model, use the `ollama show --modelfile` command. 
```bash - > ollama show --modelfile llama3.1 + > ollama show --modelfile llama3.2 # Modelfile generated by "ollama show" # To build a new Modelfile based on this one, replace the FROM line with: - # FROM llama3.1:latest + # FROM llama3.2:latest FROM /Users/pdevine/.ollama/models/blobs/sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29 TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|> @@ -103,7 +103,7 @@ FROM : #### Build from existing model ```modelfile -FROM llama3.1 +FROM llama3.2 ``` A list of available base models: diff --git a/docs/openai.md b/docs/openai.md index c6df0fec..e13842c0 100644 --- a/docs/openai.md +++ b/docs/openai.md @@ -25,7 +25,7 @@ chat_completion = client.chat.completions.create( 'content': 'Say this is a test', } ], - model='llama3.1', + model='llama3.2', ) response = client.chat.completions.create( @@ -46,13 +46,13 @@ response = client.chat.completions.create( ) completion = client.completions.create( - model="llama3.1", + model="llama3.2", prompt="Say this is a test", ) list_completion = client.models.list() -model = client.models.retrieve("llama3.1") +model = client.models.retrieve("llama3.2") embeddings = client.embeddings.create( model="all-minilm", @@ -74,7 +74,7 @@ const openai = new OpenAI({ const chatCompletion = await openai.chat.completions.create({ messages: [{ role: 'user', content: 'Say this is a test' }], - model: 'llama3.1', + model: 'llama3.2', }) const response = await openai.chat.completions.create({ @@ -94,13 +94,13 @@ const response = await openai.chat.completions.create({ }) const completion = await openai.completions.create({ - model: "llama3.1", + model: "llama3.2", prompt: "Say this is a test.", }) const listCompletion = await openai.models.list() -const model = await openai.models.retrieve("llama3.1") +const model = await openai.models.retrieve("llama3.2") const embedding = await openai.embeddings.create({ model: "all-minilm", @@ -114,7 +114,7 @@ const embedding = await openai.embeddings.create({ curl http://localhost:11434/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "llama3.1", + "model": "llama3.2", "messages": [ { "role": "system", @@ -154,13 +154,13 @@ curl http://localhost:11434/v1/chat/completions \ curl http://localhost:11434/v1/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "llama3.1", + "model": "llama3.2", "prompt": "Say this is a test" }' curl http://localhost:11434/v1/models -curl http://localhost:11434/v1/models/llama3.1 +curl http://localhost:11434/v1/models/llama3.2 curl http://localhost:11434/v1/embeddings \ -H "Content-Type: application/json" \ @@ -274,7 +274,7 @@ curl http://localhost:11434/v1/embeddings \ Before using a model, pull it locally `ollama pull`: ```shell -ollama pull llama3.1 +ollama pull llama3.2 ``` ### Default model names @@ -282,7 +282,7 @@ ollama pull llama3.1 For tooling that relies on default OpenAI model names such as `gpt-3.5-turbo`, use `ollama cp` to copy an existing model name to a temporary name: ``` -ollama cp llama3.1 gpt-3.5-turbo +ollama cp llama3.2 gpt-3.5-turbo ``` Afterwards, this new model name can be specified the `model` field: diff --git a/docs/template.md b/docs/template.md index 192d878d..bd367e91 100644 --- a/docs/template.md +++ b/docs/template.md @@ -33,7 +33,7 @@ Omitting a template in these models puts the responsibility of correctly templat To add templates in your model, you'll need to add a `TEMPLATE` command to the Modelfile. Here's an example using Meta's Llama 3. 
```dockerfile -FROM llama3.1 +FROM llama3.2 TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|> diff --git a/docs/tutorials/langchainjs.md b/docs/tutorials/langchainjs.md index f925869b..86f895ae 100644 --- a/docs/tutorials/langchainjs.md +++ b/docs/tutorials/langchainjs.md @@ -15,7 +15,7 @@ import { Ollama } from "@langchain/community/llms/ollama"; const ollama = new Ollama({ baseUrl: "http://localhost:11434", - model: "llama3.1", + model: "llama3.2", }); const answer = await ollama.invoke(`why is the sky blue?`); @@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`); console.log(answer); ``` -That will get us the same thing as if we ran `ollama run llama3.1 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app. +That will get us the same thing as if we ran `ollama run llama3.2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app. ```bash npm install cheerio diff --git a/docs/windows.md b/docs/windows.md index 372a35aa..5f196756 100644 --- a/docs/windows.md +++ b/docs/windows.md @@ -29,7 +29,7 @@ Ollama uses unicode characters for progress indication, which may render as unkn Here's a quick example showing API access from `powershell` ```powershell -(Invoke-WebRequest -method POST -Body '{"model":"llama3.1", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json +(Invoke-WebRequest -method POST -Body '{"model":"llama3.2", "prompt":"Why is the sky blue?", "stream": false}' -uri http://localhost:11434/api/generate ).Content | ConvertFrom-json ``` ## Troubleshooting diff --git a/examples/go-chat/main.go b/examples/go-chat/main.go index 7663fb8f..07430305 100644 --- a/examples/go-chat/main.go +++ b/examples/go-chat/main.go @@ -35,7 +35,7 @@ func main() { ctx := context.Background() req := &api.ChatRequest{ - Model: "llama3.1", + Model: "llama3.2", Messages: messages, } diff --git a/examples/langchain-python-rag-document/README.md b/examples/langchain-python-rag-document/README.md index e2f3bc02..d37afc9d 100644 --- a/examples/langchain-python-rag-document/README.md +++ b/examples/langchain-python-rag-document/README.md @@ -4,10 +4,10 @@ This example provides an interface for asking questions to a PDF document. ## Setup -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ``` -ollama pull llama3.1 +ollama pull llama3.2 ``` 2. Install the Python Requirements. 
diff --git a/examples/langchain-python-rag-document/main.py b/examples/langchain-python-rag-document/main.py index 6f7cec9b..4871a042 100644 --- a/examples/langchain-python-rag-document/main.py +++ b/examples/langchain-python-rag-document/main.py @@ -51,7 +51,7 @@ while True: template=template, ) - llm = Ollama(model="llama3.1", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])) + llm = Ollama(model="llama3.2", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])) qa_chain = RetrievalQA.from_chain_type( llm, retriever=vectorstore.as_retriever(), diff --git a/examples/langchain-python-rag-websummary/README.md b/examples/langchain-python-rag-websummary/README.md index 29c706a3..746c47ab 100644 --- a/examples/langchain-python-rag-websummary/README.md +++ b/examples/langchain-python-rag-websummary/README.md @@ -4,10 +4,10 @@ This example summarizes the website, [https://ollama.com/blog/run-llama2-uncenso ## Running the Example -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ```bash - ollama pull llama3.1 + ollama pull llama3.2 ``` 2. Install the Python Requirements. diff --git a/examples/langchain-python-rag-websummary/main.py b/examples/langchain-python-rag-websummary/main.py index 77b09fbb..56f8bd24 100644 --- a/examples/langchain-python-rag-websummary/main.py +++ b/examples/langchain-python-rag-websummary/main.py @@ -5,7 +5,7 @@ from langchain.chains.summarize import load_summarize_chain loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally") docs = loader.load() -llm = Ollama(model="llama3.1") +llm = Ollama(model="llama3.2") chain = load_summarize_chain(llm, chain_type="stuff") result = chain.invoke(docs) diff --git a/examples/langchain-python-simple/README.md b/examples/langchain-python-simple/README.md index 60db2c8c..680ab560 100644 --- a/examples/langchain-python-simple/README.md +++ b/examples/langchain-python-simple/README.md @@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama. ## Running the Example -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ```bash - ollama pull llama3.1 + ollama pull llama3.2 ``` 2. Install the Python Requirements. diff --git a/examples/langchain-python-simple/main.py b/examples/langchain-python-simple/main.py index a7ed81d6..8d6989c8 100644 --- a/examples/langchain-python-simple/main.py +++ b/examples/langchain-python-simple/main.py @@ -1,6 +1,6 @@ from langchain.llms import Ollama input = input("What is your question?") -llm = Ollama(model="llama3.1") +llm = Ollama(model="llama3.2") res = llm.predict(input) print (res) diff --git a/examples/modelfile-mario/Modelfile b/examples/modelfile-mario/Modelfile index a3747086..b8e49667 100644 --- a/examples/modelfile-mario/Modelfile +++ b/examples/modelfile-mario/Modelfile @@ -1,4 +1,4 @@ -FROM llama3.1 +FROM llama3.2 PARAMETER temperature 1 SYSTEM """ You are Mario from super mario bros, acting as an assistant. diff --git a/examples/modelfile-mario/readme.md b/examples/modelfile-mario/readme.md index c3f34197..882023ad 100644 --- a/examples/modelfile-mario/readme.md +++ b/examples/modelfile-mario/readme.md @@ -2,12 +2,12 @@ # Example character: Mario -This example shows how to create a basic character using Llama3.1 as the base model. +This example shows how to create a basic character using Llama 3.2 as the base model. To run this example: 1. Download the Modelfile -2. 
`ollama pull llama3.1` to get the base model used in the model file. +2. `ollama pull llama3.2` to get the base model used in the model file. 3. `ollama create NAME -f ./Modelfile` 4. `ollama run NAME` @@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?" What the model file looks like: ``` -FROM llama3.1 +FROM llama3.2 PARAMETER temperature 1 SYSTEM """ You are Mario from Super Mario Bros, acting as an assistant. diff --git a/examples/python-grounded-factuality-rag-check/README.md b/examples/python-grounded-factuality-rag-check/README.md index 5c981752..cd72071c 100644 --- a/examples/python-grounded-factuality-rag-check/README.md +++ b/examples/python-grounded-factuality-rag-check/README.md @@ -1,14 +1,14 @@ # RAG Hallucination Checker using Bespoke-Minicheck -This example allows the user to ask questions related to a document, which can be specified via an article url. Relevant chunks are retreived from the document and given to `llama3.1` as context to answer the question. Then each sentence in the answer is checked against the retrieved chunks using `bespoke-minicheck` to ensure that the answer does not contain hallucinations. +This example allows the user to ask questions related to a document, which can be specified via an article url. Relevant chunks are retreived from the document and given to `llama3.2` as context to answer the question. Then each sentence in the answer is checked against the retrieved chunks using `bespoke-minicheck` to ensure that the answer does not contain hallucinations. ## Running the Example -1. Ensure `all-minilm` (embedding) `llama3.1` (chat) and `bespoke-minicheck` (check) models installed: +1. Ensure `all-minilm` (embedding) `llama3.2` (chat) and `bespoke-minicheck` (check) models installed: ```bash ollama pull all-minilm - ollama pull llama3.1 + ollama pull llama3.2 ollama pull bespoke-minicheck ``` diff --git a/examples/python-grounded-factuality-rag-check/main.py b/examples/python-grounded-factuality-rag-check/main.py index f4d562d5..eab0b670 100644 --- a/examples/python-grounded-factuality-rag-check/main.py +++ b/examples/python-grounded-factuality-rag-check/main.py @@ -119,7 +119,7 @@ if __name__ == "__main__": system_prompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}" ollama_response = ollama.generate( - model="llama3.1", + model="llama3.2", prompt=question, system=system_prompt, options={"stream": False}, diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py index 68090ad7..91463760 100644 --- a/examples/python-json-datagenerator/predefinedschema.py +++ b/examples/python-json-datagenerator/predefinedschema.py @@ -2,7 +2,7 @@ import requests import json import random -model = "llama3.1" +model = "llama3.2" template = { "firstName": "", "lastName": "", diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py index 878c9803..3df59d32 100644 --- a/examples/python-json-datagenerator/randomaddresses.py +++ b/examples/python-json-datagenerator/randomaddresses.py @@ -12,7 +12,7 @@ countries = [ "France", ] country = random.choice(countries) -model = "llama3.1" +model = "llama3.2" prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. 
Key names should have no backslashes, values should use plain ascii with no special characters." diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md index 5b444dff..a551e1dd 100644 --- a/examples/python-json-datagenerator/readme.md +++ b/examples/python-json-datagenerator/readme.md @@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran ## Running the Example -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ```bash - ollama pull llama3.1 + ollama pull llama3.2 ``` 2. Install the Python Requirements. diff --git a/examples/python-simplechat/client.py b/examples/python-simplechat/client.py index 85043d5f..6ef14ffc 100644 --- a/examples/python-simplechat/client.py +++ b/examples/python-simplechat/client.py @@ -2,7 +2,7 @@ import json import requests # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve` -model = "llama3.1" # TODO: update this for whatever model you wish to use +model = "llama3.2" # TODO: update this for whatever model you wish to use def chat(messages): diff --git a/examples/python-simplechat/readme.md b/examples/python-simplechat/readme.md index 4c2ded4d..a4a2dfc1 100644 --- a/examples/python-simplechat/readme.md +++ b/examples/python-simplechat/readme.md @@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam ## Running the Example -1. Ensure you have the `llama3.1` model installed: +1. Ensure you have the `llama3.2` model installed: ```bash - ollama pull llama3.1 + ollama pull llama3.2 ``` 2. Install the Python Requirements. diff --git a/examples/typescript-simplechat/client.ts b/examples/typescript-simplechat/client.ts index 8ad113b1..d8faaa1b 100644 --- a/examples/typescript-simplechat/client.ts +++ b/examples/typescript-simplechat/client.ts @@ -1,6 +1,6 @@ import * as readline from "readline"; -const model = "llama3.1"; +const model = "llama3.2"; type Message = { role: "assistant" | "user" | "system"; content: string; diff --git a/macapp/src/app.tsx b/macapp/src/app.tsx index a627e63d..449fc851 100644 --- a/macapp/src/app.tsx +++ b/macapp/src/app.tsx @@ -19,7 +19,7 @@ export default function () { const [step, setStep] = useState(Step.WELCOME) const [commandCopied, setCommandCopied] = useState(false) - const command = 'ollama run llama3.1' + const command = 'ollama run llama3.2' return (
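As a quick end-to-end check of the updated default, the commands below pull the new model and exercise it from both the CLI and the REST API. This is a minimal sketch that assumes a local Ollama server is running on the default port 11434; the prompt text is illustrative only.

```shell
# Pull the updated default model
ollama pull llama3.2

# Chat with it interactively from the CLI
ollama run llama3.2

# Or query the REST API directly (single JSON response, no streaming)
curl http://localhost:11434/api/generate -d '{
  "model": "llama3.2",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```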