diff --git a/README.md b/README.md
index 7687ac5f..d17b9723 100644
--- a/README.md
+++ b/README.md
@@ -295,6 +295,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
 - [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
+- [AiLama](https://github.com/zeyoyt/ailama) (A Discord user app that allows you to interact with Ollama anywhere in Discord)
 - [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
 - [Painting Droid](https://github.com/mateuszmigas/painting-droid) (Painting app with AI integrations)
 - [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
@@ -310,6 +311,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Claude Dev](https://github.com/saoudrizwan/claude-dev) - VSCode extension for multi-file/whole-repo coding
 - [Cherry Studio](https://github.com/kangfenmao/cherry-studio) (Desktop client with Ollama support)
 - [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
+- [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library)
 
 ### Terminal
 
@@ -346,6 +348,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Package managers
 
 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
+- [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
 - [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
 - [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
@@ -416,6 +419,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
 - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
+- [Plasmoid Ollama Control](https://github.com/imoize/plasmoid-ollamacontrol) (KDE Plasma extension that allows you to quickly manage/control Ollama models)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
diff --git a/cmd/cmd.go b/cmd/cmd.go
index f6d31f5b..5de1ed1b 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -1421,6 +1421,8 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_TMPDIR"],
 				envVars["OLLAMA_FLASH_ATTENTION"],
 				envVars["OLLAMA_LLM_LIBRARY"],
+				envVars["OLLAMA_GPU_OVERHEAD"],
+				envVars["OLLAMA_LOAD_TIMEOUT"],
 			})
 		default:
 			appendEnvDocs(cmd, envs)
diff --git a/convert/convert_gemma2.go b/convert/convert_gemma2.go
index c4ee2d09..0f98c1e3 100644
--- a/convert/convert_gemma2.go
+++ b/convert/convert_gemma2.go
@@ -34,10 +34,20 @@ func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
 }
 
 func (p *gemma2Model) Replacements() []string {
-	return append(
-		p.gemmaModel.Replacements(),
+	return []string{
+		"model.embed_tokens", "token_embd",
+		"model.norm", "output_norm",
+		"model.layers", "blk",
+		"input_layernorm", "attn_norm",
+		"self_attn.q_proj", "attn_q",
+		"self_attn.k_proj", "attn_k",
+		"self_attn.v_proj", "attn_v",
+		"self_attn.o_proj", "attn_output",
+		"mlp.gate_proj", "ffn_gate",
+		"mlp.down_proj", "ffn_down",
+		"mlp.up_proj", "ffn_up",
 		"post_attention_layernorm", "post_attention_norm",
 		"pre_feedforward_layernorm", "ffn_norm",
 		"post_feedforward_layernorm", "post_ffw_norm",
-	)
+	}
 }
diff --git a/convert/convert_test.go b/convert/convert_test.go
index 9eb1632f..2969673d 100644
--- a/convert/convert_test.go
+++ b/convert/convert_test.go
@@ -15,6 +15,7 @@ import (
 	"os"
 	"path/filepath"
 	"slices"
+	"strings"
 	"testing"
 
 	"golang.org/x/exp/maps"
@@ -22,6 +23,12 @@ import (
 	"github.com/ollama/ollama/llm"
 )
 
+type tensorData struct {
+	Offsets []int  `json:"data_offsets"`
+	Type    string `json:"dtype"`
+	Shape   []int  `json:"shape"`
+}
+
 func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	t.Helper()
 
@@ -96,6 +103,7 @@ func TestConvertModel(t *testing.T) {
 		"Mistral-7B-Instruct-v0.2",
 		"Mixtral-8x7B-Instruct-v0.1",
 		"gemma-2b-it",
+		"gemma-2-2b-it",
 		// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
 		"Phi-3-mini-128k-instruct",
 		"all-MiniLM-L6-v2",
@@ -140,6 +148,36 @@ func TestConvertModel(t *testing.T) {
 	}
 }
 
+func TestConvertInvalidTensorNames(t *testing.T) {
+	f, err := os.CreateTemp(t.TempDir(), "testmodel")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	tempDir := t.TempDir()
+
+	td := map[string]*tensorData{}
+	offset := 4096
+
+	td["model.layers.0.self_attn.q_proj.weight"] = &tensorData{
+		Offsets: []int{0, offset},
+		Type:    "F32",
+		Shape:   []int{4096, 4096},
+	}
+	td["blk.0.attn_q.weight"] = &tensorData{
+		Offsets: []int{offset, offset * 2},
+		Type:    "F32",
+		Shape:   []int{4096, 4096},
+	}
+	generateSafetensorTestData(t, tempDir, td)
+
+	err = ConvertModel(os.DirFS(tempDir), f)
+	if err == nil || !strings.HasPrefix(err.Error(), "duplicate tensor name") {
"duplicate tensor name") { + t.Errorf("expected error but didn't get one") + } +} + func TestConvertInvalidDatatype(t *testing.T) { f, err := os.CreateTemp(t.TempDir(), "testmodel") if err != nil { @@ -148,23 +186,10 @@ func TestConvertInvalidDatatype(t *testing.T) { defer f.Close() tempDir := t.TempDir() - generateSafetensorTestData(t, tempDir) - - err = ConvertModel(os.DirFS(tempDir), f) - if err == nil || err.Error() != "unsupported safetensors model" { - t.Errorf("expected error but didn't get one") - } -} - -func generateSafetensorTestData(t *testing.T, tempDir string) { - type tensorData struct { - Offsets []int `json:"data_offsets"` - Type string `json:"dtype"` - Shape []int `json:"shape"` - } - offset := 4096 * 14336 td := map[string]*tensorData{} + offset := 4096 * 14336 + td["model.layers.0.mlp.down_proj.weight"] = &tensorData{ Offsets: []int{0, offset}, Type: "I8", @@ -175,8 +200,16 @@ func generateSafetensorTestData(t *testing.T, tempDir string) { Type: "U8", Shape: []int{}, } + generateSafetensorTestData(t, tempDir, td) - data, err := json.Marshal(td) + err = ConvertModel(os.DirFS(tempDir), f) + if err == nil || err.Error() != "unsupported safetensors model" { + t.Errorf("expected error but didn't get one") + } +} + +func generateSafetensorTestData(t *testing.T, tempDir string, tensorData map[string]*tensorData) { + data, err := json.Marshal(tensorData) if err != nil { t.Fatal(err) } @@ -322,11 +355,6 @@ func TestConvertAdapter(t *testing.T) { } func generateLoraTestData(t *testing.T, tempDir string) { - type tensorData struct { - Offsets []int `json:"data_offsets"` - Type string `json:"dtype"` - Shape []int `json:"shape"` - } offset := 4096 * 8 * 4 td := map[string]*tensorData{"__metadata__": nil} diff --git a/convert/reader_safetensors.go b/convert/reader_safetensors.go index e1dde8fa..b21d219c 100644 --- a/convert/reader_safetensors.go +++ b/convert/reader_safetensors.go @@ -49,12 +49,19 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T keys := maps.Keys(headers) slices.Sort(keys) + names := make(map[string]struct{}, len(keys)) + for _, key := range keys { if value := headers[key]; value.Type != "" { // bitsandbytes quantized models are unsupported if len(value.Shape) == 0 { return nil, errors.New("unsupported safetensors model") } + ggufName := replacer.Replace(key) + if _, ok := names[ggufName]; ok { + return nil, fmt.Errorf("duplicate tensor name '%s' was found for this model", ggufName) + } + names[ggufName] = struct{}{} ts = append(ts, safetensor{ fs: fsys, path: p, @@ -62,7 +69,7 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T offset: safetensorsPad(n, value.Offsets[0]), size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]), tensorBase: &tensorBase{ - name: replacer.Replace(key), + name: ggufName, shape: value.Shape, }, }) diff --git a/convert/testdata/gemma-2-2b-it.json b/convert/testdata/gemma-2-2b-it.json new file mode 100644 index 00000000..8f34e667 --- /dev/null +++ b/convert/testdata/gemma-2-2b-it.json @@ -0,0 +1,312 @@ +{ + "general.architecture": "gemma2", + "general.file_type": "1", + "general.quantization_version": "2", + "gemma2.block_count": "26", + "gemma2.context_length": "8192", + "gemma2.embedding_length": "2304", + "gemma2.feed_forward_length": "9216", + "gemma2.attention.head_count": "8", + "gemma2.attention.head_count_kv": "4", + "gemma2.attention.key_length": "256", + "gemma2.attention.value_length": "256", + "gemma2.attention.layer_norm_rms_epsilon": 
"1e-06", + "tokenizer.ggml.model": "llama", + "tokenizer.ggml.add_bos_token": "true", + "tokenizer.ggml.add_eos_token": "false", + "tokenizer.ggml.bos_token_id": "2", + "tokenizer.ggml.eos_token_id": "1", + "tokenizer.ggml.padding_token_id": "0", + "tokenizer.ggml.unknown_token_id": "3", + "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8", + "tokenizer.ggml.token_type": "8d40143b3477df77beea4139420335ede458bf5e14102f01b0170197b55da8d8", + "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda", + "token_embd.weight": "64a9d30707e659e2e673656d71f5aef7a9fb9fd83bb9a77558dfc5abbe218a05", + "blk.0.attn_k.weight": "d8b4437c5edb3cddf6af9987038e1bb2b191c4f0fce0e160d2abace717f5d5d7", + "blk.0.attn_norm.weight": "1eb73e3f7aa8e502f6ca31cd19efbb8e4fd9a89692e13e48ac8205545a7fa7e8", + "blk.0.attn_output.weight": "39e7b78e57d356a22dd89ce1c4d7163b970712ba756545e1703f97866cd2192e", + "blk.0.attn_q.weight": "795058e23b6109febd9d55c89e1eebe6af0714ec8c56fd86a160876a6135ffe8", + "blk.0.attn_v.weight": "0cd6e583d1887c020472e961bbb113fe5a0d23ae2f1c2c876fc366cdb7692b52", + "blk.0.ffn_down.weight": "51eb4d962189e945a84e94e0dc1aad3f8f90cc1a11e18029670afcd0ea0acb1b", + "blk.0.ffn_gate.weight": "9811a29b8ad48432925897ab21dfcb13c5cbd372aeccbbefca9b7866883b4ce3", + "blk.0.ffn_norm.weight": "92cbf4652ef503c1de5b10f2be00b3fcf00100980cb3baa8f3013a8d8bf3d851", + "blk.0.ffn_up.weight": "af87de21746879483ed1b374cdd76b19ba11ca2b6dbb1beba98efdf3be3e8077", + "blk.0.post_attention_norm.weight": "32e135f1f258ffe407018899e39af1725d59d66d60022b9a21575ba160e0357a", + "blk.0.post_ffw_norm.weight": "ba286f5ac11b07fbc986173708c66f1920427be5a6d108af38fa0a837c1c8eb6", + "blk.1.attn_k.weight": "51584435552051f7fade76beca582b3f7190cf7fc07adcf527c2774d4b1c3901", + "blk.1.attn_norm.weight": "6833104c7fbf35a7e799ae56c262b97fffa14789642aee14381b25acd21ed80a", + "blk.1.attn_output.weight": "14c39481369087bf292ac9a3ab2ef166f9fe376a9f90c246653213ef264febdc", + "blk.1.attn_q.weight": "443f64ae2229f857c69d6bebb7800b685786cb77884c3ae19d4286aeed081325", + "blk.1.attn_v.weight": "0df482de2038f1e4c8a7733ac0ddb69ad90759dab5968b942af0155588de4c4a", + "blk.1.ffn_down.weight": "66f30763a8bbbcaea609a0087ed75fadb5e771c06378dd2cea94cf17e492e8cf", + "blk.1.ffn_gate.weight": "a7151bff00a545fa18b2c92dcd2a14572ccf9beb957a6c494f1374e8ebe174c9", + "blk.1.ffn_norm.weight": "e197d71ea11b5276bc0167d2663b88089b3ff42b47ba91e85f6c5d95f6306435", + "blk.1.ffn_up.weight": "57c182e0b14cccd1350d388f0c616991702e74281db54637451b70f4ccc24f9b", + "blk.1.post_attention_norm.weight": "3c56f837168d784c2d8bac247c130bdca6610c095c8da4558c536ccad7605609", + "blk.1.post_ffw_norm.weight": "d2a51d320fd01069dd7ccaa7082f16a7faeb671885607d7900b10a89c354d0fa", + "blk.2.attn_k.weight": "bc103c818192de7ce36caaf89dc117be4df13fb902e0bd9a23c64edace5df9b6", + "blk.2.attn_norm.weight": "0f2503aa126083a5d6ac72481be1ef66c6014705b573682b35bd864e4749a3d5", + "blk.2.attn_output.weight": "05fcd4a1226e482f91803a266f72caca887a93e63c2d2ba5611ab3c68d38743a", + "blk.2.attn_q.weight": "6a10b5c2fd423d1e4c4fd60fa8c154a0159b6b2501ea79cae2ef19f45a674e5e", + "blk.2.attn_v.weight": "3cf891945a1f8ae7cc908a5c6b729ff5b70f4436c5ffdbf245cc0ed4cc19cd1b", + "blk.2.ffn_down.weight": "ea204fd04e0d2fc728a9861a459216bbfec629c152004ba625f52cd8837bd51e", + "blk.2.ffn_gate.weight": "3a3518729f1b8b64a82b8792f33987db5418fdb094be0263c68f146a5c38de54", + "blk.2.ffn_norm.weight": "754ede678b725de41a34b82f0edf7688b5c065be7c0d46df6f7ad9430d986884", + 
"blk.2.ffn_up.weight": "ffdcb88439f5828ffbd9fc844b03ff91637b790b9838097258cc3ae75935720c", + "blk.2.post_attention_norm.weight": "4b3f53b7ba26e8c36b2dfda3b7e5fc4b1065257cefdea235fc7df9af130ac2fd", + "blk.2.post_ffw_norm.weight": "e550369e26b8485e2b54ad34b34bc98af5494287dcc513c2c39cf1eaa5b89d07", + "blk.3.attn_k.weight": "89f24ea450e37d9e95757651a83205c085d81b354ee9489dd6310a391d8409f3", + "blk.3.attn_norm.weight": "24e2ea662b7cb822b4ca5cd61bc17f2709f406d990ec3b4a0dac1cc112db45cf", + "blk.3.attn_output.weight": "ac4dad69473c6e3fac56669212cadd8c34ecc5973d945972e974d94805334967", + "blk.3.attn_q.weight": "b6a9c9a7d4722b9096631c65de62228dfddca6e26edfe6af7fce01e116ef0f4c", + "blk.3.attn_v.weight": "f272a960a40093942309bc342a379984cbacec2d7bc64428db3f64e6b1887ed4", + "blk.3.ffn_down.weight": "c0188ba50d8228805982029c277fc0e87aa57473b8363037c648f6d006ff828a", + "blk.3.ffn_gate.weight": "a04aec1561ee6c0fbb18c3db49dc62fb533619cf697fd548cbf2279761aaec3b", + "blk.3.ffn_norm.weight": "bc053837d44087ec05eb5d9458357b2a5be787789b19cdbbdc694b57697f99a6", + "blk.3.ffn_up.weight": "b3ce8b274f20796d3b1a7c08ba27a919066f9de89a782faa544c4a8d6bea1382", + "blk.3.post_attention_norm.weight": "9c922dee7a7df5667289e2788e60170238239cee2dfdbbd9e435763f9f416718", + "blk.3.post_ffw_norm.weight": "b682544ac953ad2e0b49027ed8916f2e9d1aba5d1587bb4127ac703570c7a03a", + "blk.4.attn_k.weight": "143b0cbb4b787b95c2b6212374410e32173ccef2adb914908a2f89a7916de512", + "blk.4.attn_norm.weight": "5668f60491b780273745192662d02c9a92a4f692b29d16aa0bbc7413fec4f85b", + "blk.4.attn_output.weight": "b9f2bdb68be1e0cf66dd19f8fa2afb105910ad2ef394864cb32cea8f8944e0d5", + "blk.4.attn_q.weight": "ddcf1343dafbc2dfcd0b8741225af22fe4b54b2becce29240bd01c34265d126c", + "blk.4.attn_v.weight": "6dc7074366e7ed52d9f48c594dcc85bef738e096276cb99d28228c89eecc5b9c", + "blk.4.ffn_down.weight": "30334ffc59ce343cf2a1b973174acb7722823463adc07e19a99bd0f404bc9906", + "blk.4.ffn_gate.weight": "890f7c8af208d63b28db52c4b8c16c2288a382d87ff5a6a6d6b0a5b3bf27e6cd", + "blk.4.ffn_norm.weight": "ff0316cc7847221eb86a90c1ab441d4ee61553d410c66414a7755021b3b12448", + "blk.4.ffn_up.weight": "6af97d113f91564c636734f215e25ee602d48eb045458f300b3ec7582be0f41d", + "blk.4.post_attention_norm.weight": "69438f231e105e68216b078bdeb35a7cdc8b12c4e2845e18ecf4c8d361d6a321", + "blk.4.post_ffw_norm.weight": "0fd535da78bcf2b32c95b05b2b83dc49817393765be90d8cc1ed3d56f47b68ec", + "blk.5.attn_k.weight": "0166eb3c6d20dcf3d3c169e94caa8dee057535bb525e29f698fb6f8844f18a6c", + "blk.5.attn_norm.weight": "a7808f27f164023d5cde2be00fc23cac6c71aa0ddeb60bc23e12411b80087672", + "blk.5.attn_output.weight": "8b65b2027a0842b68c5308f91d6a31de9599d794157d77df8418b19f9e0d9334", + "blk.5.attn_q.weight": "966bc626ef2c2394d872087a41c126bb1b67d1d5f6de920204ef5e5b16c34003", + "blk.5.attn_v.weight": "9a362aef3f4437fbf0ef6e1ba785f3329c3db2960f93fe36547d2795e9c254ea", + "blk.5.ffn_down.weight": "63e53541d34197720c06f297aa8142ac6b6eec002c7987b296f26e8b1400f931", + "blk.5.ffn_gate.weight": "d9591fdd32f783e0fc26e20d5d587ee8971ac8ae2e4c818c6eac1c125c7c7f37", + "blk.5.ffn_norm.weight": "677334cc60ecce3a7f4ab3acda15d359353d7358872f614ad8914e3780e9fc6e", + "blk.5.ffn_up.weight": "a63764110e1c655ffbd55af0669b2dfe4cc29d0e198d33a8e5426461b08a85f7", + "blk.5.post_attention_norm.weight": "c55499f859b2c0a7f5cabceaae47309a5ad38bc29d0f4a8db81f1357023162a9", + "blk.5.post_ffw_norm.weight": "82752754665f842418f3e302cb5f43d1e0504dcd124c4b8ddb77018b2c793837", + "blk.6.attn_k.weight": 
"e20a5f0d6c807273c8d491439566b428497ac02097cf0aa55e33748c28e14be6", + "blk.6.attn_norm.weight": "2c6ba42fd3c73d72073ced03a32dd28d70a89ed9bbbc8fea1ba03a7ade951e6c", + "blk.6.attn_output.weight": "4de7c5c2f4a133a266e17ed8c14c52959466b54cc7ab9e19f789a33b4850f284", + "blk.6.attn_q.weight": "56462d921800e6b8cd2213fef04c4ff16d728905cb2f4c58e966d0a053a3b0ae", + "blk.6.attn_v.weight": "b758dcbff769d6240c2245ede1dbc62c4170a67c77458e866312589220fe29af", + "blk.6.ffn_down.weight": "582247fb3c2bf687cbe9413fe18d18ad47bef4b65df7d78905e10335c6134764", + "blk.6.ffn_gate.weight": "3035444d5286aefb7a6d04e55bc27e1fac7cf895cd5be02319a431b8e047b4ae", + "blk.6.ffn_norm.weight": "e582d24c66e01b96faa20ce6adfda3d8583b11e809bff89969927398175e369a", + "blk.6.ffn_up.weight": "6f4b7bbfedeacf61a4866ae0616c4ba6c9e856662e8f00ae6aaec7f52c53e7b4", + "blk.6.post_attention_norm.weight": "8fe51b50bd677d21586aecab0b565c4bf9fa68ad50bfe366f45e8fea3c657ca8", + "blk.6.post_ffw_norm.weight": "81ba3cb4c2bf5c546b86855b7a885d3fafededc67eb3a35cd3598b03c9e26e65", + "blk.7.attn_k.weight": "2e044179cdcae0946708c86bfea7aa0391e1f7e2a09b33fca035d384cc3ca758", + "blk.7.attn_norm.weight": "94b48c546b046803c60e75a3acb17a356b710735989938021b565f68df9b4985", + "blk.7.attn_output.weight": "65709b4ad7a581f4d75793d39d4032a359f6bcc0c3835205242a0b99e5b66824", + "blk.7.attn_q.weight": "8ded993c95d1f7caf201ceb6fa035cd6ed6d351b50b999fa9355dfee9486cb5b", + "blk.7.attn_v.weight": "c92d5e2d2d48397542bc03bea25bf39154075e66c5bb1ead85188505aa04ae91", + "blk.7.ffn_down.weight": "e8ba8fb57208805ef1dc23cd7c86e9a2d1fb7c52c3940d292cd5bb2eb24b3fac", + "blk.7.ffn_gate.weight": "f0f06d6a2e06c5ac252083bc61d05c814e6289d3f4e4a87d2f06918254c02c36", + "blk.7.ffn_norm.weight": "ebf8ef775f72624148e09d68a4332187a7a5020c521fe0623da1cd3485ad33e0", + "blk.7.ffn_up.weight": "a554adc4fc7122c247c77670e169916ba1794c787b5be30a2b36705138f1f746", + "blk.7.post_attention_norm.weight": "3aa6bc21d85c3a0c12b964e82b12feaedfdd13130c3cd2229228e24e0967ebdf", + "blk.7.post_ffw_norm.weight": "508bc7b19ee8ff08f0007c890133a462fc57c7e72b16ee8f6dd64def264ef876", + "blk.8.attn_k.weight": "363c8e74056642fe9e7c2f3f9769d57319cd3fa0a6022810189ab8d894322885", + "blk.8.attn_norm.weight": "685b49a1f1acb169f4df0bdd8e3de6943f3033cebad14b898a72000595610d92", + "blk.8.attn_output.weight": "7bde571e4efef1c6a6143f0526721dfb59e0a0ea0e1a3616a322b2eb937efa48", + "blk.8.attn_q.weight": "fc993dbc1074c28a0e1d85e5ab2f4ea6a9c6c1affe7ee56027000a275daed9b6", + "blk.8.attn_v.weight": "281e8791d3aef9b3864f1cb054da0ae0c2fef4ce0a58b1bad8bc136b2fa0f62b", + "blk.8.ffn_down.weight": "b1164a2578a7f87ed99c2bbc76c5dfbbbc6a1a803605391acc3f320fc989ffd7", + "blk.8.ffn_gate.weight": "6b39a3b3aaaa79aee61416b54d62160b9258042650e61c6b47bc77c2dd17daf3", + "blk.8.ffn_norm.weight": "17ea1362c72da27f12bc936500492035bdef3fd8f940cb12b57f37d42ba8ecb1", + "blk.8.ffn_up.weight": "bc3a7c47afc440d2bdf8fbe9ddf2c9220467472c60c8b4ded8c0f181470ec96c", + "blk.8.post_attention_norm.weight": "5c506204e00411ef9c8b4134d40eedcc19fffe68dd0af7d7cc49dcabf2dfac7e", + "blk.8.post_ffw_norm.weight": "002faec235c3678864e2901eed275ce4e9dc229164a91c9cd4c965142ba62305", + "blk.9.attn_k.weight": "0bab39d8c237f1b6d0010db40467142625a9e6f2e0e4c49a56c12b41e4e0b1fa", + "blk.9.attn_norm.weight": "de5f38e873b17f07aa7598831b89cc1cae2c9bc3eb2e042ee9af059d2563e84e", + "blk.9.attn_output.weight": "8a8184702c25a62df9ff309c0c7badc8587208523b2be3e8fa90ce7080573e6f", + "blk.9.attn_q.weight": "7c961b2431b09ddf95377acd07201cb91bf13d9cd3ae0f2c25c7d6a0358d9f50", + "blk.9.attn_v.weight": 
"e22d240cb4743067033e659cbf210ebe2ebbab3e1dea6ccbe5eaa982382ca038", + "blk.9.ffn_down.weight": "a426f81210f03d6ad53277416e1fdcdf37d8065e4817613edaf6c67a343426be", + "blk.9.ffn_gate.weight": "a82eba825cb77b8e64f85ff99ede2fc71bc9b01751eeb17e9e6c246ee12ea62e", + "blk.9.ffn_norm.weight": "1a97f9b1302a3a326d534c5c3fed2db6db0ae45fd0edd381a3e4fc1c75d81030", + "blk.9.ffn_up.weight": "5f20bac2bbf03bb42adb92fbf99561651e1edda57e0b61935ac7f6c08c0ed7cb", + "blk.9.post_attention_norm.weight": "9f9866d13988e1946b1e1c80d9374a92a6e3be33748f8eaed3e126d1e1a4c796", + "blk.9.post_ffw_norm.weight": "a6896dbf698db4dbbe5dbf12417d4fd80e9cad0c539c858892ec0aa5b046bb58", + "blk.10.attn_k.weight": "ca8446e5d21ecd4e6a70dca8d321be480be4fba94d70cba065205436feb44270", + "blk.10.attn_norm.weight": "4f41fe290e8f21f63b82151b6cce94bf7318d121468816b0c58af0ff7c1658ab", + "blk.10.attn_output.weight": "c626d2e9681c5c941bbde43dddfae1a8d4986bf2be4470857bc8e8bd7f869044", + "blk.10.attn_q.weight": "1e61b210a13a429977325cf15d781ab77d604cfa862f4270329cbd94237d5835", + "blk.10.attn_v.weight": "8ff8d3e3f058ec3b35ada1057f2ed59c06494d0e0be6a8dc3ff9edf9f0e1a115", + "blk.10.ffn_down.weight": "bcebc04219f8081a5f483e58103c0ddbbbc631a0a54fd6dd9d55778e041f70ee", + "blk.10.ffn_gate.weight": "7a23a1e620ef871384ddf9611ccdcfb893fbf013cc203ac8e72f745420f1eea0", + "blk.10.ffn_norm.weight": "e3a375e43c349a1c6c66c22328e513cc1af3137fe839e43dc8e9be2f65914fd7", + "blk.10.ffn_up.weight": "5d182e7c94369194fca5f19cbbe668a999911e57f3d363bc7fb6088428700cb9", + "blk.10.post_attention_norm.weight": "b841c6308296e8984f3c5f549c6e3a242f4b3e19141e1f54cc08de9c46759c09", + "blk.10.post_ffw_norm.weight": "9d66fa05b5c940208f634f5053d809094c99a2a10a1d1e8847c8281fbd99fb49", + "blk.11.attn_k.weight": "14adf24ebb2bb17b336ca81cec3e690fd854782f4440ca6c66cc1d7e7bf1c850", + "blk.11.attn_norm.weight": "2d2213f311f50414702b5b34f22aafb9d9a0b6787243e7578562583dc40ad195", + "blk.11.attn_output.weight": "de1f14cc2a7fff00cf11b229f0576999205f17b9536e97abc9d6de3cc79a7884", + "blk.11.attn_q.weight": "2bcc5c147524003109ece0be08b89ac8b25baa71416ffa76573c6c052ffc6eea", + "blk.11.attn_v.weight": "2e6ab8573070c22dc1e0d7aebe4d52123226dacf7822dcce06fadbb38fb036a4", + "blk.11.ffn_down.weight": "1b86902f4e36868421e5228b9445051f8290b292df22a6d1af836dcecc1f25c3", + "blk.11.ffn_gate.weight": "e756e8081bd0a16aea4a9ef5076ad102113524f7a3d50a3a77aaa7f7938b63e8", + "blk.11.ffn_norm.weight": "6913887267be227cf9d1991a3dd8db2e7e74bb9b5fbdfcb9ac954fd7d7b95b3b", + "blk.11.ffn_up.weight": "619a3ac0609ebdf42c3fb2b6e4b1db48df79e6dd8418d7ab8f1bbff13d8a6a50", + "blk.11.post_attention_norm.weight": "e4b4ba92cef7b6a78407e8ab1b0307d47dac6c3df7b6817e28038317ff662d7e", + "blk.11.post_ffw_norm.weight": "40aceeec58cb855f0c158c9cc217168fcd5d0e735567d587217b1d78df17bc5f", + "blk.12.attn_k.weight": "c54c5a4d4892522022d1aa2204cfc624f0b4042caa536e678967316293fe5cb1", + "blk.12.attn_norm.weight": "7cd2ef58298569ffdf244d9b390f3917245276c8206e5780af5f96d8c0bbb446", + "blk.12.attn_output.weight": "85495ef9cc8b3deb21f741bde463ff6493acae2be51f02ecdeef952cbdec3375", + "blk.12.attn_q.weight": "d19383f83fd119bfb8c0280c9515705c11d8e7d502019fcf8f49efeef0d106d0", + "blk.12.attn_v.weight": "869ac669ba49531d9128892a0e27cef15de508ff40cdf80cc1681dde50d09204", + "blk.12.ffn_down.weight": "578f39f8f9fc2f09138afc884a952d7cc3a9a31de4216acd10e88e19e0b75f8c", + "blk.12.ffn_gate.weight": "e29a0186bc6c4a0720246306e922d3a83f777dadcf4ac80bad468287031cc8b5", + "blk.12.ffn_norm.weight": "e1ee95c6584b5cb57fcf1db8ce2bcc03aff91eb389238c094a61c00dde93d1f2", 
+ "blk.12.ffn_up.weight": "2a826f06d7cdfb3edc6ae250ff44363ef77a2a9cdf96313e23a331b99ebfa17d", + "blk.12.post_attention_norm.weight": "4bafc7699b948d5cbc0d3e09b418b06c6abc4651a61ada9609d9a2f21c7e5607", + "blk.12.post_ffw_norm.weight": "bbb8c34a7176bb1a49f9fe2bacca0bd26b673d52c0835b2e90fa11f2962f077f", + "blk.13.attn_k.weight": "ffeefccfe8255d1b694382012ff4134eee5fec9d9491c8d0ff0a13832d1a37e8", + "blk.13.attn_norm.weight": "35713726529e3887c4135a88e86e8a4d7270ba5b9f2d1ab462622fbf40a7cdce", + "blk.13.attn_output.weight": "0d60b7c5cd71190a9ef4b873b0f516be15447c32d83914db2794b14592b0b460", + "blk.13.attn_q.weight": "8296069e65bef794cefc61257fc65789b3cb22955e30f3df129205e5041b2222", + "blk.13.attn_v.weight": "ca0f4ab9d16a748fc643a5c0c7a19826a811bf2a4e7316a8c935d4bf0ce8abc6", + "blk.13.ffn_down.weight": "d5514e0c8e7b3ed1cbcc1605eb5be1733b6ab3514cf8a0508fc72f7d05ed8bcb", + "blk.13.ffn_gate.weight": "8108e517a82e08a3aefbbd267bfa50a1668f92a76273280ce8a6bc1f6dd61521", + "blk.13.ffn_norm.weight": "5fcb6132d2134bf1f835b904a99820fa501dbc57d2224129f7098bf3cabc1d36", + "blk.13.ffn_up.weight": "6d744b7cd390a3cae3aa350dd379b81246acd056a2259996b6aaadece8465ccc", + "blk.13.post_attention_norm.weight": "e08b14698912509790e9575b8676971fbb0a4d82d719367e3756c0d0c4ab8cc0", + "blk.13.post_ffw_norm.weight": "2b196e4450fc5f1e7367b2cf7fe33a15fe919fbcdd861d11002346f16e980535", + "blk.14.attn_k.weight": "120e5f48d7268dfd9ab5f4bc9cc57a7cec63ea9635f56b80d435eb22936e9483", + "blk.14.attn_norm.weight": "146367bcce4db72cc894419a2e0145a6f533507dd68e4739c10ee480308c401f", + "blk.14.attn_output.weight": "720fa0165e756876c5cb6ad9e2780dd910390933f3f8849e5add5da04266650b", + "blk.14.attn_q.weight": "f5183466f56219ca1aca52d8b82c2d966a4198fea40fdd6b39f4d8b06ca2a6dd", + "blk.14.attn_v.weight": "24f8ea3d5512cd37c43c8329cb0da0c90d1895aef763ac2dcee3fe5157ec50a2", + "blk.14.ffn_down.weight": "e29960965b384ae5ab3d898a4dbaa8fddd28fa0e477ac28bcac49dec12a5ac67", + "blk.14.ffn_gate.weight": "6d0d6a74bfe9692e8f8eedff0fc34fc4fa1c8687794f35f2e2b033ab2d7510b8", + "blk.14.ffn_norm.weight": "f7036c1a9a71e046c9d2af16e9218fda5dbb0f7241ab44747abed1f0f9d602ca", + "blk.14.ffn_up.weight": "7d69ea1424007ffc9c12247dd0308c616e93ac02a59ec341cfa48f92d6ce3b10", + "blk.14.post_attention_norm.weight": "65b9712834d9445d4236bec362f3fb795c20d60c541b3dc6dbb7914d9b493e41", + "blk.14.post_ffw_norm.weight": "9c6a8da2e4e437d5cfdf3b9097e9f8b64bf07946a048badec20f4d374613f38f", + "blk.15.attn_k.weight": "864bc618303a0e4ee67fb1d5e751de61e936cd51e96669dd86f8cd08f2305045", + "blk.15.attn_norm.weight": "f9f4187da6eeadc2fc5921d8fe669741697d16c13d71e4aaeb73b82f50dc577e", + "blk.15.attn_output.weight": "ce2419a0b097036b2a31f2f4ad731d5814bcc2ef4c511786e24471e5eefd273b", + "blk.15.attn_q.weight": "9539db5a970d11ebe99722d1e13fcd635e250033630811efe583d2f97778e4a9", + "blk.15.attn_v.weight": "1c834b48ccd88adaeabb7d8bcb6be0bcd6d5ac1354ce88fc28f19a1a96b81ab3", + "blk.15.ffn_down.weight": "bc1f97a65dde6fa2c1e5397afb612266944b343f2eaa868b635ddd25829f8a42", + "blk.15.ffn_gate.weight": "1b14529d57056b79037f6cb5008132e62cc35992353b38dda59572274623103b", + "blk.15.ffn_norm.weight": "9af77458de9ee55c66f93865759f9c2c398557f94f3fa8fa6af30543d7339cde", + "blk.15.ffn_up.weight": "41d524a26b61a9595816b4fd53cf57ef50a702e4ef32933ff6136dca9136a267", + "blk.15.post_attention_norm.weight": "c60a03cd0e63a7db5c80015e58e9b97ba2208caa19f66a6fef5c4447eca900ce", + "blk.15.post_ffw_norm.weight": "34f7f9f96769215bbc3d17084df091864aef96a6645b7d0b3b7d9bd92f1a4b0b", + "blk.16.attn_k.weight": 
"7e27240d9f3a8c6cf0f4a980113d43234f514eadc3e3e1792b86efb29ffb1a6d", + "blk.16.attn_norm.weight": "af798acc0899282a30448edec48223b3e8efda177090273e612d8eca5e377301", + "blk.16.attn_output.weight": "79df39a3709d3d53e84146291e0944a7a653d06705293d9ccb5648dceadb432c", + "blk.16.attn_q.weight": "db58a1c3b83ad294804e5fd7321005719e200659173466df5a52a182b80b7165", + "blk.16.attn_v.weight": "2af6d48cbaeb225b5c1a704f76abd89c8ab1521417695b112b4dcc2cbd39b74d", + "blk.16.ffn_down.weight": "fc1c813eb5e7da3d6194569d6cb21602fc6eff2dc8e1b0eb753f2d5df148189c", + "blk.16.ffn_gate.weight": "7a80bcbc42464bd55df4814a6edbd7b5c153e0428323bbe49de55e2d2add33e7", + "blk.16.ffn_norm.weight": "2041685ee926d30f3f2ae4ec35b5688f1cd834167a6359a7d4057eac804c58b2", + "blk.16.ffn_up.weight": "8da4b718973ac1d43b928829bc45e062fd101984d6c98dd825bd7c5d08ebfbe3", + "blk.16.post_attention_norm.weight": "975c48fe680a6167438a106140a8872eee7765191f152d80e3b8ddf47693e095", + "blk.16.post_ffw_norm.weight": "4de2d4d483acfe4fc77860ea929025df2f4e15c10729413f36a18c94eaa6d689", + "blk.17.attn_k.weight": "f937e61f0af8c4cd98ee742648eb60e02e579683e21d421071295a3b70aebaad", + "blk.17.attn_norm.weight": "c3270583ed28b7e423f5b170c59113234f258169b93a867d9274f4c10b7cb115", + "blk.17.attn_output.weight": "b8c1150e81e685e539a5dcf2c19047a24eba2b281fabe166674b1d71ef4612ea", + "blk.17.attn_q.weight": "c255100ae2011e7dc7e3bf3bc3ccd96d859fbb98581cae993d7b82c1ba8e8b39", + "blk.17.attn_v.weight": "5830bb0a555984c6485348067f70b5d22ae337c011aa9248dac2ff4c95944551", + "blk.17.ffn_down.weight": "8ff9a7cccaa3776434a9d895aae4fb5c36c736bf2ec98784226b4c234940fbb0", + "blk.17.ffn_gate.weight": "1b52876739712831c272911533da206f407b46034a1a4ae8a88c1f96b6bd5747", + "blk.17.ffn_norm.weight": "d0e16ba5e87c91b545334e022058c7d03849665c3b1a6298771b656531366b66", + "blk.17.ffn_up.weight": "4dd6211d01dbebbe21052708eddc242b082a58b5f18ed16479e17987c1d3432e", + "blk.17.post_attention_norm.weight": "6f49c775c7417dade77ba8268a0f8441c1e5ec28b5d7e4dc5ed07a04d04600c8", + "blk.17.post_ffw_norm.weight": "b91a0bb2e6679e9c9be06ad323adae441d00a3d673efb19d7c4954be2aa84b27", + "blk.18.attn_k.weight": "22b565ace1b4da8b33865a58625be1d90beea9891f29686a69fa9cf7c93217db", + "blk.18.attn_norm.weight": "3e0160d7063c8753de65d2356a66648e47d921efdc5c917efb8209892120f8db", + "blk.18.attn_output.weight": "e3180f0bb4ca90b31e9b08158db38e332de62dfbaefe34aa94cc316409331e09", + "blk.18.attn_q.weight": "f3a5a83614c3ba7ea41cdd5b1b0819a241ee2a951a381ce4a9e001d3f700ed8f", + "blk.18.attn_v.weight": "f3350a5984fb951fc738adcf78147e6d812ff1c576670c460cafc99c253c1654", + "blk.18.ffn_down.weight": "9e9d09b13a33525e14bdaee6efc65c551ac7cf7680e534b940ab122a3a7c1ac9", + "blk.18.ffn_gate.weight": "ebaec8b4b578a2e8d815baac12f1675c208f80c68074d5a18288a2e1a60680ee", + "blk.18.ffn_norm.weight": "33e7687c53a242f2f8dc7093a491c97b18d4a5a8c14d183f02bd586a770f05aa", + "blk.18.ffn_up.weight": "78a1816662378ce56cc870e705174492781897b3afd2d4d97a51f10f2f2987c1", + "blk.18.post_attention_norm.weight": "a58dde3f12df3e94cbc27d87c8ea86f89af8a388a506446ff6758f05399b05fc", + "blk.18.post_ffw_norm.weight": "cebf90cc143577d483cca27b032dfd82031ee59bdf17c0e2cf60a0a3ad5bf996", + "blk.19.attn_k.weight": "4683375d0599ac9e2232196aae1e90af13a14cae26e865465de5c8e257bb2055", + "blk.19.attn_norm.weight": "f3eba936bfb1814bbcb0a1d62739eb66daac839df8c9c836fe0e94860df88525", + "blk.19.attn_output.weight": "51c0f01d38a9dcfe9bdbc4643576fab164c1d9e4b7168b7695c0ee55e6965667", + "blk.19.attn_q.weight": 
"28d15b69b8416f2e7ddc88fe381cb1e2ef2ad705fb1c268139ba96498cc74848", + "blk.19.attn_v.weight": "6860f1cd720638e63a981fa2c0b4db900129826bcb9823c9ddf9fb8b1b9f3383", + "blk.19.ffn_down.weight": "bc7f2d7827ee01c2dd41401c7b3b1700ad3a4ff620e8bb734f92630d342dcc7f", + "blk.19.ffn_gate.weight": "54d03ef69ba373fc410fbca8f1e34a565d58e4296d9a035ff7e48340b9c848e7", + "blk.19.ffn_norm.weight": "9178fc796a340ee6e8128ca74c0cb6203d1adbed6927af4e5ac7863da57affc7", + "blk.19.ffn_up.weight": "a77bd708026c6e83ad5c79c223278e74621bcf74a9641c7818d96b595daaad20", + "blk.19.post_attention_norm.weight": "ae94aa26f4c411bf9496a6fd4a6df64ee589ee1ae9a04b531d45acc95721e582", + "blk.19.post_ffw_norm.weight": "9ad210700edeef12133bdcff04bf1c7f62b49f6f4a9ba483c7cdc59857c24a5c", + "blk.20.attn_k.weight": "e35bce1e9f4a7a09ef34721f57ea38cfca68c272f52d923fe50af8308f66cfaa", + "blk.20.attn_norm.weight": "644800f6926fd34f233795c4dec1151a295d2138ca8cac33e3e48167d26f8b41", + "blk.20.attn_output.weight": "8d3758cd236471741e1ad66c0710cb79077dc8c7a3a292d35bc551c0c5abe627", + "blk.20.attn_q.weight": "c333b1f0f6f956b5d73891df10b1a0321e55fc31c40d623a24e1f52caa6a998b", + "blk.20.attn_v.weight": "8562b418d0c4868a050fb19fa3fcaf50a8cf1c669f537d666c80c7b3a04714e1", + "blk.20.ffn_down.weight": "97efb608ac44cc804198faec3ee66eafe56ced6b7ca5359700c6f1df75b7205e", + "blk.20.ffn_gate.weight": "5c61151d86f28415c73c73d90ec088c646cbe5c1640197caf58eb501ba7db293", + "blk.20.ffn_norm.weight": "24bbe0a701afd4bbeea65b3edde712b3cbb2281043bbc43dbf250582453116ed", + "blk.20.ffn_up.weight": "e170cf68e249566aa99eb6f6b265679bf9a5a6b76830ba24e7e130c2515910c4", + "blk.20.post_attention_norm.weight": "e092d751cfe20dbf2d348358f3b38397bd83e4ed94d6bbaa6bbaddcd902b2ac4", + "blk.20.post_ffw_norm.weight": "219a18a47dcba76e669e4322223a5a9227bd3db1de3fbd3d3cfb22e54a783c5a", + "blk.21.attn_k.weight": "c3a095ebddb42c63824f1c98da65263dc88e4d790a26aa1632840b44f5cc7cb1", + "blk.21.attn_norm.weight": "ef8bbaded5fbc45ad9cf3985ae02174524e7090fe6362811124f942ef643bec7", + "blk.21.attn_output.weight": "668f018aba72baac6252aa3ad58569ddd55ab751a0dd8d7bcc9fb9b6efb4bf53", + "blk.21.attn_q.weight": "e759c65663089f3bbbd51847934c185e680c82f1249065d5d487da638e519e6d", + "blk.21.attn_v.weight": "2ff57762686cf9ba1f5a6be76503454b97556ce67f4ac98254bd0562231197ba", + "blk.21.ffn_down.weight": "3fd106556fb721b1c28ae3f4026bc83eb1b08ed910f2ba5f466c6b5f327d91cb", + "blk.21.ffn_gate.weight": "338022d882f4b6619e8054a6fb909696fa3eef3013cf69b65c3cacdfc5b9e42c", + "blk.21.ffn_norm.weight": "1e77660c23a3f9653ee721a863d1960f773d87437cabc4dc0a6e17ee3d4e5e44", + "blk.21.ffn_up.weight": "7d31b20fbc2e6eba8f350f170069dc36f0cb12f68fbc4206ec5022a74085ebcb", + "blk.21.post_attention_norm.weight": "9638bae8d8bdcd7ed68da282979cd84a07c41ff9cabcaea94ebc846a1803db23", + "blk.21.post_ffw_norm.weight": "d622ef11115fe0cbe04b727d5a3b6371e7f39bf08c8d5eb9bc6da52e3f3cfb9d", + "blk.22.attn_k.weight": "5c321cb29deffbe57de200dd206a62005f1e80acb86c4fd2349dd44c8d3594fd", + "blk.22.attn_norm.weight": "198d949705d7170a331d75889d8c7500c3635254dac2cc6aa4dc35d556584536", + "blk.22.attn_output.weight": "19805cd5d7025b457e5d41d70db8b3fd63c2dd0e4a94d3ef1704d50ef4e749e8", + "blk.22.attn_q.weight": "177836cd583fc87405975ddc21ebfebdaa090a0363799664c72caa3da851ae2c", + "blk.22.attn_v.weight": "fea255692483e30d0108f9e4e250eb3ed7dbda8d83f499b06519b8c223ae6096", + "blk.22.ffn_down.weight": "00cb8939f03e5817d6d412de8cf2c923c9568d5493e382cec7faf5718fb034eb", + "blk.22.ffn_gate.weight": 
"b0591065b91281b2fbd8a9567f3568d40479f680e1f0a29e27ae213f37642489", + "blk.22.ffn_norm.weight": "96b5c5d0737c2ceb8fc869f54adb9e5f46e28cb7b177c40f49fa926b923c00f8", + "blk.22.ffn_up.weight": "81f472185b24344ab0594ea8246cc6e200e0dc1cab4943e74fbe4ca19d5a9701", + "blk.22.post_attention_norm.weight": "27fa9aa6260aa3071e0391e1a1d49322dcb6e8072315b8a9b7064087108dbd06", + "blk.22.post_ffw_norm.weight": "f37e1dcd7f643d9545675ffe9dc527a11eba86eb204989c2f44f636b266d896a", + "blk.23.attn_k.weight": "5d82f36658a56c3f94d0bb2d61f65509c966fa6568f81812e0d3e338b380ef8c", + "blk.23.attn_norm.weight": "b7983f88d9cad88bc88a528923e6da592ad20e699965b223ebc10840fe1f4fec", + "blk.23.attn_output.weight": "59f97f80f430d71606aab0158a195aed29ccd3405e6c0a5c41c809be8eb01898", + "blk.23.attn_q.weight": "53ac4789fe958919cc02ea4222bcd64c0ea1b4baa54304bff46635bdf42f7490", + "blk.23.attn_v.weight": "ec8abe09b9e84dbb52c7a068094657c6d3c62fe551ba8d7c3a3f23da622e9756", + "blk.23.ffn_down.weight": "3cf547eccb1b82aa64f208cee9682d7f558ca84e0aead7d9d3d1420d90f3d992", + "blk.23.ffn_gate.weight": "366aa2486d911ba81eb519119e13807deacf7e9908bc1975a2a63e00d6b10124", + "blk.23.ffn_norm.weight": "6d1d4a4af34bb7dc090ac87d6457d398c3e0fb68bd2e2b60b099dc318b6cfac3", + "blk.23.ffn_up.weight": "53f76692e253f5d2420b3f200c731b9f3b7a83e379920b4a067c729b4674aa4d", + "blk.23.post_attention_norm.weight": "7c952fa0efa76b3f048c8c4c9e8dcb5e3724d231327eda6423a34d3f3d3367de", + "blk.23.post_ffw_norm.weight": "7ab188cfe61f0a91b40309a0ab6bfa99f19d0ff2a37b6ac10e5f0c7f44eb5270", + "blk.24.attn_k.weight": "225798792f9bfdd10eff0505ebe61e0aad0209c17b431f6044ee7968ffe8c198", + "blk.24.attn_norm.weight": "635e3c1ebf5219bbebfc40ef164bc32d2b726ef595a94da64ac524ae878e2915", + "blk.24.attn_output.weight": "482f5bb2db8d9ed22b253d9a3296333b239efe698e5992e5d77e7e12dc2a5cf5", + "blk.24.attn_q.weight": "43805bbccddb65d58fffc4be9b5c374d4e1df1395ec1e1ffb4bcff03e98d5adb", + "blk.24.attn_v.weight": "fa741af54b4a3b1775d32f59134756090c5df2e7345a12a2d8db94fe289667a7", + "blk.24.ffn_down.weight": "83c6351e3162626b276f524a57836144625c2556dbe321b57cbd8fd486a68fab", + "blk.24.ffn_gate.weight": "fbe66be0d84d12cea5176cc7eaef64382ffc7324cd9d6266a3342dc43442f2ac", + "blk.24.ffn_norm.weight": "77c1445a8639ad24938bdf0280233eea2362d47391421833dfa72ec756dfc1e8", + "blk.24.ffn_up.weight": "78235ac729ee23c1cf1ae543751e3af32776d8808cee6e529c2a625a1f027654", + "blk.24.post_attention_norm.weight": "161f71b6d07628d43e4ae51a4c9088ec6ca2db123a17986a14505d83fdd04dad", + "blk.24.post_ffw_norm.weight": "cf1ba692aa683368b02ac413e69b2521b98c69a5274eacbb54165b53bf38a8b2", + "blk.25.attn_k.weight": "057a56bd8c8d2b41608d1f71faa3052902152ddf85e47669ad950c1c3e77c33f", + "blk.25.attn_norm.weight": "b7179fe02c334da556ddcf6c1b502245639a728c4cbba8b552d8e1df4565ee9d", + "blk.25.attn_output.weight": "4fed8b05b08a0ff75ffd022701bbeb52f17b23d09332a1ddcba737244bd0d3b0", + "blk.25.attn_q.weight": "c52e99f5d38bf7538d6106a0bbf38ac6dc6296bca9a3f849afa384ea67b4af01", + "blk.25.attn_v.weight": "c49c23d8e1cfa6a8eb971eb69942204890c6d7d830dc8774c84b108a80598912", + "blk.25.ffn_down.weight": "c08d4dc8412b19fdc870c164b83c341b236ec6fe7bb4a9bcfe0dc100faa20286", + "blk.25.ffn_gate.weight": "1a4cb3f36735d59181721471452807903006539e5e1b5ceb4f72d1d7ae134127", + "blk.25.ffn_norm.weight": "8fd6bd0dcec5198761525a36992a57c9ec5e9da60a22092839a84ae8c4e87f26", + "blk.25.ffn_up.weight": "3a00f39bdd5f31dc5e3b281d2002e1ac4f2475d49a0ac1d7720a25b377dcd04a", + "blk.25.post_attention_norm.weight": 
"e5f31a648612c859b6d21c9ee426e87a86cb1973dfdd86276c767371d9cef5ad", + "blk.25.post_ffw_norm.weight": "553c3bd774922c99c2384380a142d019881d30dbf0fe3bf9430dabfb3f6cbd33", + "output_norm.weight": "49445c4585ab0a8135717a0bdb1cda4a062a030177d0119561d91542aec5744b" +} diff --git a/docs/gpu.md b/docs/gpu.md index e669ea32..2913a2e2 100644 --- a/docs/gpu.md +++ b/docs/gpu.md @@ -10,7 +10,7 @@ Check your compute compatibility to see if your card is supported: | 9.0 | NVIDIA | `H100` | | 8.9 | GeForce RTX 40xx | `RTX 4090` `RTX 4080 SUPER` `RTX 4080` `RTX 4070 Ti SUPER` `RTX 4070 Ti` `RTX 4070 SUPER` `RTX 4070` `RTX 4060 Ti` `RTX 4060` | | | NVIDIA Professional | `L4` `L40` `RTX 6000` | -| 8.6 | GeForce RTX 30xx | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060` | +| 8.6 | GeForce RTX 30xx | `RTX 3090 Ti` `RTX 3090` `RTX 3080 Ti` `RTX 3080` `RTX 3070 Ti` `RTX 3070` `RTX 3060 Ti` `RTX 3060` `RTX 3050 Ti` `RTX 3050` | | | NVIDIA Professional | `A40` `RTX A6000` `RTX A5000` `RTX A4000` `RTX A3000` `RTX A2000` `A10` `A16` `A2` | | 8.0 | NVIDIA | `A100` `A30` | | 7.5 | GeForce GTX/RTX | `GTX 1650 Ti` `TITAN RTX` `RTX 2080 Ti` `RTX 2080` `RTX 2070` `RTX 2060` | diff --git a/docs/linux.md b/docs/linux.md index 46c17a20..0eec014f 100644 --- a/docs/linux.md +++ b/docs/linux.md @@ -1,43 +1,57 @@ -# Ollama on Linux +# Linux ## Install -Install Ollama running this one-liner: +To install Ollama, run the following command: -> - -```bash +```shell curl -fsSL https://ollama.com/install.sh | sh ``` -## AMD Radeon GPU support - -While AMD has contributed the `amdgpu` driver upstream to the official linux -kernel source, the version is older and may not support all ROCm features. We -recommend you install the latest driver from -https://www.amd.com/en/support/linux-drivers for best support of your Radeon -GPU. 
-
 ## Manual install
 
-### Download `ollama`
+Download and extract the package:
 
-Download and extract the Linux package:
-
-```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
+```shell
+curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz
+sudo tar -C /usr -xzf ollama-linux-amd64.tgz
 ```
 
-If you have an AMD GPU, also download and extract the ROCm package into the same location
-```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64-rocm.tgz | sudo tar zx -C /usr
+Start Ollama:
+
+```shell
+ollama serve
+```
+
+In another terminal, verify that Ollama is running:
+
+```shell
+ollama -v
+```
+
+### AMD GPU install
+
+If you have an AMD GPU, also download and extract the additional ROCm package:
+
+```shell
+curl -L https://ollama.com/download/ollama-linux-amd64-rocm.tgz -o ollama-linux-amd64-rocm.tgz
+sudo tar -C /usr -xzf ollama-linux-amd64-rocm.tgz
+```
+
+### ARM64 install
+
+Download and extract the ARM64-specific package:
+
+```shell
+curl -L https://ollama.com/download/ollama-linux-arm64.tgz -o ollama-linux-arm64.tgz
+sudo tar -C /usr -xzf ollama-linux-arm64.tgz
 ```
 
 ### Adding Ollama as a startup service (recommended)
 
 Create a user and group for Ollama:
 
-```bash
+```shell
 sudo useradd -r -s /bin/false -U -m -d /usr/share/ollama ollama
 sudo usermod -a -G ollama $(whoami)
 ```
@@ -63,47 +77,54 @@ WantedBy=default.target
 
 Then start the service:
 
-```bash
+```shell
 sudo systemctl daemon-reload
 sudo systemctl enable ollama
 ```
 
-### Install CUDA drivers (optional – for Nvidia GPUs)
+### Install CUDA drivers (optional)
 
 [Download and install](https://developer.nvidia.com/cuda-downloads) CUDA.
 
 Verify that the drivers are installed by running the following command, which should print details about your GPU:
 
-```bash
+```shell
 nvidia-smi
 ```
 
-### Install ROCm (optional - for Radeon GPUs)
-[Download and Install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html)
+### Install AMD ROCm drivers (optional)
 
-Make sure to install ROCm v6
+[Download and Install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html) ROCm v6.
 
 ### Start Ollama
 
 Start Ollama and verify it is running:
 
-```bash
+```shell
 sudo systemctl start ollama
 sudo systemctl status ollama
 ```
 
-## Update
+> [!NOTE]
+> While AMD has contributed the `amdgpu` driver upstream to the official Linux
+> kernel source, the version is older and may not support all ROCm features. We
+> recommend you install the latest driver from
+> https://www.amd.com/en/support/linux-drivers for best support of your Radeon
+> GPU.
 
-Update ollama by running the install script again:
+## Updating
 
-```bash
+Update Ollama by running the install script again:
+
+```shell
 curl -fsSL https://ollama.com/install.sh | sh
 ```
 
-Or by downloading the ollama binary:
+Or by re-downloading Ollama:
 
-```bash
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
+```shell
+curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz
+sudo tar -C /usr -xzf ollama-linux-amd64.tgz
 ```
 
 ## Installing specific versions
 
@@ -112,15 +133,15 @@ Use `OLLAMA_VERSION` environment variable with the install script to install a specific version
 
 For example:
 
-```
-curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.1.32 sh
+```shell
+curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.3.9 sh
 ```
 
 ## Viewing logs
 
 To view logs of Ollama running as a startup service, run:
 
-```bash
+```shell
 journalctl -e -u ollama
 ```
 
@@ -128,7 +149,7 @@ journalctl -e -u ollama
 
 Remove the ollama service:
 
-```bash
+```shell
 sudo systemctl stop ollama
 sudo systemctl disable ollama
 sudo rm /etc/systemd/system/ollama.service
@@ -136,13 +157,13 @@ sudo rm /etc/systemd/system/ollama.service
 
 Remove the ollama binary from your bin directory (either `/usr/local/bin`, `/usr/bin`, or `/bin`):
 
-```bash
+```shell
 sudo rm $(which ollama)
 ```
 
 Remove the downloaded models and Ollama service user and group:
 
-```bash
+```shell
 sudo rm -r /usr/share/ollama
 sudo userdel ollama
 sudo groupdel ollama
diff --git a/docs/windows.md b/docs/windows.md
index dbfc1440..f681ffac 100644
--- a/docs/windows.md
+++ b/docs/windows.md
@@ -48,6 +48,9 @@ the explorer window by hitting `+R` and type in:
 - `explorer %HOMEPATH%\.ollama` contains models and configuration
 - `explorer %TEMP%` contains temporary executable files in one or more `ollama*` directories
 
+## Uninstall
+
+The Ollama Windows installer registers an uninstaller application, so you can uninstall Ollama from `Add or remove programs` in Windows Settings.
 
 ## Standalone CLI
diff --git a/envconfig/config.go b/envconfig/config.go
index 908636a9..14e3cb0c 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -112,6 +112,26 @@ func KeepAlive() (keepAlive time.Duration) {
 	return keepAlive
 }
 
+// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
+// Zero or negative values are treated as infinite.
+// Default is 5 minutes.
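+//
+// For example:
+//
+//	OLLAMA_LOAD_TIMEOUT=10m  (a duration string is parsed as-is)
+//	OLLAMA_LOAD_TIMEOUT=600  (a bare integer is read as seconds)
+//	OLLAMA_LOAD_TIMEOUT=0    (zero or negative never times out)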
+func LoadTimeout() (loadTimeout time.Duration) {
+	loadTimeout = 5 * time.Minute
+	if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
+		if d, err := time.ParseDuration(s); err == nil {
+			loadTimeout = d
+		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
+			loadTimeout = time.Duration(n) * time.Second
+		}
+	}
+
+	if loadTimeout <= 0 {
+		return time.Duration(math.MaxInt64)
+	}
+
+	return loadTimeout
+}
+
 func Bool(k string) func() bool {
 	return func() bool {
 		if s := Var(k); s != "" {
@@ -231,6 +251,23 @@ var (
 	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
 )
 
+func Uint64(key string, defaultValue uint64) func() uint64 {
+	return func() uint64 {
+		if s := Var(key); s != "" {
+			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
+				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
+			} else {
+				return n
+			}
+		}
+
+		return defaultValue
+	}
+}
+
+// Set aside VRAM per GPU
+var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
+
 type EnvVar struct {
 	Name  string
 	Value any
@@ -241,9 +278,11 @@ func AsMap() map[string]EnvVar {
 	ret := map[string]EnvVar{
 		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
 		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
+		"OLLAMA_GPU_OVERHEAD":      {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
 		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
 		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
 		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
+		"OLLAMA_LOAD_TIMEOUT":      {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
 		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
 		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
 		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
diff --git a/envconfig/config_test.go b/envconfig/config_test.go
index d52a98a5..7ac7c53e 100644
--- a/envconfig/config_test.go
+++ b/envconfig/config_test.go
@@ -215,6 +215,40 @@ func TestKeepAlive(t *testing.T) {
 	}
 }
 
+func TestLoadTimeout(t *testing.T) {
+	defaultTimeout := 5 * time.Minute
+	cases := map[string]time.Duration{
+		"":       defaultTimeout,
+		"1s":     time.Second,
+		"1m":     time.Minute,
+		"1h":     time.Hour,
+		"5m0s":   defaultTimeout,
+		"1h2m3s": 1*time.Hour + 2*time.Minute + 3*time.Second,
+		"0":      time.Duration(math.MaxInt64),
+		"60":     60 * time.Second,
+		"120":    2 * time.Minute,
+		"3600":   time.Hour,
+		"-0":     time.Duration(math.MaxInt64),
+		"-1":     time.Duration(math.MaxInt64),
+		"-1m":    time.Duration(math.MaxInt64),
+		// invalid values
+		" ":   defaultTimeout,
+		"???": defaultTimeout,
+		"1d":  defaultTimeout,
+		"1y":  defaultTimeout,
+		"1w":  defaultTimeout,
+	}
+
+	for tt, expect := range cases {
+		t.Run(tt, func(t *testing.T) {
+			t.Setenv("OLLAMA_LOAD_TIMEOUT", tt)
+			if actual := LoadTimeout(); actual != expect {
+				t.Errorf("%s: expected %s, got %s", tt, expect, actual)
+			}
+		})
+	}
+}
+
 func TestVar(t *testing.T) {
 	cases := map[string]string{
 		"value": "value",
diff --git a/llm/ggml.go b/llm/ggml.go
index ab436095..c4475a94 100644
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -360,11 +360,13 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64) {
 	switch llm.KV().Architecture() {
 	case "llama":
-		fullOffload = 4 * batch * (1 + 4*embedding + context*(1+heads))
+		fullOffload = max(
+			4*batch*(1+4*embedding+context*(1+heads)),
+			4*batch*(embedding+vocab),
+		)
 
 		partialOffload = 4 * batch * embedding
 		partialOffload += max(
-			// 4*batch*(4+6*embedding+context*(2*heads)+llm.KV().GQA()),
 			4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embeddingHeads*headsKV),
 			4*batch*(embedding+vocab)+embedding*vocab*105/128,
 		)
diff --git a/llm/memory.go b/llm/memory.go
index 19b12cbf..99db7629 100644
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -7,6 +7,7 @@ import (
 	"strings"
 
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
 )
@@ -94,6 +95,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts api.Options) MemoryEstimate {
 	// Overflow that didn't fit into the GPU
 	var overflow uint64
 
+	overhead := envconfig.GpuOverhead()
 	availableList := make([]string, len(gpus))
 	for i, gpu := range gpus {
 		availableList[i] = format.HumanBytes2(gpu.FreeMemory)
@@ -164,8 +166,22 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts api.Options) MemoryEstimate {
 			gzo = gpuZeroOverhead
 		}
 		// Only include GPUs that can fit the graph, gpu minimum, the layer buffer and at least more layer
-		if gpus[i].FreeMemory < gzo+max(graphPartialOffload, graphFullOffload)+gpus[i].MinimumMemory+2*layerSize {
-			slog.Debug("gpu has too little memory to allocate any layers", "gpu", gpus[i])
+		if (gpus[i].FreeMemory - overhead) < gzo+max(graphPartialOffload, graphFullOffload)+gpus[i].MinimumMemory+2*layerSize {
+			slog.Debug("gpu has too little memory to allocate any layers",
+				"id", gpus[i].ID,
+				"library", gpus[i].Library,
+				"variant", gpus[i].Variant,
+				"compute", gpus[i].Compute,
+				"driver", fmt.Sprintf("%d.%d", gpus[i].DriverMajor, gpus[i].DriverMinor),
+				"name", gpus[i].Name,
+				"total", format.HumanBytes2(gpus[i].TotalMemory),
+				"available", format.HumanBytes2(gpus[i].FreeMemory),
+				"minimum_memory", gpus[i].MinimumMemory,
+				"layer_size", format.HumanBytes2(layerSize),
+				"gpu_zero_overhead", format.HumanBytes2(gzo),
+				"partial_offload", format.HumanBytes2(graphPartialOffload),
+				"full_offload", format.HumanBytes2(graphFullOffload),
+			)
 			continue
 		}
 		gpusWithSpace = append(gpusWithSpace, gs{i, &gpus[i]})
@@ -196,7 +212,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts api.Options) MemoryEstimate {
 		for j := len(gpusWithSpace); j > 0; j-- {
 			g := gpusWithSpace[i%j]
 			used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload)
-			if g.g.FreeMemory > used+layerSize {
+			if (g.g.FreeMemory - overhead) > used+layerSize {
 				gpuAllocations[g.i] += layerSize
 				layerCounts[g.i]++
 				layerCount++
@@ -219,7 +235,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts api.Options) MemoryEstimate {
 		for j := len(gpusWithSpace); j > 0; j-- {
 			g := gpusWithSpace[layerCount%j]
 			used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload)
-			if g.g.FreeMemory > used+memoryLayerOutput {
+			if (g.g.FreeMemory - overhead) > used+memoryLayerOutput {
 				gpuAllocations[g.i] += memoryLayerOutput
 				layerCounts[g.i]++
 				layerCount++
@@ -306,6 +322,7 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts api.Options) MemoryEstimate {
 }
 
 func (m MemoryEstimate) log() {
+	overhead := envconfig.GpuOverhead()
 	slog.Info(
 		"offload to "+m.inferenceLibrary,
 		slog.Group(
@@ -323,6 +340,7 @@ func (m MemoryEstimate) log() {
 			"memory",
 			// memory available by GPU for offloading
 			"available",
 			m.availableList,
+			"gpu_overhead", format.HumanBytes2(overhead),
 			slog.Group(
 				"required",
 				// memory required for full offloading
diff --git a/llm/server.go b/llm/server.go
index 9c08f1bb..28eb8d6f 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -584,8 +584,7 @@ func (s *llmServer) Ping(ctx context.Context) error {
 
 func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 	start := time.Now()
-	stallDuration := 5 * time.Minute            // If no progress happens
-	finalLoadDuration := 5 * time.Minute        // After we hit 100%, give the runner more time to come online
+	stallDuration := envconfig.LoadTimeout()    // If no progress happens
 	stallTimer := time.Now().Add(stallDuration) // give up if we stall
 
 	slog.Info("waiting for llama runner to start responding")
@@ -637,7 +636,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 				stallTimer = time.Now().Add(stallDuration)
 			} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 {
 				slog.Debug("model load completed, waiting for server to become available", "status", status.ToString())
-				stallTimer = time.Now().Add(finalLoadDuration)
+				stallTimer = time.Now().Add(stallDuration)
 				fullyLoaded = true
 			}
 			time.Sleep(time.Millisecond * 250)
diff --git a/openai/openai.go b/openai/openai.go
index bda42b4d..ea540257 100644
--- a/openai/openai.go
+++ b/openai/openai.go
@@ -79,7 +79,7 @@ type ChatCompletionRequest struct {
 	Stop             any             `json:"stop"`
 	Temperature      *float64        `json:"temperature"`
 	FrequencyPenalty *float64        `json:"frequency_penalty"`
-	PresencePenalty  *float64        `json:"presence_penalty_penalty"`
+	PresencePenalty  *float64        `json:"presence_penalty"`
 	TopP             *float64        `json:"top_p"`
 	ResponseFormat   *ResponseFormat `json:"response_format"`
 	Tools            []api.Tool      `json:"tools"`
@@ -513,7 +513,7 @@ func fromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
 	}
 
 	if r.Temperature != nil {
-		options["temperature"] = *r.Temperature * 2.0
+		options["temperature"] = *r.Temperature
 	} else {
 		options["temperature"] = 1.0
 	}
@@ -522,9 +522,9 @@ func fromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
 		options["seed"] = *r.Seed
 	}
 
-	options["frequency_penalty"] = r.FrequencyPenalty * 2.0
+	options["frequency_penalty"] = r.FrequencyPenalty
 
-	options["presence_penalty"] = r.PresencePenalty * 2.0
+	options["presence_penalty"] = r.PresencePenalty
 
 	if r.TopP != 0.0 {
 		options["top_p"] = r.TopP
diff --git a/openai/openai_test.go b/openai/openai_test.go
index c7e9f384..25f570a1 100644
--- a/openai/openai_test.go
+++ b/openai/openai_test.go
@@ -22,7 +22,10 @@ const (
 	image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
 )
 
-var False = false
+var (
+	False = false
+	True  = true
+)
 
 func captureRequestMiddleware(capturedRequest any) gin.HandlerFunc {
 	return func(c *gin.Context) {
@@ -70,6 +73,44 @@ func TestChatMiddleware(t *testing.T) {
 				Stream: &False,
 			},
 		},
+		{
+			name: "chat handler with options",
+			body: `{
+				"model": "test-model",
+				"messages": [
+					{"role": "user", "content": "Hello"}
+				],
+				"stream": true,
+				"max_tokens": 999,
+				"seed": 123,
+				"stop": ["\n", "stop"],
+				"temperature": 3.0,
+				"frequency_penalty": 4.0,
+				"presence_penalty": 5.0,
+				"top_p": 6.0,
+				"response_format": {"type": "json_object"}
+			}`,
+			req: api.ChatRequest{
+				Model: "test-model",
+				Messages: []api.Message{
+					{
+						Role:    "user",
+						Content: "Hello",
+					},
+				},
+				Options: map[string]any{
+					"num_predict":       999.0, // float because JSON doesn't distinguish between float and int
+					"seed":              123.0,
+					"stop":              []any{"\n", "stop"},
"temperature": 6.0, + "frequency_penalty": 8.0, + "presence_penalty": 10.0, + "top_p": 6.0, + }, + Format: "json", + Stream: &True, + }, + }, { name: "chat handler with image content", body: `{ @@ -186,6 +227,8 @@ func TestChatMiddleware(t *testing.T) { req, _ := http.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(tc.body)) req.Header.Set("Content-Type", "application/json") + defer func() { capturedRequest = nil }() + resp := httptest.NewRecorder() router.ServeHTTP(resp, req) @@ -202,7 +245,6 @@ func TestChatMiddleware(t *testing.T) { if !reflect.DeepEqual(tc.err, errResp) { t.Fatal("errors did not match") } - capturedRequest = nil }) } } @@ -233,7 +275,7 @@ func TestCompletionsMiddleware(t *testing.T) { Options: map[string]any{ "frequency_penalty": 0.0, "presence_penalty": 0.0, - "temperature": 1.6, + "temperature": 0.8, "top_p": 1.0, "stop": []any{"\n", "stop"}, }, diff --git a/scripts/install.sh b/scripts/install.sh index 5a212975..79a7b564 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -356,12 +356,12 @@ if ! lsmod | grep -q nvidia || ! lsmod | grep -q nvidia_uvm; then fi # make sure the NVIDIA modules are loaded on boot with nvidia-persistenced -if command -v nvidia-persistenced > /dev/null 2>&1; then +if available nvidia-persistenced; then $SUDO touch /etc/modules-load.d/nvidia.conf MODULES="nvidia nvidia-uvm" for MODULE in $MODULES; do if ! grep -qxF "$MODULE" /etc/modules-load.d/nvidia.conf; then - echo "$MODULE" | sudo tee -a /etc/modules-load.d/nvidia.conf > /dev/null + echo "$MODULE" | $SUDO tee -a /etc/modules-load.d/nvidia.conf > /dev/null fi done fi diff --git a/server/download.go b/server/download.go index 02f7ae88..a3b53189 100644 --- a/server/download.go +++ b/server/download.go @@ -256,7 +256,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis continue } defer resp.Body.Close() - if resp.StatusCode != http.StatusTemporaryRedirect { + if resp.StatusCode != http.StatusTemporaryRedirect && resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("unexpected status code %d", resp.StatusCode) } return resp.Location()