Compare commits
main...build_dist (1 commit: 7893ccb68c)

.github/workflows/test.yaml (vendored): 17 lines changed
@@ -73,12 +73,12 @@ jobs:
           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
           $env:PATH="$gopath;$gccpath;$env:PATH"
           echo $env:PATH
-          go generate -x ./...
+          $env:GOARCH=""; $env:OLLAMA_BUILD_TARGET_ARCH="${{ matrix.arch }}"; go generate -x ./...
         if: ${{ startsWith(matrix.os, 'windows-') }}
-        name: 'Windows Go Generate'
+        name: 'Windows Generate'
-      - run: go generate -x ./...
+      - run: GOARCH= OLLAMA_BUILD_TARGET_ARCH=${{ matrix.arch }} go generate -x ./...
         if: ${{ ! startsWith(matrix.os, 'windows-') }}
-        name: 'Unix Go Generate'
+        name: 'Unix Generate'
       - uses: actions/upload-artifact@v4
         with:
           name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
@@ -184,7 +184,7 @@ jobs:
           $env:OLLAMA_SKIP_CPU_GENERATE="1"
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           go generate -x ./...
-        name: go generate
+        name: go generate -x ./...
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       # TODO - do we need any artifacts?
@@ -217,7 +217,7 @@ jobs:
       - name: 'Verify CUDA'
        run: nvcc -V
      - run: go get ./...
-      - name: go generate
+      - name: go generate -x ./...
        run: |
          $gopath=(get-command go).source | split-path -parent
          $cudabin=(get-command nvcc).source | split-path
@@ -312,7 +312,10 @@ jobs:
           touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
         if: ${{ startsWith(matrix.os, 'macos-') }}
         shell: bash
-      - run: go generate ./...
+      - run: $env:GOARCH=""; $env:OLLAMA_BUILD_TARGET_ARCH="${{ matrix.arch }}"; go generate -x ./...
+        if: ${{ startsWith(matrix.os, 'windows-') }}
+      - run: GOARCH= OLLAMA_BUILD_TARGET_ARCH=${{ matrix.arch }} go generate -x ./...
+        if: ${{ ! startsWith(matrix.os, 'windows-') }}
       - run: go build
       - run: go test -v ./...
       - uses: actions/upload-artifact@v4
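For illustration, the updated Unix generate step corresponds roughly to the following local invocation; arm64 stands in for the workflow's `matrix.arch` value and is only an example:

```bash
# GOARCH is cleared so the generate tooling itself builds for the host,
# while OLLAMA_BUILD_TARGET_ARCH tells the generate scripts which
# architecture to produce runners for (illustrative value).
GOARCH= OLLAMA_BUILD_TARGET_ARCH=arm64 go generate -x ./...
```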
README.md: 75 lines changed

@@ -1,12 +1,12 @@
 <div align="center">
  <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
 </div>
 
 # Ollama
 
 [](https://discord.gg/ollama)
 
-Get up and running with large language models.
+Get up and running with large language models locally.
 
 ### macOS
 
@@ -51,17 +51,15 @@ Here are some example models that can be downloaded:
 | ------------------ | ---------- | ----- | ------------------------------ |
 | Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
 | Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
-| Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
-| Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
-| Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
-| Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
+| Phi-3              | 3,8B       | 2.3GB | `ollama run phi3`              |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
-| Moondream 2        | 1.4B       | 829MB | `ollama run moondream`         |
 | Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`       |
 | Starling           | 7B         | 4.1GB | `ollama run starling-lm`       |
 | Code Llama         | 7B         | 3.8GB | `ollama run codellama`         |
 | Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored` |
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
+| Gemma              | 2B         | 1.4GB | `ollama run gemma:2b`          |
+| Gemma              | 7B         | 4.8GB | `ollama run gemma:7b`          |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |
 
 > Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -175,19 +173,13 @@ I'm a basic program that prints the famous "Hello, world!" message to the console.
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```
 
-### Pass the prompt as an argument
+### Pass in prompt as arguments
 
 ```
 $ ollama run llama3 "Summarize this file: $(cat README.md)"
 Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```
 
-### Show model information
-
-```
-ollama show llama3
-```
-
 ### List models on your computer
 
 ```
@@ -200,7 +192,19 @@ ollama list
 
 ## Building
 
-See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
+Install `cmake` and `go`:
+
+```
+brew install cmake go
+```
+
+Then build the binary:
+
+```
+go run build.go
+```
+
+More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
 
 ### Running local builds
 
@@ -248,7 +252,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 
 - [Open WebUI](https://github.com/open-webui/open-webui)
 - [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
-- [Hollama](https://github.com/fmaclen/hollama)
 - [Lollms-Webui](https://github.com/ParisNeo/lollms-webui)
 - [LibreChat](https://github.com/danny-avila/LibreChat)
 - [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
@@ -275,24 +278,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [OllamaGUI](https://github.com/enoch1118/ollamaGUI)
 - [OpenAOE](https://github.com/InternLM/OpenAOE)
 - [Odin Runes](https://github.com/leonid20000/OdinRunes)
-- [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App)
+- [LLM-X: Progressive Web App](https://github.com/mrdjohnson/llm-x)
 - [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
 - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
-- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Chat with Code Repository)
-- [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
-- [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
-- [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding)
-- [StreamDeploy](https://github.com/StreamDeploy-DevRel/streamdeploy-llm-app-scaffold) (LLM Application Scaffold)
-- [chat](https://github.com/swuecho/chat) (chat web app for teams)
+- [QA-Pilot: Chat with Code Repository](https://github.com/reid41/QA-Pilot)
+- [ChatOllama: Open Source Chatbot based on Ollama with Knowledge Bases](https://github.com/sugarforever/chat-ollama)
+- [CRAG Ollama Chat: Simple Web Search with Corrective RAG](https://github.com/Nagi-ovo/CRAG-Ollama-Chat)
+- [RAGFlow: Open-source Retrieval-Augmented Generation engine based on deep document understanding](https://github.com/infiniflow/ragflow)
+- [chat: chat web app for teams](https://github.com/swuecho/chat)
 - [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
-- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
-- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
-- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
-- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
-- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
-- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
-- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
+- [Ollama RAG Chatbot: Local Chat with multiples PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
 
 ### Terminal
 
@@ -315,7 +311,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ShellOracle](https://github.com/djcopley/ShellOracle)
 - [tlm](https://github.com/yusufcanb/tlm)
 - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
-- [gollama](https://github.com/sammcj/gollama)
 
 ### Database
 
@@ -326,20 +321,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
 
 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
-- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
 
 ### Libraries
 
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
-- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
 - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
-- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
 - [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
@@ -350,13 +342,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
 - [Elixir LangChain](https://github.com/brainlid/langchain)
 - [Ollama for R - rollama](https://github.com/JBGruber/rollama)
-- [Ollama for R - ollama-r](https://github.com/hauselin/ollama-r)
 - [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
 - [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
 - [Testcontainers](https://testcontainers.com/modules/ollama/)
-- [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
-- [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
-- [LlamaScript](https://github.com/Project-Llama/llamascript)
 
 ### Mobile
 
@@ -376,23 +364,18 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
 - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
+- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
 - [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
 - [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
 - [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
 - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
-- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
-- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
 - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
-- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
-- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
 
-### Supported backends
-
-- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
-
+### Supported backends
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
build.go (new file): 199 lines added
@@ -0,0 +1,199 @@
//go:build ignore

package main

import (
	"cmp"
	"errors"
	"flag"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
)

// Flags
var (
	flagRegenerateDestroy = flag.Bool("d", false, "force regenerate the dependencies (destructive)")
	flagRegenerateGently  = flag.Bool("g", false, "regenerate the dependencies (non-destructive)")
	flagSkipBuild         = flag.Bool("s", false, "generate dependencies only (e.g. skip 'go build .')")

	// Flags to set GOARCH explicitly for cross-platform builds,
	// e.g., in CI to target a different platform than the build matrix
	// default. These allows us to run generate without a separate build
	// step for building the script binary for the host ARCH and then
	// runing the generate script for the target ARCH. Instead, we can
	// just run `go run build.go -target=$GOARCH` to generate the
	// deps.
	flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
)

func buildEnv() []string {
	return append(os.Environ(), "GOARCH="+cmp.Or(
		*flagGOARCH,
		os.Getenv("OLLAMA_BUILD_TARGET_ARCH"),
		runtime.GOARCH,
	))
}

func main() {
	log.SetFlags(0)
	flag.Usage = func() {
		log.Printf("Usage: go run build.go [flags]")
		log.Println()
		log.Println("Flags:")
		flag.PrintDefaults()
		log.Println()
		log.Println("This script builds the Ollama server binary and generates the llama.cpp")
		log.Println("bindings for the current platform. It assumes that the current working")
		log.Println("directory is the root directory of the Ollama project.")
		log.Println()
		log.Println("If the -d flag is provided, the script will force regeneration of the")
		log.Println("dependencies; removing the 'llm/build' directory before starting.")
		log.Println()
		log.Println("If the -g flag is provided, the script will regenerate the dependencies")
		log.Println("without removing the 'llm/build' directory.")
		log.Println()
		log.Println("If the -s flag is provided, the script will skip building the Ollama binary")
		log.Println()
		log.Println("If the -target flag is provided, the script will set GOARCH to the value")
		log.Println("of the flag. This is useful for cross-platform builds.")
		log.Println()
		log.Println("The script will check for the required dependencies (cmake, gcc) and")
		log.Println("print their version.")
		log.Println()
		log.Println("The script will also check if it is being run from the root directory of")
		log.Println("the Ollama project.")
		log.Println()
		os.Exit(1)
	}
	flag.Parse()

	log.Printf("=== Building Ollama ===")
	defer func() {
		log.Printf("=== Done building Ollama ===")
		if !*flagSkipBuild {
			log.Println()
			log.Println("To run the Ollama server, use:")
			log.Println()
			log.Println(" ./ollama serve")
			log.Println()
		}
	}()

	if flag.NArg() > 0 {
		flag.Usage()
	}

	if !inRootDir() {
		log.Fatalf("Please run this script from the root directory of the Ollama project.")
	}

	if err := checkDependencies(); err != nil {
		log.Fatalf("Failed dependency check: %v", err)
	}
	if err := buildLlammaCPP(); err != nil {
		log.Fatalf("Failed to build llama.cpp: %v", err)
	}
	if err := goBuildOllama(); err != nil {
		log.Fatalf("Failed to build ollama Go binary: %v", err)
	}
}

// checkDependencies does a quick check to see if the required dependencies are
// installed on the system and functioning enough to print their version.
//
// TODO(bmizerany): Check the actual version of the dependencies? Seems a
// little daunting given diff versions might print diff things. This should
// be good enough for now.
func checkDependencies() error {
	var err error
	check := func(name string, args ...string) {
		log.Printf("=== Checking for %s ===", name)
		defer log.Printf("=== Done checking for %s ===\n\n", name)
		cmd := exec.Command(name, args...)
		cmd.Stdout = os.Stdout
		cmd.Stderr = os.Stderr
		err = errors.Join(err, cmd.Run())
	}

	check("cmake", "--version")
	check("gcc", "--version")
	return err
}

func goBuildOllama() error {
	log.Println("=== Building Ollama binary ===")
	defer log.Printf("=== Done building Ollama binary ===\n\n")
	if *flagSkipBuild {
		log.Println("Skipping 'go build -o ollama .'")
		return nil
	}
	cmd := exec.Command("go", "build", "-o", "ollama", ".")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Env = buildEnv()
	return cmd.Run()
}

// buildLlammaCPP generates the llama.cpp bindings for the current platform.
//
// It assumes that the current working directory is the root directory of the
// Ollama project.
func buildLlammaCPP() error {
	log.Println("=== Generating dependencies ===")
	defer log.Printf("=== Done generating dependencies ===\n\n")
	if *flagRegenerateDestroy {
		if err := os.RemoveAll(filepath.Join("llm", "build")); err != nil {
			return err
		}
	}
	if isDirectory(filepath.Join("llm", "build")) && !*flagRegenerateGently {
		log.Println("llm/build already exists; skipping. Use -d or -g to re-generate.")
		return nil
	}

	scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
	if err != nil {
		return err
	}

	var cmd *exec.Cmd
	switch runtime.GOOS {
	case "windows":
		script := filepath.Join(scriptDir, "gen_windows.ps1")
		cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
	case "linux":
		script := filepath.Join(scriptDir, "gen_linux.sh")
		cmd = exec.Command("bash", script)
	case "darwin":
		script := filepath.Join(scriptDir, "gen_darwin.sh")
		cmd = exec.Command("bash", script)
	default:
		log.Fatalf("Unsupported OS: %s", runtime.GOOS)
	}
	cmd.Dir = filepath.Join("llm", "generate")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Env = buildEnv()

	log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, runtime.GOARCH, cmd.Args)

	return cmd.Run()
}

func isDirectory(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		return false
	}
	return info.IsDir()
}

// inRootDir returns true if the current working directory is the root
// directory of the Ollama project. It looks for a file named "go.mod".
func inRootDir() bool {
	_, err := os.Stat("go.mod")
	return err == nil
}
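For illustration, the flags defined above suggest invocations such as the following; the arm64 target is only an example, not something this change prescribes:

```bash
go run build.go                # generate llama.cpp deps (if missing) and build ./ollama
go run build.go -d             # destructive re-generate of llm/build, then build
go run build.go -g             # non-destructive re-generate, then build
go run build.go -s             # generate deps only, skip 'go build .'
go run build.go -target=arm64  # cross-target GOARCH, e.g. from CI (illustrative value)
```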
docs/development.md
@@ -25,13 +25,7 @@ export OLLAMA_DEBUG=1
 Get the required libraries and build the native LLM code:
 
 ```bash
-go generate ./...
-```
-
-Then build ollama:
-
-```bash
-go build .
+go run build.go
 ```
 
 Now you can run `ollama`:
@@ -40,6 +34,16 @@ Now you can run `ollama`:
 ./ollama
 ```
+
+### Rebuilding the native code
+
+If at any point you need to rebuild the native code, you can run the
+build.go script again using the `-d` flag to force a rebuild, and,
+optionally, the `-s` flag to skip building the Go binary:
+
+```bash
+go run build.go -d -s
+```
 
 ### Linux
 
 #### Linux CUDA (NVIDIA)
@@ -55,16 +59,10 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
 libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
 a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
-
-Then generate dependencies:
-
-```
-go generate ./...
-```
 
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 #### Linux ROCm (AMD)
@@ -80,21 +78,17 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
 CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
 the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
-
-```
-go generate ./...
-```
 
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
 
 #### Advanced CPU Settings
 
-By default, running `go generate ./...` will compile a few different variations
+By default, running `go run build.go` will compile a few different variations
 of the LLM library based on common CPU families and vector math capabilities,
 including a lowest-common-denominator which should run on almost any 64 bit CPU
 somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
@@ -104,8 +98,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
 you might use:
 
 ```
-OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
-go build .
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go
 ```
 
 #### Containerized Linux Build
@@ -129,8 +122,7 @@ Then, build the `ollama` binary:
 
 ```powershell
 $env:CGO_ENABLED="1"
-go generate ./...
-go build .
+go run build.go
 ```
 
 #### Windows CUDA (NVIDIA)
llm/generate/gen_darwin.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be ./llm/generate/
+# This script is intended to run inside the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
 
 # TODO - add hardening to detect missing tools (cmake, etc.)
 
@@ -92,10 +92,10 @@ case "${GOARCH}" in
     ;;
     *)
         echo "GOARCH must be set"
-        echo "this script is meant to be run from within go generate"
+        echo "this script is meant to be run from within 'go run build.go'"
         exit 1
         ;;
 esac
 
 cleanup
-echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
llm/generate/gen_linux.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be llm/generate/
+# This script is intended to run with the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
 
 # First we build one or more CPU based LLM libraries
 #
@@ -281,4 +281,4 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
 fi
 
 cleanup
-echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
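For reference, these scripts are no longer driven by `go generate`; per build.go above, the script sets the working directory to llm/generate and exports GOARCH before calling them. On Linux this amounts to roughly the following sketch (arm64 is an illustrative target):

```bash
# Approximately what 'go run build.go -target=arm64' runs on Linux:
cd llm/generate
GOARCH=arm64 bash gen_linux.sh
```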
llm/generate/gen_windows.ps1
@@ -26,26 +26,15 @@ function amdGPUs {
     $GPU_LIST -join ';'
 }
 
 function init_vars {
-    if (!$script:SRC_DIR) {
-        $script:SRC_DIR = $(resolve-path "..\..\")
-    }
-    if (!$script:llamacppDir) {
-        $script:llamacppDir = "../llama.cpp"
-    }
-    if (!$script:cmakeTargets) {
-        $script:cmakeTargets = @("ollama_llama_server")
-    }
+    $script:SRC_DIR = $(resolve-path "..\..\")
+    $script:llamacppDir = "../llama.cpp"
     $script:cmakeDefs = @(
         "-DBUILD_SHARED_LIBS=on",
-        "-DLLAMA_NATIVE=off",
-        "-DLLAMA_OPENMP=off"
+        "-DLLAMA_NATIVE=off"
     )
-    $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
-    $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
-    $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
-    md "$script:DIST_BASE" -ea 0 > $null
+    $script:cmakeTargets = @("ollama_llama_server")
+    $script:ARCH = "amd64" # arm not yet supported.
     if ($env:CGO_CFLAGS -contains "-g") {
         $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
         $script:config = "RelWithDebInfo"
@@ -66,6 +55,7 @@ function init_vars
     } else {
         $script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR
     }
+    $script:GZIP=(get-command -ea 'silentlycontinue' gzip).path
     $script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
     if ($null -eq $env:CMAKE_CUDA_ARCHITECTURES) {
         $script:CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
@@ -123,13 +113,8 @@ function build
     & cmake --version
     & cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    if ($cmakeDefs -contains "-G") {
-        $extra=@("-j8")
-    } else {
-        $extra= @("--", "/p:CL_MPcount=8")
-    }
-    write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
-    & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
+    write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ })"
+    & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
     # Rearrange output to be consistent between different generators
     if ($null -ne ${script:config} -And (test-path -path "${script:buildDir}/bin/${script:config}" ) ) {
@@ -149,18 +134,21 @@ function sign
     }
 }
 
-function install {
-    write-host "Installing binaries to dist dir ${script:distDir}"
-    mkdir ${script:distDir} -ErrorAction SilentlyContinue
+function compress {
+    if ($script:GZIP -eq $null) {
+        write-host "gzip not installed, not compressing files"
+        return
+    }
+    write-host "Compressing binaries..."
     $binaries = dir "${script:buildDir}/bin/*.exe"
     foreach ($file in $binaries) {
-        copy-item -Path $file -Destination ${script:distDir} -Force
+        & "$script:GZIP" --best -f $file
     }
 
-    write-host "Installing dlls to dist dir ${script:distDir}"
+    write-host "Compressing dlls..."
     $dlls = dir "${script:buildDir}/bin/*.dll"
    foreach ($file in $dlls) {
-        copy-item -Path $file -Destination ${script:distDir} -Force
+        & "$script:GZIP" --best -f $file
    }
 }
 
@@ -181,252 +169,132 @@ function cleanup
     }
 }
 
+init_vars
+git_module_setup
+apply_patches
 
 # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
 # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
 # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
 
+$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
 
-function build_static() {
-    if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
+if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {
     # GCC build for direct linking into the Go binary
     init_vars
     # cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
     # as we need this to be compiled by gcc for golang to be able to link with itx
     write-host "Checking for MinGW..."
     # error action ensures we exit on failure
     get-command gcc
     get-command mingw32-make
-        $oldTargets = $script:cmakeTargets
-        $script:cmakeTargets = @("llama", "ggml")
-        $script:cmakeDefs = @(
-            "-G", "MinGW Makefiles"
-            "-DCMAKE_C_COMPILER=gcc.exe",
-            "-DCMAKE_CXX_COMPILER=g++.exe",
-            "-DBUILD_SHARED_LIBS=off",
-            "-DLLAMA_NATIVE=off",
-            "-DLLAMA_AVX=off",
-            "-DLLAMA_AVX2=off",
-            "-DLLAMA_AVX512=off",
-            "-DLLAMA_F16C=off",
-            "-DLLAMA_FMA=off",
-            "-DLLAMA_OPENMP=off")
-        $script:buildDir="../build/windows/${script:ARCH}_static"
-        write-host "Building static library"
-        build
-        $script:cmakeTargets = $oldTargets
-    } else {
-        write-host "Skipping CPU generation step as requested"
-    }
+    $script:cmakeTargets = @("llama", "ggml")
+    $script:cmakeDefs = @(
+        "-G", "MinGW Makefiles"
+        "-DCMAKE_C_COMPILER=gcc.exe",
+        "-DCMAKE_CXX_COMPILER=g++.exe",
+        "-DBUILD_SHARED_LIBS=off",
+        "-DLLAMA_NATIVE=off",
+        "-DLLAMA_AVX=off",
+        "-DLLAMA_AVX2=off",
+        "-DLLAMA_AVX512=off",
+        "-DLLAMA_F16C=off",
+        "-DLLAMA_FMA=off")
+    $script:buildDir="../build/windows/${script:ARCH}_static"
+    write-host "Building static library"
+    build
+
+    # remaining llama.cpp builds use MSVC
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu"
+    write-host "Building LCD CPU"
+    build
+    sign
+    compress
+
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
+    write-host "Building AVX CPU"
+    build
+    sign
+    compress
+
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
+    write-host "Building AVX2 CPU"
+    build
+    sign
+    compress
+} else {
+    write-host "Skipping CPU generation step as requested"
 }
 
-function build_cpu($gen_arch) {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
-        # remaining llama.cpp builds use MSVC
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu"
-        $script:distDir="$script:DIST_BASE\cpu"
-        write-host "Building LCD CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU generation step as requested"
-    }
-}
-
-function build_cpu_avx() {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
-        $script:distDir="$script:DIST_BASE\cpu_avx"
-        write-host "Building AVX CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU AVX generation step as requested"
-    }
-}
-
-function build_cpu_avx2() {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
-        $script:distDir="$script:DIST_BASE\cpu_avx2"
-        write-host "Building AVX2 CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU AVX2 generation step as requested"
-    }
-}
-
-function build_cuda() {
-    if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
-        # Then build cuda as a dynamically loaded library
-        $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
-        $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
-        if ($null -ne $script:CUDA_VERSION) {
-            $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
-        }
-        init_vars
-        $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
-        $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
-        $script:cmakeDefs += @(
-            "-A", "x64",
-            "-DLLAMA_CUDA=ON",
-            "-DLLAMA_AVX=on",
-            "-DLLAMA_AVX2=off",
-            "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR",
-            "-DCMAKE_CUDA_FLAGS=-t8",
-            "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}"
-        )
-        if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
-            write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
-            $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
-            write-host "building custom CUDA GPU"
-        }
-        build
-        sign
-        install
-
-        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null
-        write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-    } else {
-        write-host "Skipping CUDA generation step"
-    }
-}
-
-function build_oneapi() {
-    if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
-        # Get oneAPI version
-        $script:ONEAPI_VERSION = icpx --version
-        $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
-        if ($null -ne $script:ONEAPI_VERSION) {
-            $script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION
+if ($null -ne $script:CUDA_LIB_DIR) {
+    # Then build cuda as a dynamically loaded library
+    $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
+    $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
+    if ($null -ne $script:CUDA_VERSION) {
+        $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
     }
     init_vars
-        $script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT"
-        $script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
-        $script:cmakeDefs += @(
-            "-G", "MinGW Makefiles",
-            "-DLLAMA_SYCL=ON",
-            "-DCMAKE_C_COMPILER=icx",
-            "-DCMAKE_CXX_COMPILER=icx",
-            "-DCMAKE_BUILD_TYPE=Release"
-        )
+    $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
+    $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
+        write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
+        $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
+        write-host "building custom CUDA GPU"
+    }
+    build
+    sign
+    compress
+}
 
-        Write-Host "Building oneAPI"
+if ($null -ne $env:HIP_PATH) {
+    $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+    if ($null -ne $script:ROCM_VERSION) {
+        $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
+    }
+
+    init_vars
+    $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
+    $script:cmakeDefs += @(
+        "-G", "Ninja",
+        "-DCMAKE_C_COMPILER=clang.exe",
+        "-DCMAKE_CXX_COMPILER=clang++.exe",
+        "-DLLAMA_HIPBLAS=on",
+        "-DHIP_PLATFORM=amd",
+        "-DLLAMA_AVX=on",
+        "-DLLAMA_AVX2=off",
+        "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+        "-DAMDGPU_TARGETS=$(amdGPUs)",
+        "-DGPU_TARGETS=$(amdGPUs)"
+    )

+    # Make sure the ROCm binary dir is first in the path
+    $env:PATH="$env:HIP_PATH\bin;$env:PATH"
+
+    # We have to clobber the LIB var from the developer shell for clang to work properly
+    $env:LIB=""
+    if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
+        write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
+        $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
+        write-host "building custom ROCM GPU"
+    }
+    write-host "Building ROCm"
     build
     # Ninja doesn't prefix with config name
+    ${script:config}=""
     if ($null -ne $script:DUMPBIN) {
-        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll"
+        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
     }
     sign
-    install
+    compress
 
-    rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    md "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    } else {
-        Write-Host "Skipping oneAPI generation step"
-    }
 }
 
-function build_rocm() {
-    if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
-        $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
-        if ($null -ne $script:ROCM_VERSION) {
-            $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
-        }
-
-        init_vars
-        $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
-        $script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
-        $script:cmakeDefs += @(
-            "-G", "Ninja",
-            "-DCMAKE_C_COMPILER=clang.exe",
-            "-DCMAKE_CXX_COMPILER=clang++.exe",
-            "-DLLAMA_HIPBLAS=on",
-            "-DHIP_PLATFORM=amd",
-            "-DLLAMA_AVX=on",
-            "-DLLAMA_AVX2=off",
-            "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
-            "-DAMDGPU_TARGETS=$(amdGPUs)",
-            "-DGPU_TARGETS=$(amdGPUs)"
-        )
-
-        # Make sure the ROCm binary dir is first in the path
-        $env:PATH="$env:HIP_PATH\bin;$env:PATH"
-
-        # We have to clobber the LIB var from the developer shell for clang to work properly
-        $env:LIB=""
-        if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
-            write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
-            $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
-            write-host "building custom ROCM GPU"
-        }
-        write-host "Building ROCm"
-        build
-        # Ninja doesn't prefix with config name
-        ${script:config}=""
-        if ($null -ne $script:DUMPBIN) {
-            & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
-        }
-        sign
-        install
-
-        # Assumes v5.7, may need adjustments for v6
-        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
-        cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
-        cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
-    } else {
-        write-host "Skipping ROCm generation step"
-    }
-}
-
-init_vars
-if ($($args.count) -eq 0) {
-    git_module_setup
-    apply_patches
-    build_static
-    if ($script:ARCH -eq "arm64") {
-        build_cpu("ARM64")
-    } else { # amd64
-        build_cpu("x64")
-        build_cpu_avx
-        build_cpu_avx2
-        build_cuda
-        build_oneapi
-        build_rocm
-    }
-
-    cleanup
-    write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
-} else {
-    for ( $i = 0; $i -lt $args.count; $i++ ) {
-        write-host "performing $($args[$i])"
-        & $($args[$i])
-    }
-}
+cleanup
+write-host "`ncode generation completed. LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
llm/generate/generate_darwin.go (deleted)
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_darwin.sh

llm/generate/generate_linux.go (deleted)
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_linux.sh

llm/generate/generate_windows.go (deleted)
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1