Compare commits
main...build_dist (1 commit: 7893ccb68c)

.github/workflows/test.yaml (vendored): 17 lines changed
@@ -73,12 +73,12 @@ jobs:
           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
           $env:PATH="$gopath;$gccpath;$env:PATH"
           echo $env:PATH
-          go generate -x ./...
+          $env:GOARCH=""; $env:OLLAMA_BUILD_TARGET_ARCH="${{ matrix.arch }}"; go generate -x ./...
         if: ${{ startsWith(matrix.os, 'windows-') }}
-        name: 'Windows Go Generate'
+        name: 'Windows Generate'
-      - run: go generate -x ./...
+      - run: GOARCH= OLLAMA_BUILD_TARGET_ARCH=${{ matrix.arch }} go generate -x ./...
         if: ${{ ! startsWith(matrix.os, 'windows-') }}
-        name: 'Unix Go Generate'
+        name: 'Unix Generate'
       - uses: actions/upload-artifact@v4
         with:
           name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
@@ -184,7 +184,7 @@ jobs:
           $env:OLLAMA_SKIP_CPU_GENERATE="1"
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           go generate -x ./...
-        name: go generate
+        name: go generate -x ./...
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       # TODO - do we need any artifacts?
@@ -217,7 +217,7 @@ jobs:
       - name: 'Verify CUDA'
        run: nvcc -V
      - run: go get ./...
-      - name: go generate
+      - name: go generate -x ./...
        run: |
          $gopath=(get-command go).source | split-path -parent
          $cudabin=(get-command nvcc).source | split-path
@@ -312,7 +312,10 @@ jobs:
           touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
         if: ${{ startsWith(matrix.os, 'macos-') }}
         shell: bash
-      - run: go generate ./...
+      - run: $env:GOARCH=""; $env:OLLAMA_BUILD_TARGET_ARCH="${{ matrix.arch }}"; go generate -x ./...
+        if: ${{ startsWith(matrix.os, 'windows-') }}
+      - run: GOARCH= OLLAMA_BUILD_TARGET_ARCH=${{ matrix.arch }} go generate -x ./...
+        if: ${{ ! startsWith(matrix.os, 'windows-') }}
       - run: go build
       - run: go test -v ./...
       - uses: actions/upload-artifact@v4
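For illustration, the updated Unix generate step corresponds roughly to the following local invocation; arm64 stands in for the workflow's `matrix.arch` value and is only an example:

```bash
# GOARCH is cleared so the generate tooling itself builds for the host,
# while OLLAMA_BUILD_TARGET_ARCH tells the generate scripts which
# architecture to produce runners for (illustrative value).
GOARCH= OLLAMA_BUILD_TARGET_ARCH=arm64 go generate -x ./...
```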
README.md: 75 lines changed

@@ -1,12 +1,12 @@
 <div align="center">
  <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
 </div>
 
 # Ollama
 
 [](https://discord.gg/ollama)
 
-Get up and running with large language models.
+Get up and running with large language models locally.
 
 ### macOS
 
@@ -51,17 +51,15 @@ Here are some example models that can be downloaded:
 | ------------------ | ---------- | ----- | ------------------------------ |
 | Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
 | Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
-| Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
-| Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
-| Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
-| Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
+| Phi-3              | 3,8B       | 2.3GB | `ollama run phi3`              |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
-| Moondream 2        | 1.4B       | 829MB | `ollama run moondream`         |
 | Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`       |
 | Starling           | 7B         | 4.1GB | `ollama run starling-lm`       |
 | Code Llama         | 7B         | 3.8GB | `ollama run codellama`         |
 | Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored` |
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
+| Gemma              | 2B         | 1.4GB | `ollama run gemma:2b`          |
+| Gemma              | 7B         | 4.8GB | `ollama run gemma:7b`          |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |
 
 > Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -175,19 +173,13 @@ I'm a basic program that prints the famous "Hello, world!" message to the console.
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```
 
-### Pass the prompt as an argument
+### Pass in prompt as arguments
 
 ```
 $ ollama run llama3 "Summarize this file: $(cat README.md)"
 Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```
 
-### Show model information
-
-```
-ollama show llama3
-```
-
 ### List models on your computer
 
 ```
@@ -200,7 +192,19 @@ ollama list
 
 ## Building
 
-See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
+Install `cmake` and `go`:
+
+```
+brew install cmake go
+```
+
+Then build the binary:
+
+```
+go run build.go
+```
+
+More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
 
 ### Running local builds
 
@@ -248,7 +252,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 
 - [Open WebUI](https://github.com/open-webui/open-webui)
 - [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
-- [Hollama](https://github.com/fmaclen/hollama)
 - [Lollms-Webui](https://github.com/ParisNeo/lollms-webui)
 - [LibreChat](https://github.com/danny-avila/LibreChat)
 - [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
@@ -275,24 +278,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [OllamaGUI](https://github.com/enoch1118/ollamaGUI)
 - [OpenAOE](https://github.com/InternLM/OpenAOE)
 - [Odin Runes](https://github.com/leonid20000/OdinRunes)
-- [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App)
+- [LLM-X: Progressive Web App](https://github.com/mrdjohnson/llm-x)
 - [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
 - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
-- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Chat with Code Repository)
-- [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
-- [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
-- [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding)
-- [StreamDeploy](https://github.com/StreamDeploy-DevRel/streamdeploy-llm-app-scaffold) (LLM Application Scaffold)
-- [chat](https://github.com/swuecho/chat) (chat web app for teams)
+- [QA-Pilot: Chat with Code Repository](https://github.com/reid41/QA-Pilot)
+- [ChatOllama: Open Source Chatbot based on Ollama with Knowledge Bases](https://github.com/sugarforever/chat-ollama)
+- [CRAG Ollama Chat: Simple Web Search with Corrective RAG](https://github.com/Nagi-ovo/CRAG-Ollama-Chat)
+- [RAGFlow: Open-source Retrieval-Augmented Generation engine based on deep document understanding](https://github.com/infiniflow/ragflow)
+- [chat: chat web app for teams](https://github.com/swuecho/chat)
 - [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
-- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
-- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
-- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
-- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
-- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
-- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
-- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
+- [Ollama RAG Chatbot: Local Chat with multiples PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
 
 ### Terminal
 
@@ -315,7 +311,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ShellOracle](https://github.com/djcopley/ShellOracle)
 - [tlm](https://github.com/yusufcanb/tlm)
 - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
-- [gollama](https://github.com/sammcj/gollama)
 
 ### Database
 
@@ -326,20 +321,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
 
 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
-- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
 
 ### Libraries
 
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
-- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
 - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
-- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
 - [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
@@ -350,13 +342,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
 - [Elixir LangChain](https://github.com/brainlid/langchain)
 - [Ollama for R - rollama](https://github.com/JBGruber/rollama)
-- [Ollama for R - ollama-r](https://github.com/hauselin/ollama-r)
 - [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
 - [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
 - [Testcontainers](https://testcontainers.com/modules/ollama/)
-- [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
-- [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
-- [LlamaScript](https://github.com/Project-Llama/llamascript)
 
 ### Mobile
 
@@ -376,23 +364,18 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
 - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
+- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
 - [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
 - [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
 - [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
 - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
-- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
-- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
 - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
-- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
-- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
 
-### Supported backends
-
-- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
-
+### Supported backends
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
build.go (new file): 199 lines added
@@ -0,0 +1,199 @@
//go:build ignore

package main

import (
	"cmp"
	"errors"
	"flag"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
)

// Flags
var (
	flagRegenerateDestroy = flag.Bool("d", false, "force regenerate the dependencies (destructive)")
	flagRegenerateGently  = flag.Bool("g", false, "regenerate the dependencies (non-destructive)")
	flagSkipBuild         = flag.Bool("s", false, "generate dependencies only (e.g. skip 'go build .')")

	// Flags to set GOARCH explicitly for cross-platform builds,
	// e.g., in CI to target a different platform than the build matrix
	// default. These allows us to run generate without a separate build
	// step for building the script binary for the host ARCH and then
	// runing the generate script for the target ARCH. Instead, we can
	// just run `go run build.go -target=$GOARCH` to generate the
	// deps.
	flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
)

func buildEnv() []string {
	return append(os.Environ(), "GOARCH="+cmp.Or(
		*flagGOARCH,
		os.Getenv("OLLAMA_BUILD_TARGET_ARCH"),
		runtime.GOARCH,
	))
}

func main() {
	log.SetFlags(0)
	flag.Usage = func() {
		log.Printf("Usage: go run build.go [flags]")
		log.Println()
		log.Println("Flags:")
		flag.PrintDefaults()
		log.Println()
		log.Println("This script builds the Ollama server binary and generates the llama.cpp")
		log.Println("bindings for the current platform. It assumes that the current working")
		log.Println("directory is the root directory of the Ollama project.")
		log.Println()
		log.Println("If the -d flag is provided, the script will force regeneration of the")
		log.Println("dependencies; removing the 'llm/build' directory before starting.")
		log.Println()
		log.Println("If the -g flag is provided, the script will regenerate the dependencies")
		log.Println("without removing the 'llm/build' directory.")
		log.Println()
		log.Println("If the -s flag is provided, the script will skip building the Ollama binary")
		log.Println()
		log.Println("If the -target flag is provided, the script will set GOARCH to the value")
		log.Println("of the flag. This is useful for cross-platform builds.")
		log.Println()
		log.Println("The script will check for the required dependencies (cmake, gcc) and")
		log.Println("print their version.")
		log.Println()
		log.Println("The script will also check if it is being run from the root directory of")
		log.Println("the Ollama project.")
		log.Println()
		os.Exit(1)
	}
	flag.Parse()

	log.Printf("=== Building Ollama ===")
	defer func() {
		log.Printf("=== Done building Ollama ===")
		if !*flagSkipBuild {
			log.Println()
			log.Println("To run the Ollama server, use:")
			log.Println()
			log.Println(" ./ollama serve")
			log.Println()
		}
	}()

	if flag.NArg() > 0 {
		flag.Usage()
	}

	if !inRootDir() {
		log.Fatalf("Please run this script from the root directory of the Ollama project.")
	}

	if err := checkDependencies(); err != nil {
		log.Fatalf("Failed dependency check: %v", err)
	}
	if err := buildLlammaCPP(); err != nil {
		log.Fatalf("Failed to build llama.cpp: %v", err)
	}
	if err := goBuildOllama(); err != nil {
		log.Fatalf("Failed to build ollama Go binary: %v", err)
	}
}

// checkDependencies does a quick check to see if the required dependencies are
// installed on the system and functioning enough to print their version.
//
// TODO(bmizerany): Check the actual version of the dependencies? Seems a
// little daunting given diff versions might print diff things. This should
// be good enough for now.
func checkDependencies() error {
	var err error
	check := func(name string, args ...string) {
		log.Printf("=== Checking for %s ===", name)
		defer log.Printf("=== Done checking for %s ===\n\n", name)
		cmd := exec.Command(name, args...)
		cmd.Stdout = os.Stdout
		cmd.Stderr = os.Stderr
		err = errors.Join(err, cmd.Run())
	}

	check("cmake", "--version")
	check("gcc", "--version")
	return err
}

func goBuildOllama() error {
	log.Println("=== Building Ollama binary ===")
	defer log.Printf("=== Done building Ollama binary ===\n\n")
	if *flagSkipBuild {
		log.Println("Skipping 'go build -o ollama .'")
		return nil
	}
	cmd := exec.Command("go", "build", "-o", "ollama", ".")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Env = buildEnv()
	return cmd.Run()
}

// buildLlammaCPP generates the llama.cpp bindings for the current platform.
//
// It assumes that the current working directory is the root directory of the
// Ollama project.
func buildLlammaCPP() error {
	log.Println("=== Generating dependencies ===")
	defer log.Printf("=== Done generating dependencies ===\n\n")
	if *flagRegenerateDestroy {
		if err := os.RemoveAll(filepath.Join("llm", "build")); err != nil {
			return err
		}
	}
	if isDirectory(filepath.Join("llm", "build")) && !*flagRegenerateGently {
		log.Println("llm/build already exists; skipping. Use -d or -g to re-generate.")
		return nil
	}

	scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
	if err != nil {
		return err
	}

	var cmd *exec.Cmd
	switch runtime.GOOS {
	case "windows":
		script := filepath.Join(scriptDir, "gen_windows.ps1")
		cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
	case "linux":
		script := filepath.Join(scriptDir, "gen_linux.sh")
		cmd = exec.Command("bash", script)
	case "darwin":
		script := filepath.Join(scriptDir, "gen_darwin.sh")
		cmd = exec.Command("bash", script)
	default:
		log.Fatalf("Unsupported OS: %s", runtime.GOOS)
	}
	cmd.Dir = filepath.Join("llm", "generate")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Env = buildEnv()

	log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, runtime.GOARCH, cmd.Args)

	return cmd.Run()
}

func isDirectory(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		return false
	}
	return info.IsDir()
}

// inRootDir returns true if the current working directory is the root
// directory of the Ollama project. It looks for a file named "go.mod".
func inRootDir() bool {
	_, err := os.Stat("go.mod")
	return err == nil
}
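For illustration, the flags defined above suggest invocations such as the following; the arm64 target is only an example, not something this change prescribes:

```bash
go run build.go                # generate llama.cpp deps (if missing) and build ./ollama
go run build.go -d             # destructive re-generate of llm/build, then build
go run build.go -g             # non-destructive re-generate, then build
go run build.go -s             # generate deps only, skip 'go build .'
go run build.go -target=arm64  # cross-target GOARCH, e.g. from CI (illustrative value)
```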
docs/development.md
@@ -25,13 +25,7 @@ export OLLAMA_DEBUG=1
 Get the required libraries and build the native LLM code:
 
 ```bash
-go generate ./...
-```
-
-Then build ollama:
-
-```bash
-go build .
+go run build.go
 ```
 
 Now you can run `ollama`:
@@ -40,6 +34,16 @@ Now you can run `ollama`:
 ./ollama
 ```
+
+### Rebuilding the native code
+
+If at any point you need to rebuild the native code, you can run the
+build.go script again using the `-d` flag to force a rebuild, and,
+optionally, the `-s` flag to skip building the Go binary:
+
+```bash
+go run build.go -d -s
+```
 
 ### Linux
 
 #### Linux CUDA (NVIDIA)
@@ -55,16 +59,10 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
 libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
 a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
-
-Then generate dependencies:
-
-```
-go generate ./...
-```
 
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 #### Linux ROCm (AMD)
@@ -80,21 +78,17 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
 CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
 the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
-
-```
-go generate ./...
-```
 
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
 
 #### Advanced CPU Settings
 
-By default, running `go generate ./...` will compile a few different variations
+By default, running `go run build.go` will compile a few different variations
 of the LLM library based on common CPU families and vector math capabilities,
 including a lowest-common-denominator which should run on almost any 64 bit CPU
 somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
@@ -104,8 +98,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
 you might use:
 
 ```
-OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
-go build .
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go
 ```
 
 #### Containerized Linux Build
@@ -129,8 +122,7 @@ Then, build the `ollama` binary:
 
 ```powershell
 $env:CGO_ENABLED="1"
-go generate ./...
-go build .
+go run build.go
 ```
 
 #### Windows CUDA (NVIDIA)
llm/generate/gen_darwin.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be ./llm/generate/
+# This script is intended to run inside the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
 
 # TODO - add hardening to detect missing tools (cmake, etc.)
 
@@ -92,10 +92,10 @@ case "${GOARCH}" in
     ;;
     *)
         echo "GOARCH must be set"
-        echo "this script is meant to be run from within go generate"
+        echo "this script is meant to be run from within 'go run build.go'"
         exit 1
         ;;
 esac
 
 cleanup
-echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
llm/generate/gen_linux.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be llm/generate/
+# This script is intended to run with the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
 
 # First we build one or more CPU based LLM libraries
 #
@@ -281,4 +281,4 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
 fi
 
 cleanup
-echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
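For reference, these scripts are no longer driven by `go generate`; per build.go above, the script sets the working directory to llm/generate and exports GOARCH before calling them. On Linux this amounts to roughly the following sketch (arm64 is an illustrative target):

```bash
# Approximately what 'go run build.go -target=arm64' runs on Linux:
cd llm/generate
GOARCH=arm64 bash gen_linux.sh
```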
llm/generate/gen_windows.ps1
@@ -26,26 +26,15 @@ function amdGPUs {
     $GPU_LIST -join ';'
 }
 
 function init_vars {
-    if (!$script:SRC_DIR) {
-        $script:SRC_DIR = $(resolve-path "..\..\")
-    }
-    if (!$script:llamacppDir) {
-        $script:llamacppDir = "../llama.cpp"
-    }
-    if (!$script:cmakeTargets) {
-        $script:cmakeTargets = @("ollama_llama_server")
-    }
+    $script:SRC_DIR = $(resolve-path "..\..\")
+    $script:llamacppDir = "../llama.cpp"
     $script:cmakeDefs = @(
         "-DBUILD_SHARED_LIBS=on",
-        "-DLLAMA_NATIVE=off",
-        "-DLLAMA_OPENMP=off"
+        "-DLLAMA_NATIVE=off"
     )
-    $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
-    $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
-    $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
-    md "$script:DIST_BASE" -ea 0 > $null
+    $script:cmakeTargets = @("ollama_llama_server")
+    $script:ARCH = "amd64" # arm not yet supported.
     if ($env:CGO_CFLAGS -contains "-g") {
         $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
         $script:config = "RelWithDebInfo"
@@ -66,6 +55,7 @@ function init_vars
     } else {
         $script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR
     }
+    $script:GZIP=(get-command -ea 'silentlycontinue' gzip).path
     $script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
     if ($null -eq $env:CMAKE_CUDA_ARCHITECTURES) {
         $script:CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
@@ -123,13 +113,8 @@ function build
     & cmake --version
     & cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    if ($cmakeDefs -contains "-G") {
-        $extra=@("-j8")
-    } else {
-        $extra= @("--", "/p:CL_MPcount=8")
-    }
-    write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
-    & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
+    write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ })"
+    & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
     # Rearrange output to be consistent between different generators
     if ($null -ne ${script:config} -And (test-path -path "${script:buildDir}/bin/${script:config}" ) ) {
@@ -149,18 +134,21 @@ function sign
     }
 }
 
-function install {
-    write-host "Installing binaries to dist dir ${script:distDir}"
-    mkdir ${script:distDir} -ErrorAction SilentlyContinue
+function compress {
+    if ($script:GZIP -eq $null) {
+        write-host "gzip not installed, not compressing files"
+        return
+    }
+    write-host "Compressing binaries..."
     $binaries = dir "${script:buildDir}/bin/*.exe"
     foreach ($file in $binaries) {
-        copy-item -Path $file -Destination ${script:distDir} -Force
+        & "$script:GZIP" --best -f $file
     }
 
-    write-host "Installing dlls to dist dir ${script:distDir}"
+    write-host "Compressing dlls..."
     $dlls = dir "${script:buildDir}/bin/*.dll"
    foreach ($file in $dlls) {
-        copy-item -Path $file -Destination ${script:distDir} -Force
+        & "$script:GZIP" --best -f $file
    }
 }
 
@@ -181,252 +169,132 @@ function cleanup
     }
 }
 
+init_vars
+git_module_setup
+apply_patches
 
 # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
 # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
 # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
 
+$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
 
-function build_static() {
-    if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
+if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {
     # GCC build for direct linking into the Go binary
     init_vars
     # cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
     # as we need this to be compiled by gcc for golang to be able to link with itx
     write-host "Checking for MinGW..."
     # error action ensures we exit on failure
     get-command gcc
     get-command mingw32-make
-        $oldTargets = $script:cmakeTargets
-        $script:cmakeTargets = @("llama", "ggml")
-        $script:cmakeDefs = @(
-            "-G", "MinGW Makefiles"
-            "-DCMAKE_C_COMPILER=gcc.exe",
-            "-DCMAKE_CXX_COMPILER=g++.exe",
-            "-DBUILD_SHARED_LIBS=off",
-            "-DLLAMA_NATIVE=off",
-            "-DLLAMA_AVX=off",
-            "-DLLAMA_AVX2=off",
-            "-DLLAMA_AVX512=off",
-            "-DLLAMA_F16C=off",
-            "-DLLAMA_FMA=off",
-            "-DLLAMA_OPENMP=off")
-        $script:buildDir="../build/windows/${script:ARCH}_static"
-        write-host "Building static library"
-        build
-        $script:cmakeTargets = $oldTargets
-    } else {
-        write-host "Skipping CPU generation step as requested"
-    }
+    $script:cmakeTargets = @("llama", "ggml")
+    $script:cmakeDefs = @(
+        "-G", "MinGW Makefiles"
+        "-DCMAKE_C_COMPILER=gcc.exe",
+        "-DCMAKE_CXX_COMPILER=g++.exe",
+        "-DBUILD_SHARED_LIBS=off",
+        "-DLLAMA_NATIVE=off",
+        "-DLLAMA_AVX=off",
+        "-DLLAMA_AVX2=off",
+        "-DLLAMA_AVX512=off",
+        "-DLLAMA_F16C=off",
+        "-DLLAMA_FMA=off")
+    $script:buildDir="../build/windows/${script:ARCH}_static"
+    write-host "Building static library"
+    build
+
+    # remaining llama.cpp builds use MSVC
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu"
+    write-host "Building LCD CPU"
+    build
+    sign
+    compress
+
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
+    write-host "Building AVX CPU"
+    build
+    sign
+    compress
+
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
+    write-host "Building AVX2 CPU"
+    build
+    sign
+    compress
+} else {
+    write-host "Skipping CPU generation step as requested"
 }
 
-function build_cpu($gen_arch) {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
-        # remaining llama.cpp builds use MSVC
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu"
-        $script:distDir="$script:DIST_BASE\cpu"
-        write-host "Building LCD CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU generation step as requested"
-    }
-}
-
-function build_cpu_avx() {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
-        $script:distDir="$script:DIST_BASE\cpu_avx"
-        write-host "Building AVX CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU AVX generation step as requested"
-    }
-}
-
-function build_cpu_avx2() {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
-        $script:distDir="$script:DIST_BASE\cpu_avx2"
-        write-host "Building AVX2 CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU AVX2 generation step as requested"
-    }
-}
-
-function build_cuda() {
-    if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
-        # Then build cuda as a dynamically loaded library
-        $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
-        $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
-        if ($null -ne $script:CUDA_VERSION) {
-            $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
-        }
-        init_vars
-        $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
-        $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
-        $script:cmakeDefs += @(
-            "-A", "x64",
-            "-DLLAMA_CUDA=ON",
-            "-DLLAMA_AVX=on",
-            "-DLLAMA_AVX2=off",
-            "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR",
-            "-DCMAKE_CUDA_FLAGS=-t8",
-            "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}"
-        )
-        if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
-            write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
-            $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
-            write-host "building custom CUDA GPU"
-        }
-        build
-        sign
-        install
-
-        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null
-        write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-    } else {
-        write-host "Skipping CUDA generation step"
-    }
-}
-
-function build_oneapi() {
-    if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
-        # Get oneAPI version
-        $script:ONEAPI_VERSION = icpx --version
-        $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
-        if ($null -ne $script:ONEAPI_VERSION) {
-            $script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION
+if ($null -ne $script:CUDA_LIB_DIR) {
+    # Then build cuda as a dynamically loaded library
+    $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
+    $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
+    if ($null -ne $script:CUDA_VERSION) {
+        $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
     }
     init_vars
-        $script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT"
-        $script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
-        $script:cmakeDefs += @(
-            "-G", "MinGW Makefiles",
-            "-DLLAMA_SYCL=ON",
-            "-DCMAKE_C_COMPILER=icx",
-            "-DCMAKE_CXX_COMPILER=icx",
-            "-DCMAKE_BUILD_TYPE=Release"
-        )
+    $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
+    $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
+        write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
+        $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
+        write-host "building custom CUDA GPU"
+    }
+    build
+    sign
+    compress
+}
 
-        Write-Host "Building oneAPI"
+if ($null -ne $env:HIP_PATH) {
+    $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+    if ($null -ne $script:ROCM_VERSION) {
+        $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
+    }
+
+    init_vars
+    $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
+    $script:cmakeDefs += @(
+        "-G", "Ninja",
+        "-DCMAKE_C_COMPILER=clang.exe",
+        "-DCMAKE_CXX_COMPILER=clang++.exe",
+        "-DLLAMA_HIPBLAS=on",
+        "-DHIP_PLATFORM=amd",
+        "-DLLAMA_AVX=on",
+        "-DLLAMA_AVX2=off",
+        "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+        "-DAMDGPU_TARGETS=$(amdGPUs)",
+        "-DGPU_TARGETS=$(amdGPUs)"
+    )

+    # Make sure the ROCm binary dir is first in the path
+    $env:PATH="$env:HIP_PATH\bin;$env:PATH"
+
+    # We have to clobber the LIB var from the developer shell for clang to work properly
+    $env:LIB=""
+    if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
+        write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
+        $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
+        write-host "building custom ROCM GPU"
+    }
+    write-host "Building ROCm"
     build
     # Ninja doesn't prefix with config name
+    ${script:config}=""
     if ($null -ne $script:DUMPBIN) {
-        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll"
+        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
     }
     sign
-    install
+    compress
 
-    rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    md "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    } else {
-        Write-Host "Skipping oneAPI generation step"
-    }
 }
 
-function build_rocm() {
-    if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
-        $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
-        if ($null -ne $script:ROCM_VERSION) {
-            $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
-        }
-
-        init_vars
-        $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
-        $script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
-        $script:cmakeDefs += @(
-            "-G", "Ninja",
-            "-DCMAKE_C_COMPILER=clang.exe",
-            "-DCMAKE_CXX_COMPILER=clang++.exe",
-            "-DLLAMA_HIPBLAS=on",
-            "-DHIP_PLATFORM=amd",
-            "-DLLAMA_AVX=on",
-            "-DLLAMA_AVX2=off",
-            "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
-            "-DAMDGPU_TARGETS=$(amdGPUs)",
-            "-DGPU_TARGETS=$(amdGPUs)"
-        )
-
-        # Make sure the ROCm binary dir is first in the path
-        $env:PATH="$env:HIP_PATH\bin;$env:PATH"
-
-        # We have to clobber the LIB var from the developer shell for clang to work properly
-        $env:LIB=""
-        if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
-            write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
-            $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
-            write-host "building custom ROCM GPU"
-        }
-        write-host "Building ROCm"
-        build
-        # Ninja doesn't prefix with config name
-        ${script:config}=""
-        if ($null -ne $script:DUMPBIN) {
-            & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
-        }
-        sign
-        install
-
-        # Assumes v5.7, may need adjustments for v6
-        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
-        cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
-        cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
-    } else {
-        write-host "Skipping ROCm generation step"
-    }
-}
-
-init_vars
-if ($($args.count) -eq 0) {
-    git_module_setup
-    apply_patches
-    build_static
-    if ($script:ARCH -eq "arm64") {
-        build_cpu("ARM64")
-    } else { # amd64
-        build_cpu("x64")
-        build_cpu_avx
-        build_cpu_avx2
-        build_cuda
-        build_oneapi
-        build_rocm
-    }
-
-    cleanup
-    write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
-} else {
-    for ( $i = 0; $i -lt $args.count; $i++ ) {
-        write-host "performing $($args[$i])"
-        & $($args[$i])
-    }
-}
+cleanup
+write-host "`ncode generation completed. LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
llm/generate/generate_darwin.go (deleted)
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_darwin.sh

llm/generate/generate_linux.go (deleted)
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_linux.sh

llm/generate/generate_windows.go (deleted)
@@ -1,3 +0,0 @@
-package generate
-
-//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1