Merge branch 'ollama:main' into cors

This commit is contained in:
frob 2024-08-23 00:11:12 +02:00 committed by GitHub
commit 6fdf8235dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
93 changed files with 2354 additions and 1185 deletions

3
.gitattributes vendored
View File

@ -1,2 +1,3 @@
llm/ext_server/* linguist-vendored llm/ext_server/* linguist-vendored
* text eol=lf * text=auto
*.go text eol=lf

View File

@ -31,7 +31,7 @@ jobs:
security set-keychain-settings -lut 3600 build.keychain security set-keychain-settings -lut 3600 build.keychain
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: true cache: true
- name: Build Darwin - name: Build Darwin
env: env:
@ -87,7 +87,7 @@ jobs:
write-host "plugin installed" write-host "plugin installed"
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: true cache: true
- run: go get ./... - run: go get ./...
- run: | - run: |
@ -141,7 +141,7 @@ jobs:
write-host "plugin installed" write-host "plugin installed"
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: true cache: true
- name: 'Install ROCm' - name: 'Install ROCm'
run: | run: |
@ -187,6 +187,13 @@ jobs:
generate-windows-cuda: generate-windows-cuda:
environment: release environment: release
runs-on: windows runs-on: windows
strategy:
matrix:
cuda:
- version: "11"
url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
- version: "12"
url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
env: env:
KEY_CONTAINER: ${{ vars.KEY_CONTAINER }} KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
steps: steps:
@ -218,13 +225,13 @@ jobs:
write-host "plugin installed" write-host "plugin installed"
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: true cache: true
- name: 'Install CUDA' - name: 'Install CUDA ${{ matrix.cuda.version }}'
run: | run: |
$ErrorActionPreference = "Stop" $ErrorActionPreference = "Stop"
write-host "downloading CUDA Installer" write-host "downloading CUDA Installer"
Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe" Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
write-host "Installing CUDA" write-host "Installing CUDA"
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
write-host "Completed CUDA" write-host "Completed CUDA"
@ -256,15 +263,16 @@ jobs:
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\" cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: generate-windows-cuda name: generate-windows-cuda-${{ matrix.cuda.version }}
path: | path: |
llm/build/**/bin/* llm/build/**/bin/*
dist/windows-amd64/** dist/windows-amd64/**
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: windows-cuda-deps name: windows-cuda-deps-${{ matrix.cuda.version }}
path: dist/deps/* path: dist/deps/*
# Import the prior generation steps and build the final windows assets # Import the prior generation steps and build the final windows assets
build-windows: build-windows:
environment: release environment: release
@ -306,7 +314,7 @@ jobs:
write-host "plugin installed" write-host "plugin installed"
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: true cache: true
- run: go get - run: go get
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
@ -314,10 +322,16 @@ jobs:
name: generate-windows-cpu name: generate-windows-cpu
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
name: generate-windows-cuda name: generate-windows-cuda-11
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
name: windows-cuda-deps name: generate-windows-cuda-12
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps-11
- uses: actions/download-artifact@v4
with:
name: windows-cuda-deps-12
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
name: windows-rocm-deps name: windows-rocm-deps
@ -363,7 +377,6 @@ jobs:
- run: | - run: |
./scripts/build_linux.sh ./scripts/build_linux.sh
./scripts/build_docker.sh ./scripts/build_docker.sh
mv dist/deps/* dist/
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v4
with: with:
name: dist-linux-amd64 name: dist-linux-amd64
@ -459,7 +472,10 @@ jobs:
merge-multiple: true merge-multiple: true
- run: | - run: |
ls -lh dist/ ls -lh dist/
(cd dist; sha256sum * > sha256sum.txt) (cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt)
mv sha256sum.txt dist/
mv dist/linux-???64 .
mv dist/linux-amd64-rocm .
cat dist/sha256sum.txt cat dist/sha256sum.txt
- name: Create or update Release - name: Create or update Release
run: | run: |

View File

@ -63,7 +63,7 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: true cache: true
- run: go get ./... - run: go get ./...
- run: | - run: |
@ -163,7 +163,7 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: true cache: true
- name: 'Install ROCm' - name: 'Install ROCm'
run: | run: |
@ -200,7 +200,7 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: true cache: true
- name: 'Install CUDA' - name: 'Install CUDA'
run: | run: |
@ -255,7 +255,7 @@ jobs:
submodules: recursive submodules: recursive
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: false cache: false
- run: | - run: |
case ${{ matrix.arch }} in case ${{ matrix.arch }} in
@ -297,7 +297,7 @@ jobs:
submodules: recursive submodules: recursive
- uses: actions/setup-go@v5 - uses: actions/setup-go@v5
with: with:
go-version: "stable" go-version-file: go.mod
cache: true cache: true
- run: | - run: |
case ${{ matrix.arch }} in case ${{ matrix.arch }} in

View File

@ -24,7 +24,6 @@ linters:
- nosprintfhostport - nosprintfhostport
- staticcheck - staticcheck
- tenv - tenv
- testifylint
- unconvert - unconvert
- unused - unused
- usestdlibvars - usestdlibvars

37
CONTRIBUTING.md Normal file
View File

@ -0,0 +1,37 @@
# Contributing to Ollama
Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
## Set up
See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
## Pull requests
### Ideal issues
* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
### Issues that are harder to review
* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
* Documentation: small updates to fill in or dorrect missing documentation is helpful, however large documentation additions can be hard to maintain over time.
### Issues that may not be accepted
* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
* Changes that add significant friction to the user experience
* Changes that create a large future maintenance burden for maintainers and contributors
### Best practices
* Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`) . In the description, leave a short 2-3 sentences that explain more about the change and its impact.
* Tests: please add test coverage to changes where possible.
* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
## Need help?
If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).

View File

@ -1,7 +1,9 @@
ARG GOLANG_VERSION=1.22.5 ARG GOLANG_VERSION=1.22.5
ARG CMAKE_VERSION=3.22.1 ARG CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md ARG CUDA_VERSION_11=11.3.1
ARG CUDA_VERSION=11.3.1 ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG CUDA_VERSION_12=12.4.0
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG ROCM_VERSION=6.1.2 ARG ROCM_VERSION=6.1.2
# Copy the minimal context we need to run the generate scripts # Copy the minimal context we need to run the generate scripts
@ -10,7 +12,7 @@ COPY .git .git
COPY .gitmodules .gitmodules COPY .gitmodules .gitmodules
COPY llm llm COPY llm llm
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_11-devel-centos7 AS cuda-11-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@ -18,9 +20,34 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS ARG CGO_CFLAGS
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh ARG CUDA_V11_ARCHITECTURES
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH amd64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@ -28,7 +55,32 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS ARG CGO_CFLAGS
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh ARG CUDA_V11_ARCHITECTURES
ENV GOARCH arm64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V11_ARCHITECTURES}" \
CUDA_VARIANT="_v11" \
bash gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
ARG CMAKE_VERSION
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS
ARG CUDA_V12_ARCHITECTURES
ENV GOARCH arm64
RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 \
OLLAMA_SKIP_CPU_GENERATE=1 \
CMAKE_CUDA_ARCHITECTURES="${CUDA_V12_ARCHITECTURES}" \
CUDA_VARIANT="_v12" \
OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
bash gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64 FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
@ -40,15 +92,11 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
ARG CGO_CFLAGS ARG CGO_CFLAGS
ARG AMDGPU_TARGETS ARG AMDGPU_TARGETS
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh ENV GOARCH amd64
RUN mkdir /tmp/scratch && \ RUN --mount=type=cache,target=/root/.ccache \
for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \ OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 bash gen_linux.sh
cp ${dep} /tmp/scratch/ || exit 1 ; \ RUN mkdir -p ../../dist/linux-amd64-rocm/lib/ollama && \
done && \ (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd ../../dist/linux-amd64-rocm/lib/ollama && tar xf - )
(cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
(cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )
FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64 FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
@ -59,16 +107,21 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS ARG CGO_CFLAGS
ENV GOARCH amd64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64 FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" bash gen_linux.sh
FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64 FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" bash gen_linux.sh
FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64 FROM --platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
@ -79,12 +132,15 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY --from=llm-code / /go/src/github.com/ollama/ollama/ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
ARG OLLAMA_CUSTOM_CPU_DEFS ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS ARG CGO_CFLAGS
ENV GOARCH arm64
WORKDIR /go/src/github.com/ollama/ollama/llm/generate WORKDIR /go/src/github.com/ollama/ollama/llm/generate
FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64 FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_CPU_TARGET="static" bash gen_linux.sh
FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64 FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh RUN --mount=type=cache,target=/root/.ccache \
OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
# Intermediate stage used for ./scripts/build_linux.sh # Intermediate stage used for ./scripts/build_linux.sh
@ -95,12 +151,16 @@ COPY . .
COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
ARG GOFLAGS ARG GOFLAGS
ARG CGO_CFLAGS ARG CGO_CFLAGS
RUN go build -trimpath . RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-amd64/bin/ollama .
# Intermediate stage used for ./scripts/build_linux.sh # Intermediate stage used for ./scripts/build_linux.sh
FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
@ -109,23 +169,36 @@ ARG GOLANG_VERSION
WORKDIR /go/src/github.com/ollama/ollama WORKDIR /go/src/github.com/ollama/ollama
COPY . . COPY . .
COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/ COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG GOFLAGS ARG GOFLAGS
ARG CGO_CFLAGS ARG CGO_CFLAGS
RUN go build -trimpath . RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-arm64/bin/ollama .
# Strip out ROCm dependencies to keep the primary image lean
FROM --platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa*
# Runtime stages # Runtime stages
FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64 FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
COPY --from=amd64-libs-without-rocm /scratch/ /lib/
RUN apt-get update && apt-get install -y ca-certificates RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64 FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
RUN apt-get update && apt-get install -y ca-certificates RUN apt-get update && apt-get install -y ca-certificates
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image # Radeon images are much larger so we keep it distinct from the CPU/CUDA image
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
RUN update-pciids RUN update-pciids
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
RUN ln -s /opt/rocm/lib /lib/ollama
EXPOSE 11434 EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0 ENV OLLAMA_HOST 0.0.0.0

View File

@ -325,6 +325,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [tlm](https://github.com/yusufcanb/tlm) - [tlm](https://github.com/yusufcanb/tlm)
- [podman-ollama](https://github.com/ericcurtin/podman-ollama) - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
- [gollama](https://github.com/sammcj/gollama) - [gollama](https://github.com/sammcj/gollama)
- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
### Database ### Database

View File

@ -298,7 +298,7 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
return &lr, nil return &lr, nil
} }
// List running models. // ListRunning lists running models.
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) { func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
var lr ProcessResponse var lr ProcessResponse
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil { if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
@ -333,7 +333,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
return &resp, nil return &resp, nil
} }
// Hearbeat checks if the server has started and is responsive; if yes, it // Heartbeat checks if the server has started and is responsive; if yes, it
// returns nil, otherwise an error. // returns nil, otherwise an error.
func (c *Client) Heartbeat(ctx context.Context) error { func (c *Client) Heartbeat(ctx context.Context) error {
if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil { if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {

View File

@ -231,7 +231,6 @@ type Options struct {
// Runner options which must be set when the model is loaded into memory // Runner options which must be set when the model is loaded into memory
type Runner struct { type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"` NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"` NumBatch int `json:"num_batch,omitempty"`
NumGPU int `json:"num_gpu,omitempty"` NumGPU int `json:"num_gpu,omitempty"`
@ -505,7 +504,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
for key, val := range m { for key, val := range m {
opt, ok := jsonOpts[key] opt, ok := jsonOpts[key]
if !ok { if !ok {
slog.Warn("invalid option provided", "option", opt.Name) slog.Warn("invalid option provided", "option", key)
continue continue
} }
@ -615,7 +614,6 @@ func DefaultOptions() Options {
F16KV: true, F16KV: true,
UseMLock: false, UseMLock: false,
UseMMap: nil, UseMMap: nil,
UseNUMA: false,
}, },
} }
} }

View File

@ -87,20 +87,11 @@ DialogFontSize=12
[Files] [Files]
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit Source: "..\ollama.exe"; DestDir: "{app}\bin"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs Source: "..\dist\windows-{#ARCH}\lib\ollama\runners\*"; DestDir: "{app}\lib\ollama\runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
#if DirExists("..\dist\windows-amd64\cuda") Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs
Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\oneapi")
Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif
[Icons] [Icons]
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
@ -108,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico" Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
[Run] [Run]
Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden Filename: "{cmd}"; Parameters: "/C set PATH={app}\bin;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
[UninstallRun] [UninstallRun]
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden ; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
@ -143,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
[Registry] [Registry]
Root: HKCU; Subkey: "Environment"; \ Root: HKCU; Subkey: "Environment"; \
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \ ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}\bin"; \
Check: NeedsAddPath('{app}') Check: NeedsAddPath('{app}\bin')
[Code] [Code]

View File

@ -11,12 +11,12 @@ import (
) )
const ( const (
updatAvailableMenuID = 1 updateAvailableMenuID = 1
updateMenuID = updatAvailableMenuID + 1 updateMenuID = updateAvailableMenuID + 1
separatorMenuID = updateMenuID + 1 separatorMenuID = updateMenuID + 1
diagLogsMenuID = separatorMenuID + 1 diagLogsMenuID = separatorMenuID + 1
diagSeparatorMenuID = diagLogsMenuID + 1 diagSeparatorMenuID = diagLogsMenuID + 1
quitMenuID = diagSeparatorMenuID + 1 quitMenuID = diagSeparatorMenuID + 1
) )
func (t *winTray) initMenus() error { func (t *winTray) initMenus() error {
@ -35,7 +35,7 @@ func (t *winTray) initMenus() error {
func (t *winTray) UpdateAvailable(ver string) error { func (t *winTray) UpdateAvailable(ver string) error {
if !t.updateNotified { if !t.updateNotified {
slog.Debug("updating menu and sending notification for new update") slog.Debug("updating menu and sending notification for new update")
if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil { if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
return fmt.Errorf("unable to create menu entries %w", err) return fmt.Errorf("unable to create menu entries %w", err)
} }
if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil { if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {

View File

@ -11,6 +11,7 @@ import (
"path/filepath" "path/filepath"
"sort" "sort"
"sync" "sync"
"syscall"
"unsafe" "unsafe"
"golang.org/x/sys/windows" "golang.org/x/sys/windows"
@ -433,7 +434,12 @@ func (t *winTray) setIcon(src string) error {
t.muNID.Lock() t.muNID.Lock()
defer t.muNID.Unlock() defer t.muNID.Unlock()
t.nid.Icon = h t.nid.Icon = h
t.nid.Flags |= NIF_ICON t.nid.Flags |= NIF_ICON | NIF_TIP
if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
copy(t.nid.Tip[:], toolTipUTF16)
} else {
return err
}
t.nid.Size = uint32(unsafe.Sizeof(*t.nid)) t.nid.Size = uint32(unsafe.Sizeof(*t.nid))
return t.nid.modify() return t.nid.modify()

View File

@ -61,6 +61,7 @@ const (
MIIM_SUBMENU = 0x00000004 MIIM_SUBMENU = 0x00000004
MIM_APPLYTOSUBMENUS = 0x80000000 MIM_APPLYTOSUBMENUS = 0x80000000
NIF_ICON = 0x00000002 NIF_ICON = 0x00000002
NIF_TIP = 0x00000004
NIF_INFO = 0x00000010 NIF_INFO = 0x00000010
NIF_MESSAGE = 0x00000001 NIF_MESSAGE = 0x00000001
SW_HIDE = 0 SW_HIDE = 0

View File

@ -22,6 +22,7 @@ import (
"runtime" "runtime"
"slices" "slices"
"strings" "strings"
"sync/atomic"
"syscall" "syscall"
"time" "time"
@ -78,6 +79,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
status := "transferring model data" status := "transferring model data"
spinner := progress.NewSpinner(status) spinner := progress.NewSpinner(status)
p.Add(status, spinner) p.Add(status, spinner)
defer p.Stop()
for i := range modelfile.Commands { for i := range modelfile.Commands {
switch modelfile.Commands[i].Name { switch modelfile.Commands[i].Name {
@ -112,7 +114,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
path = tempfile path = tempfile
} }
digest, err := createBlob(cmd, client, path) digest, err := createBlob(cmd, client, path, spinner)
if err != nil { if err != nil {
return err return err
} }
@ -221,6 +223,14 @@ func tempZipFiles(path string) (string, error) {
} }
files = append(files, js...) files = append(files, js...)
// bert models require a nested config.json
// TODO(mxyng): merge this with the glob above
js, err = glob(filepath.Join(path, "**/*.json"), "text/plain")
if err != nil {
return "", err
}
files = append(files, js...)
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 { if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
// tokenizer.model might be a unresolved git lfs reference; error if it is // tokenizer.model might be a unresolved git lfs reference; error if it is
@ -250,6 +260,11 @@ func tempZipFiles(path string) (string, error) {
return "", err return "", err
} }
zfi.Name, err = filepath.Rel(path, file)
if err != nil {
return "", err
}
zf, err := zipfile.CreateHeader(zfi) zf, err := zipfile.CreateHeader(zfi)
if err != nil { if err != nil {
return "", err return "", err
@ -263,13 +278,20 @@ func tempZipFiles(path string) (string, error) {
return tempfile.Name(), nil return tempfile.Name(), nil
} }
func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) { func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) {
bin, err := os.Open(path) bin, err := os.Open(path)
if err != nil { if err != nil {
return "", err return "", err
} }
defer bin.Close() defer bin.Close()
// Get file info to retrieve the size
fileInfo, err := bin.Stat()
if err != nil {
return "", err
}
fileSize := fileInfo.Size()
hash := sha256.New() hash := sha256.New()
if _, err := io.Copy(hash, bin); err != nil { if _, err := io.Copy(hash, bin); err != nil {
return "", err return "", err
@ -279,13 +301,43 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
return "", err return "", err
} }
var pw progressWriter
status := "transferring model data 0%"
spinner.SetMessage(status)
done := make(chan struct{})
defer close(done)
go func() {
ticker := time.NewTicker(60 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ticker.C:
spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize)))
case <-done:
spinner.SetMessage("transferring model data 100%")
return
}
}
}()
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil)) digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil { if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
return "", err return "", err
} }
return digest, nil return digest, nil
} }
type progressWriter struct {
n atomic.Int64
}
func (w *progressWriter) Write(p []byte) (n int, err error) {
w.n.Add(int64(len(p)))
return len(p), nil
}
func RunHandler(cmd *cobra.Command, args []string) error { func RunHandler(cmd *cobra.Command, args []string) error {
interactive := true interactive := true
@ -1086,7 +1138,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
return nil return nil
} }
func RunServer(cmd *cobra.Command, _ []string) error { func RunServer(_ *cobra.Command, _ []string) error {
if err := initializeKeypair(); err != nil { if err := initializeKeypair(); err != nil {
return err return err
} }

View File

@ -7,6 +7,7 @@ import (
"io" "io"
"io/fs" "io/fs"
"log/slog" "log/slog"
"strings"
"github.com/ollama/ollama/llm" "github.com/ollama/ollama/llm"
) )
@ -27,6 +28,10 @@ func (Parameters) KV(t *Tokenizer) llm.KV {
"tokenizer.ggml.token_type": t.Vocabulary.Types, "tokenizer.ggml.token_type": t.Vocabulary.Types,
} }
if len(t.Merges) > 0 {
kv["tokenizer.ggml.merges"] = t.Merges
}
if t.Template != "" { if t.Template != "" {
kv["tokenizer.chat_template"] = t.Template kv["tokenizer.chat_template"] = t.Template
} }
@ -54,14 +59,20 @@ type Converter interface {
KV(*Tokenizer) llm.KV KV(*Tokenizer) llm.KV
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here. // Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
Tensors([]Tensor) []llm.Tensor Tensors([]Tensor) []llm.Tensor
// Replacements returns a list of string pairs to replace in tensor names.
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string
// tensorName returns the LLM tensor name for a specific input name
tensorName(string) string
// specialTokenTypes returns any special token types the model uses // specialTokenTypes returns any special token types the model uses
specialTokenTypes() []string specialTokenTypes() []string
// writeFile writes the model to the provided io.WriteSeeker
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
} }
type moreParser interface {
parseMore(fs.FS) error
}
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations // Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path. // and files it finds in the input path.
// Supported input model formats include safetensors. // Supported input model formats include safetensors.
@ -89,6 +100,12 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
conv = &mixtral{} conv = &mixtral{}
case "GemmaForCausalLM": case "GemmaForCausalLM":
conv = &gemma{} conv = &gemma{}
case "Gemma2ForCausalLM":
conv = &gemma2{}
case "Phi3ForCausalLM":
conv = &phi3{}
case "BertModel":
conv = &bert{}
default: default:
return errors.New("unsupported architecture") return errors.New("unsupported architecture")
} }
@ -97,6 +114,12 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
return err return err
} }
if t, ok := conv.(moreParser); ok {
if err := t.parseMore(fsys); err != nil {
return err
}
}
t, err := parseTokenizer(fsys, conv.specialTokenTypes()) t, err := parseTokenizer(fsys, conv.specialTokenTypes())
if err != nil { if err != nil {
return err return err
@ -113,7 +136,7 @@ func Convert(fsys fs.FS, ws io.WriteSeeker) error {
slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens)) slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
} }
ts, err := parseTensors(fsys) ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
if err != nil { if err != nil {
return err return err
} }

174
convert/convert_bert.go Normal file
View File

@ -0,0 +1,174 @@
package convert
import (
"cmp"
"encoding/json"
"io/fs"
"path/filepath"
"slices"
"strings"
"github.com/ollama/ollama/llm"
)
type bert struct {
Parameters
NLayers uint32 `json:"n_layers"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NLayer uint32 `json:"n_layer"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
NCtx uint32 `json:"n_ctx"`
HiddenSize uint32 `json:"hidden_size"`
NEmbd uint32 `json:"n_embd"`
IntermediateSize uint32 `json:"intermediate_size"`
NInner uint32 `json:"n_inner"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NHead uint32 `json:"n_head"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
LayerNormEPS float32 `json:"layer_norm_eps"`
LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
NormEpsilon float32 `json:"norm_epsilon"`
PoolingType uint32
}
var (
_ Converter = (*bert)(nil)
_ moreParser = (*bert)(nil)
)
func (p *bert) parseMore(fsys fs.FS) error {
bts, err := fs.ReadFile(fsys, "modules.json")
if err != nil {
return err
}
var modules []struct {
Type string `json:"type"`
Path string `json:"path"`
}
if err := json.Unmarshal(bts, &modules); err != nil {
return err
}
var pooling string
for _, m := range modules {
if m.Type == "sentence_transformers.models.Pooling" {
pooling = m.Path
break
}
}
if pooling != "" {
bts, err := fs.ReadFile(fsys, filepath.Join(pooling, "config.json"))
if err != nil {
return err
}
var pc struct {
PoolingModeCLSToken bool `json:"pooling_mode_cls_token"`
PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
}
if err := json.Unmarshal(bts, &pc); err != nil {
return err
}
if pc.PoolingModeMeanTokens {
p.PoolingType = 1
} else if pc.PoolingModeCLSToken {
p.PoolingType = 2
}
}
return nil
}
func (p *bert) KV(t *Tokenizer) llm.KV {
kv := p.Parameters.KV(t)
kv["general.architecture"] = "bert"
kv["bert.attention.causal"] = false
kv["bert.pooling_type"] = p.PoolingType
kv["bert.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
kv["bert.context_length"] = contextLength
}
if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
kv["bert.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
}
if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
kv["bert.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
}
if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
kv["bert.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
}
if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
kv["bert.attention.layer_norm_epsilon"] = layerNormEpsilon
}
kv["tokenizer.ggml.model"] = "bert"
kv["tokenizer.ggml.token_type_count"] = uint32(2)
// convert to phantom space tokens
for i, e := range t.Tokens {
if strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]") {
// noop
} else if strings.HasPrefix(e, "##") {
t.Tokens[i] = e[2:]
} else {
t.Tokens[i] = "\u2581" + e
}
}
kv["tokenizer.ggml.tokens"] = t.Tokens
return kv
}
func (p *bert) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
for _, t := range ts {
if slices.Contains([]string{
"embeddings.position_ids",
"pooler.dense.weight",
"pooler.dense.bias",
}, t.Name()) {
continue
}
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (bert) Replacements() []string {
return []string{
"encoder.layer", "blk",
"encoder.layers", "blk",
"embeddings.word_embeddings", "token_embd",
"embeddings.token_type_embeddings", "token_types",
"embeddings.LayerNorm", "token_embd_norm",
"embeddings.position_embeddings", "position_embd",
"attention.self.query", "attn_q",
"attention.self.key", "attn_k",
"attention.self.value", "attn_v",
"attention.output.dense", "attn_output",
"attention.output.LayerNorm", "attn_output_norm",
"intermediate.dense", "ffn_up",
"output.dense", "ffn_down",
"output.LayerNorm", "layer_output_norm",
}
}

View File

@ -26,7 +26,6 @@ var _ Converter = (*gemma)(nil)
func (p *gemma) KV(t *Tokenizer) llm.KV { func (p *gemma) KV(t *Tokenizer) llm.KV {
kv := p.Parameters.KV(t) kv := p.Parameters.KV(t)
kv["general.architecture"] = "gemma" kv["general.architecture"] = "gemma"
kv["general.name"] = "gemma"
kv["gemma.context_length"] = p.MaxPositionEmbeddings kv["gemma.context_length"] = p.MaxPositionEmbeddings
kv["gemma.embedding_length"] = p.HiddenSize kv["gemma.embedding_length"] = p.HiddenSize
kv["gemma.block_count"] = p.HiddenLayers kv["gemma.block_count"] = p.HiddenLayers
@ -44,15 +43,14 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
} }
func (p *gemma) Tensors(ts []Tensor) []llm.Tensor { func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor out := make([]llm.Tensor, 0, len(ts))
for _, t := range ts { for _, t := range ts {
name := p.tensorName(t.Name()) if strings.HasSuffix(t.Name(), "_norm.weight") {
if strings.HasSuffix(name, "_norm.weight") {
t.SetRepacker(p.addOne) t.SetRepacker(p.addOne)
} }
out = append(out, llm.Tensor{ out = append(out, llm.Tensor{
Name: name, Name: t.Name(),
Kind: t.Kind(), Kind: t.Kind(),
Shape: t.Shape(), Shape: t.Shape(),
WriterTo: t, WriterTo: t,
@ -62,8 +60,8 @@ func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
return out return out
} }
func (p *gemma) tensorName(n string) string { func (p *gemma) Replacements() []string {
return strings.NewReplacer( return []string{
"model.embed_tokens", "token_embd", "model.embed_tokens", "token_embd",
"model.norm", "output_norm", "model.norm", "output_norm",
"model.layers", "blk", "model.layers", "blk",
@ -76,8 +74,7 @@ func (p *gemma) tensorName(n string) string {
"mlp.down_proj", "ffn_down", "mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up", "mlp.up_proj", "ffn_up",
"post_attention_layernorm", "ffn_norm", "post_attention_layernorm", "ffn_norm",
"block_sparse_moe.gate", "ffn_inp", }
).Replace(n)
} }
func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) { func (*gemma) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {

43
convert/convert_gemma2.go Normal file
View File

@ -0,0 +1,43 @@
package convert
import (
"github.com/ollama/ollama/llm"
)
type gemma2 struct {
gemma
SlidingWindow uint32 `json:"sliding_window"`
AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
}
func (p *gemma2) KV(t *Tokenizer) llm.KV {
kv := p.Parameters.KV(t)
kv["general.architecture"] = "gemma2"
kv["gemma2.context_length"] = p.MaxPositionEmbeddings
kv["gemma2.embedding_length"] = p.HiddenSize
kv["gemma2.block_count"] = p.HiddenLayers
kv["gemma2.feed_forward_length"] = p.IntermediateSize
kv["gemma2.attention.head_count"] = p.NumAttentionHeads
kv["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads
kv["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
kv["gemma2.attention.key_length"] = p.HeadDim
kv["gemma2.attention.value_length"] = p.HeadDim
kv["gemma2.attention.sliding_window"] = p.SlidingWindow
kv["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap
kv["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap
kv["tokenizer.ggml.eot_token_id"] = uint32(107)
kv["tokenizer.ggml.middle_token_id"] = uint32(68)
kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
return kv
}
func (p *gemma2) Replacements() []string {
return append(
p.gemma.Replacements(),
"post_attention_layernorm", "post_attention_norm",
"pre_feedforward_layernorm", "ffn_norm",
"post_feedforward_layernorm", "post_ffw_norm",
)
}

View File

@ -3,6 +3,7 @@ package convert
import ( import (
"cmp" "cmp"
"fmt" "fmt"
"math"
"strings" "strings"
"github.com/pdevine/tensor" "github.com/pdevine/tensor"
@ -27,8 +28,14 @@ type llama struct {
NumKeyValueHeads uint32 `json:"num_key_value_heads"` NumKeyValueHeads uint32 `json:"num_key_value_heads"`
RopeTheta float32 `json:"rope_theta"` RopeTheta float32 `json:"rope_theta"`
RopeScaling struct { RopeScaling struct {
Type string `json:"type"` Type string `json:"type"`
Factor float32 `json:"factor"` RopeType string `json:"rope_type"`
Factor float32 `json:"factor"`
LowFrequencyFactor float32 `json:"low_freq_factor"`
HighFrequencyFactor float32 `json:"high_freq_factor"`
OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"`
factors ropeFactor
} `json:"rope_scaling"` } `json:"rope_scaling"`
RMSNormEPS float32 `json:"rms_norm_eps"` RMSNormEPS float32 `json:"rms_norm_eps"`
LayerNormEPS float32 `json:"layer_norm_eps"` LayerNormEPS float32 `json:"layer_norm_eps"`
@ -42,7 +49,6 @@ var _ Converter = (*llama)(nil)
func (p *llama) KV(t *Tokenizer) llm.KV { func (p *llama) KV(t *Tokenizer) llm.KV {
kv := p.Parameters.KV(t) kv := p.Parameters.KV(t)
kv["general.architecture"] = "llama" kv["general.architecture"] = "llama"
kv["general.name"] = "llama"
kv["llama.vocab_size"] = p.VocabSize kv["llama.vocab_size"] = p.VocabSize
kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer) kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
@ -71,6 +77,27 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
if p.RopeScaling.Type == "linear" { if p.RopeScaling.Type == "linear" {
kv["llama.rope.scaling.type"] = p.RopeScaling.Type kv["llama.rope.scaling.type"] = p.RopeScaling.Type
kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
} else if p.RopeScaling.RopeType == "llama3" {
dim := p.HiddenSize / p.NumAttentionHeads
for i := uint32(0); i < dim; i += 2 {
factor := cmp.Or(p.RopeScaling.Factor, 8.0)
factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
lambdaLow := float32(original) / factorLow
lambdaHigh := float32(original) / factorHigh
lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
if lambda < float64(lambdaHigh) {
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
} else if lambda > float64(lambdaLow) {
p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
} else {
smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
}
}
} }
if p.NumKeyValueHeads > 0 { if p.NumKeyValueHeads > 0 {
@ -90,24 +117,29 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
kv["llama.attention.value_length"] = p.HeadDim kv["llama.attention.value_length"] = p.HeadDim
} }
if len(t.Merges) > 0 {
kv["tokenizer.ggml.merges"] = t.Merges
}
return kv return kv
} }
func (p *llama) Tensors(ts []Tensor) []llm.Tensor { func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor var out []llm.Tensor
if p.RopeScaling.factors != nil {
out = append(out, llm.Tensor{
Name: "rope_freqs.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.factors))},
WriterTo: p.RopeScaling.factors,
})
}
for _, t := range ts { for _, t := range ts {
name := p.tensorName(t.Name()) if strings.HasSuffix(t.Name(), "attn_q.weight") ||
if strings.HasSuffix(name, "attn_q.weight") || strings.HasSuffix(t.Name(), "attn_k.weight") {
strings.HasSuffix(name, "attn_k.weight") {
t.SetRepacker(p.repack) t.SetRepacker(p.repack)
} }
out = append(out, llm.Tensor{ out = append(out, llm.Tensor{
Name: name, Name: t.Name(),
Kind: t.Kind(), Kind: t.Kind(),
Shape: t.Shape(), Shape: t.Shape(),
WriterTo: t, WriterTo: t,
@ -117,8 +149,8 @@ func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
return out return out
} }
func (p *llama) tensorName(n string) string { func (p *llama) Replacements() []string {
return strings.NewReplacer( return []string{
"lm_head", "output", "lm_head", "output",
"model.embed_tokens", "token_embd", "model.embed_tokens", "token_embd",
"model.norm", "output_norm", "model.norm", "output_norm",
@ -132,9 +164,7 @@ func (p *llama) tensorName(n string) string {
"mlp.down_proj", "ffn_down", "mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up", "mlp.up_proj", "ffn_up",
"post_attention_layernorm", "ffn_norm", "post_attention_layernorm", "ffn_norm",
// mixtral }
"block_sparse_moe.gate", "ffn_gate_inp",
).Replace(n)
} }
func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) { func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32, error) {
@ -144,9 +174,9 @@ func (p *llama) repack(name string, data []float32, shape []uint64) ([]float32,
} }
var heads uint32 var heads uint32
if strings.HasSuffix(name, "q_proj.weight") { if strings.HasSuffix(name, "attn_q.weight") {
heads = p.NumAttentionHeads heads = p.NumAttentionHeads
} else if strings.HasSuffix(name, "k_proj.weight") { } else if strings.HasSuffix(name, "attn_k.weight") {
heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads) heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
} else { } else {
return nil, fmt.Errorf("unknown tensor for repack: %s", name) return nil, fmt.Errorf("unknown tensor for repack: %s", name)

View File

@ -15,8 +15,6 @@ type mixtral struct {
NumExpertsPerToken uint32 `json:"num_experts_per_tok"` NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
} }
var _ Converter = (*mixtral)(nil)
func (p *mixtral) KV(t *Tokenizer) llm.KV { func (p *mixtral) KV(t *Tokenizer) llm.KV {
kv := p.llama.KV(t) kv := p.llama.KV(t)
@ -72,6 +70,13 @@ func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
return append(out, p.llama.Tensors(ts)...) return append(out, p.llama.Tensors(ts)...)
} }
func (p *mixtral) Replacements() []string {
return append(
p.llama.Replacements(),
"block_sparse_moe.gate", "ffn_gate_inp",
)
}
type experts []Tensor type experts []Tensor
func (e experts) WriteTo(w io.Writer) (int64, error) { func (e experts) WriteTo(w io.Writer) (int64, error) {

123
convert/convert_phi3.go Normal file
View File

@ -0,0 +1,123 @@
package convert
import (
"cmp"
"encoding/binary"
"io"
"math"
"strings"
"sync"
"github.com/ollama/ollama/llm"
)
type phi3 struct {
Parameters
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NLayers uint32 `json:"n_layers"`
HiddenSize uint32 `json:"hidden_size"`
NEmbd uint32 `json:"n_embd"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NHead uint32 `json:"n_head"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
NHeadKV uint32 `json:"n_head_kv"`
RopeTheta float32 `json:"rope_theta"`
RopeScaling struct {
Type string `json:"type"`
LongFactor ropeFactor `json:"long_factor"`
ShortFactor ropeFactor `json:"short_factor"`
} `json:"rope_scaling"`
RMSNormEPS float32 `json:"rms_norm_eps"`
NPositions uint32 `json:"n_positions"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
SlidingWindow uint32 `json:"sliding_window"`
}
var _ Converter = (*phi3)(nil)
func (p *phi3) KV(t *Tokenizer) llm.KV {
kv := p.Parameters.KV(t)
kv["general.architecture"] = "phi3"
kv["phi3.context_length"] = p.MaxPositionEmbeddings
kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
kv["phi3.feed_forward_length"] = p.IntermediateSize
kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
kv["phi3.rope.freq_base"] = p.RopeTheta
kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
kv["phi3.attention.sliding_window"] = p.SlidingWindow
scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
switch p.RopeScaling.Type {
case "":
// no scaling
case "su", "longrope":
kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
case "yarn":
kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
default:
panic("unknown rope scaling type")
}
return kv
}
func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
var addRopeFactors sync.Once
out := make([]llm.Tensor, 0, len(ts)+2)
for _, t := range ts {
if strings.HasPrefix(t.Name(), "blk.0.") {
addRopeFactors.Do(func() {
out = append(out, llm.Tensor{
Name: "rope_factors_long.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
WriterTo: p.RopeScaling.LongFactor,
}, llm.Tensor{
Name: "rope_factors_short.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
WriterTo: p.RopeScaling.ShortFactor,
})
})
}
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *phi3) Replacements() []string {
return []string{
"lm_head", "output",
"model.embed_tokens", "token_embd",
"model.norm", "output_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"self_attn.qkv_proj", "attn_qkv",
"self_attn.o_proj", "attn_output",
"mlp.down_proj", "ffn_down",
"mlp.gate_up_proj", "ffn_up",
"post_attention_layernorm", "ffn_norm",
}
}
type ropeFactor []float32
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
err := binary.Write(w, binary.LittleEndian, r)
return 0, err
}

View File

@ -62,9 +62,14 @@ func TestMain(m *testing.M) {
func TestConvertFull(t *testing.T) { func TestConvertFull(t *testing.T) {
cases := []string{ cases := []string{
"Meta-Llama-3-8B-Instruct", "Meta-Llama-3-8B-Instruct",
"Meta-Llama-3.1-8B-Instruct",
"Mistral-7B-Instruct-v0.2", "Mistral-7B-Instruct-v0.2",
"Mixtral-8x7B-Instruct-v0.1", "Mixtral-8x7B-Instruct-v0.1",
"gemma-2b-it", "gemma-2b-it",
// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
"Phi-3-mini-128k-instruct",
"all-MiniLM-L6-v2",
"gemma-2-9b-it",
} }
for i := range cases { for i := range cases {

View File

@ -35,7 +35,9 @@ const (
) )
func (t tensorBase) Kind() uint32 { func (t tensorBase) Kind() uint32 {
if strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight") { if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
t.name == "token_types.weight" {
// these tensors are always F32
return 0 return 0
} }
@ -55,10 +57,10 @@ func (t *tensorBase) SetRepacker(fn repacker) {
type repacker func(string, []float32, []uint64) ([]float32, error) type repacker func(string, []float32, []uint64) ([]float32, error)
func parseTensors(fsys fs.FS) ([]Tensor, error) { func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
patterns := []struct { patterns := []struct {
Pattern string Pattern string
Func func(fs.FS, ...string) ([]Tensor, error) Func func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
}{ }{
{"model-*-of-*.safetensors", parseSafetensors}, {"model-*-of-*.safetensors", parseSafetensors},
{"model.safetensors", parseSafetensors}, {"model.safetensors", parseSafetensors},
@ -74,7 +76,7 @@ func parseTensors(fsys fs.FS) ([]Tensor, error) {
} }
if len(matches) > 0 { if len(matches) > 0 {
return pattern.Func(fsys, matches...) return pattern.Func(fsys, replacer, matches...)
} }
} }

View File

@ -8,6 +8,7 @@ import (
"io" "io"
"io/fs" "io/fs"
"slices" "slices"
"strings"
"github.com/d4l3k/go-bfloat16" "github.com/d4l3k/go-bfloat16"
"github.com/x448/float16" "github.com/x448/float16"
@ -20,7 +21,7 @@ type safetensorMetadata struct {
Offsets []int64 `json:"data_offsets"` Offsets []int64 `json:"data_offsets"`
} }
func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) { func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
var ts []Tensor var ts []Tensor
for _, p := range ps { for _, p := range ps {
f, err := fsys.Open(p) f, err := fsys.Open(p)
@ -56,7 +57,7 @@ func parseSafetensors(fsys fs.FS, ps ...string) ([]Tensor, error) {
offset: safetensorsPad(n, value.Offsets[0]), offset: safetensorsPad(n, value.Offsets[0]),
size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]), size: safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
tensorBase: &tensorBase{ tensorBase: &tensorBase{
name: key, name: replacer.Replace(key),
shape: value.Shape, shape: value.Shape,
}, },
}) })

View File

@ -3,12 +3,13 @@ package convert
import ( import (
"io" "io"
"io/fs" "io/fs"
"strings"
"github.com/nlpodyssey/gopickle/pytorch" "github.com/nlpodyssey/gopickle/pytorch"
"github.com/nlpodyssey/gopickle/types" "github.com/nlpodyssey/gopickle/types"
) )
func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) { func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
var ts []Tensor var ts []Tensor
for _, p := range ps { for _, p := range ps {
pt, err := pytorch.Load(p) pt, err := pytorch.Load(p)
@ -27,7 +28,7 @@ func parseTorch(fsys fs.FS, ps ...string) ([]Tensor, error) {
ts = append(ts, torch{ ts = append(ts, torch{
storage: t.(*pytorch.Tensor).Source, storage: t.(*pytorch.Tensor).Source,
tensorBase: &tensorBase{ tensorBase: &tensorBase{
name: k.(string), name: replacer.Replace(k.(string)),
shape: shape, shape: shape,
}, },
}) })

View File

@ -0,0 +1,3 @@
{
"rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
}

View File

@ -0,0 +1,225 @@
{
"general.architecture": "phi3",
"general.file_type": "1",
"general.quantization_version": "2",
"phi3.block_count": "32",
"phi3.context_length": "131072",
"phi3.embedding_length": "3072",
"phi3.feed_forward_length": "8192",
"phi3.rope.scaling.original_context_length": "4096",
"phi3.rope.dimension_count": "96",
"phi3.rope.freq_base": "10000",
"phi3.rope.scaling.attn_factor": "1.1902381",
"phi3.attention.head_count": "32",
"phi3.attention.head_count_kv": "32",
"phi3.attention.layer_norm_rms_epsilon": "1e-05",
"phi3.attention.sliding_window": "262144",
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.pre": "default",
"tokenizer.ggml.add_bos_token": "false",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "1",
"tokenizer.ggml.eos_token_id": "32000",
"tokenizer.ggml.unknown_token_id": "0",
"tokenizer.ggml.padding_token_id": "32000",
"tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
"tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
"tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
"blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
"blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
"blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
"blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
"blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
"blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
"blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
"blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
"blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
"blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
"blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
"blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
"blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
"blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
"blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
"blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
"blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
"blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
"blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
"blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
"blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
"blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
"blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
"blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
"blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
"blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
"blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
"blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
"blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
"blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
"blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
"blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
"blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
"blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
"blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
"blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
"blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
"blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
"blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
"blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
"blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
"blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
"blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
"blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
"blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
"blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
"blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
"blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
"blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
"blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
"blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
"blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
"blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
"blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
"blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
"blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
"blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
"blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
"blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
"blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
"blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
"blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
"blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
"blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
"blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
"blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
"blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
"blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
"blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
"blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
"blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
"blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
"blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
"blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
"blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
"blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
"blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
"blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
"blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
"blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
"blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
"blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
"blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
"blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
"blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
"blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
"blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
"blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
"blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
"blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
"blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
"blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
"blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
"blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
"blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
"blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
"blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
"blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
"blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
"blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
"blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
"blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
"blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
"blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
"blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
"blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
"blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
"blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
"blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
"blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
"blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
"blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
"blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
"blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
"blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
"blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
"blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
"blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
"blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
"blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
"blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
"blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
"blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
"blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
"blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
"blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
"blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
"blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
"blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
"blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
"blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
"blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
"blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
"blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
"blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
"blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
"blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
"blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
"blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
"blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
"blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
"blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
"blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
"blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
"blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
"blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
"blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
"blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
"blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
"blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
"blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
"blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
"blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
"blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
"blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
"blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
"blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
"blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
"blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
"blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
"blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
"blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
"blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
"blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
"blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
"blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
"blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
"blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
"blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
"blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
"blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
"blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
"blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
"blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
"blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
"blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
"blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
"blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
"blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
"blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
"blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
"blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
"blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
"blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
"blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
"blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
"blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
"blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
"blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
"blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
"blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
"blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
"output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
"output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
"rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
"rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
"token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
}

124
convert/testdata/all-MiniLM-L6-v2.json vendored Normal file
View File

@ -0,0 +1,124 @@
{
"general.architecture": "bert",
"general.file_type": "1",
"general.quantization_version": "2",
"bert.attention.causal": "false",
"bert.attention.head_count": "12",
"bert.attention.layer_norm_epsilon": "1e-12",
"bert.block_count": "6",
"bert.context_length": "512",
"bert.embedding_length": "384",
"bert.feed_forward_length": "1536",
"bert.pooling_type": "1",
"tokenizer.ggml.model": "bert",
"tokenizer.ggml.padding_token_id": "0",
"tokenizer.ggml.unknown_token_id": "100",
"tokenizer.ggml.cls_token_id": "101",
"tokenizer.ggml.seperator_token_id": "102",
"tokenizer.ggml.mask_token_id": "103",
"tokenizer.ggml.token_type_count": "2",
"tokenizer.ggml.scores": "6db964fe67338aca57790481a390121ff3dd643eebe49f7dd308029ad99abb6f",
"tokenizer.ggml.token_type": "98d247c5404b6b18f05f133b92dd56edf6efefefac326794b00d7b351f6c5aa1",
"tokenizer.ggml.tokens": "9efe405e229a45ff9916f54c475d151d2200cd2ab0006f347abfb069cf096c86",
"token_embd.weight": "8c1ee80a9ea4f65aa385ba30112010068af3d209bebc6e149d3d4589c2cd0a5a",
"position_embd.weight": "6c516f0b1c4e2388ab90394dd80ad69e4e4509b890982fc3408108ae66210eb6",
"token_types.weight": "f879f8e422ed211948f28b560d3c5e17aae7993f063b51196a28cf5c0fb3da21",
"token_embd_norm.weight": "75076e095d717aab96f8b6beeee503c27940d9a76f2b891a0e3de72f8a6043e4",
"token_embd_norm.bias": "298735285ffe944e1bf03e5d35c7280326b85cf121bde9874f1af5dc51ab939d",
"blk.0.attn_q.weight": "ab0923ce4c1549175112dcdfcc860fe30137f991e03ea6857fb5993670adaf6c",
"blk.0.attn_q.bias": "a3ec29551dabf976e1d34256b8ab5ab7b758f3ed9742c3cafdbd984d5441df62",
"blk.0.attn_k.weight": "4c1038a6d035c3e9ffed7fa672b614627814752503755fbad0cfb76a41ad71ba",
"blk.0.attn_k.bias": "e0363930eb588d91816aa3d230bb03b6e2551c165117b80b8d60397413819ef9",
"blk.0.attn_v.weight": "425e2e53e3f00ce98d29c3e6a161eb55d3e6ae0d96fdb9f6242d1c4fd6eef4b3",
"blk.0.attn_v.bias": "6579173a1e65ee124fbd0bd53cbdca4225515b4f2c5f18fb1bfd000f5978f9bb",
"blk.0.attn_output.weight": "a6d70a08cd7164de5d12af65d86d657c3db35aaecde778b2b3fda9193c4c9802",
"blk.0.attn_output.bias": "2b8d12c4f9a9c5bfaa29c597839568f6e0525cb41eeaf64ddeb6bd84dfeb9701",
"blk.0.attn_output_norm.weight": "bbe6e502a473228b525aeed26cc31b7db123ad63bdc5a6eebac6ea70b8b51d62",
"blk.0.attn_output_norm.bias": "36eaacaf0007c5c62daea97aab0115390c0682914f78482e37eb76885f4b7a50",
"blk.0.ffn_up.weight": "24654561c76ce387d125759ba843f06b904ef721fcceaeff6ccc62180a48e874",
"blk.0.ffn_up.bias": "fd3f0126aa1d95768fa60eb6f4ab8a2763cfcb7e5405f35b92353031d86f4d34",
"blk.0.ffn_down.weight": "97a829763a6a5bf3329ceb4d39c424ba4787d61653a5b0bbd1f84782e4d4e0ca",
"blk.0.ffn_down.bias": "7aa980c30ae8b4ee7f69df28808dbf5c431f56ccc4a80340f644a0419f16c054",
"blk.0.layer_output_norm.weight": "ef30dad4c2a083ae1ff5039a2a6cda60ecc89bf1e486a6f8c0d15f50589603f8",
"blk.0.layer_output_norm.bias": "8b1b77e67568b1bce43fc476de1b177c53ff688d66beb66995e8eb3dc290da8a",
"blk.1.attn_q.weight": "284331622a1f6f9b87ccee4f652bd66a394ca493c4d93be4d1844e4f6159ad10",
"blk.1.attn_q.bias": "e24ebd4860330e08f6bfdd077a82db0bee33f4c8846cf1db26327a34754c7069",
"blk.1.attn_k.weight": "729dd0d555544b5bd0f7580b3c8b384256b974605f0e7487b95f295aa032997d",
"blk.1.attn_k.bias": "2aa51a828a858f35473f54477583fea54ce2ccc34ea60fbd1d228fbe9bca827f",
"blk.1.attn_v.weight": "6be304671cc311d5ca5c103f2b51467ee800c589bc5b8101e09ff5aed1f68c21",
"blk.1.attn_v.bias": "43bcbab78a8819e07f723bc9e5b737b71e87a7594f15234e882b63e327a64199",
"blk.1.attn_output.weight": "15ec8a1a12b26c9976445308a09f748ab0e4bef0f583d13ab08c3129f8738d73",
"blk.1.attn_output.bias": "dac2146f4baa6ed16f6c0dc7443831fb7ec79bedcceafd80d1a4b628a1bb072d",
"blk.1.attn_output_norm.weight": "d2151eb33bffac536787a4c9a5d2b31c7a80b17c4611877842a3cce2cd6e98d8",
"blk.1.attn_output_norm.bias": "31e1b779716dafb855d2cf5631ee168a0ccf372eb9c6ea6091f66fa97a9b9d2d",
"blk.1.ffn_up.weight": "a57547fc3fc3b77406f5cdcb0c87af9bc184701f175c39c1f35297826fce3cc7",
"blk.1.ffn_up.bias": "123be6d541d086202913c75d878c54d59a749f3af7b58f7ef9eb9e7c62a24c9a",
"blk.1.ffn_down.weight": "cfdb79788377e5cbded8790cd41b9e66c397ecab75474071fcd7cf32d30f9613",
"blk.1.ffn_down.bias": "bcb58315519a573097960891c9ae41cf4c685ab78c3e0e77471471758a7eae88",
"blk.1.layer_output_norm.weight": "819b554271452bfb1d84c2603b90377b2e41a0ac1e3aa8b417ccf9dce63375bd",
"blk.1.layer_output_norm.bias": "47a3433ac27f5ce8947fb38dd491f3706df4ef6adb0ddf74612bf0f54b19e164",
"blk.2.attn_q.weight": "1557a9ea852b1880551f7290e00aded4f35e6c4180fdcbed1b0039bf805f639e",
"blk.2.attn_q.bias": "c3bfe5f3066f655fd36b055530997b59ff33ef013563aaeb3cb8ff07dabd59a9",
"blk.2.attn_k.weight": "cfd08eb69c61ae2f9f14f9b7ff5c5394ca264b1a9f3d48156677f90dd1766289",
"blk.2.attn_k.bias": "9b839bc0e79974a0b3f5d1895972bc6f5c9a1bc16052e1af786e6a530758152d",
"blk.2.attn_v.weight": "02b26b1208480eaeeb00e7b4cf8b690006ca14759357fc44ed4a2a8924ead993",
"blk.2.attn_v.bias": "e7e6f0089fded1659a867ab736c220d9653ea7da6b1b94baf5c8d30a748b63ab",
"blk.2.attn_output.weight": "a1db121c7d33806b349cadd050300a57db49fdc91224fd07c9ac43bf4299dc79",
"blk.2.attn_output.bias": "7675128b6a92555cd955c820311e91e9417d31f48848f45d047b4100c62148b3",
"blk.2.attn_output_norm.weight": "5b4595e0fbcba67a700c4331adf746d2fba3546364a4db5607ae241947bb1a21",
"blk.2.attn_output_norm.bias": "7b8e16826ea30e5a2ba0b02e0095a901775981a296e98819625320e983060d08",
"blk.2.ffn_up.weight": "a0d815d946ac07a65095c4ae4df77b818845e6d97795c7d82f55e689d944db59",
"blk.2.ffn_up.bias": "ce37c0a4174d6bf773ded7bd016ede627ad3bdb8bc99b9992a18dc8e8898f252",
"blk.2.ffn_down.weight": "f6231d2a25426fbd45b9f1160aa484220eb227ceef0348c4a6a6de890606e5ef",
"blk.2.ffn_down.bias": "429e00556e8dc63a785238b309b9d83738500c1ef6d736fe6526ad88ea496d27",
"blk.2.layer_output_norm.weight": "651457a573adf3f7dd9ee5dfe1c8e89389e94443993aab77ec6a0b05aa621e35",
"blk.2.layer_output_norm.bias": "41fbbeda7fd89b0cef5f945ae44011c316982390401d6f75ba8c6d365e185247",
"blk.3.attn_q.weight": "95a43f32949d2cb8d22815bb27a44abfc6665ba96221af817dfe058cb6ca72c6",
"blk.3.attn_q.bias": "f4e34385e75d8108b6b3bd336106e2133a8c9be0cc343dfe5dc48c32a823c7cb",
"blk.3.attn_k.weight": "6b892da6a17d4d3265265a15f695864a31813ee8c8e710ae9bc9e1adbc6c9a18",
"blk.3.attn_k.bias": "40b8067b641a56014cee42548240aa8930820958b1933004892b5f04fbaef39e",
"blk.3.attn_v.weight": "9fcd5922319dd2a461082a5ce040c1dfe65d87d70ca6547dd0b46eeecc3eeb2b",
"blk.3.attn_v.bias": "b528c56212e66931fdbe267ac327a9c2f87cd03baff3ea719e30afe681da15f1",
"blk.3.attn_output.weight": "e3b178c1b03981e75510e0d277af23ea59cc404b5394e61bd32291825719b502",
"blk.3.attn_output.bias": "712c84d39a6a5a9c06a09da8fd9939ba0d5525524a4bba61ea4de09b48f45cae",
"blk.3.attn_output_norm.weight": "d1ffac88e675592ff72f8a617be32b4a381d443b2f8f2645dbe44a1e5745aac0",
"blk.3.attn_output_norm.bias": "ea31a1c73146234c50e0e43f485c458413714867b8e2703af66482f7db2d6c40",
"blk.3.ffn_up.weight": "4ef4f3b9a1ea6ab2ef2eb6e8b008e06a44790d099d97482a05a51e39a29afac0",
"blk.3.ffn_up.bias": "06a4296dda16f452675c51f108079fe7722552d6521c737d97734943818b9a2b",
"blk.3.ffn_down.weight": "f114b2bebe392c7d80433bb880c6730293aa4561b0b0370dcdaf7472daebd847",
"blk.3.ffn_down.bias": "2c8e67831d28a3bf613fc7912ae3259b63d72abcaf4d30efd8800758400158de",
"blk.3.layer_output_norm.weight": "a1dfeb7b5a51dd56447312ca41e2ad2f361a3ea12ddc355127f5f4219fb0a482",
"blk.3.layer_output_norm.bias": "1ed630021b25c6c6fc93fd32988b9907df966d4982a93081f639aac3044618ab",
"blk.4.attn_q.weight": "b5fae4c1f9a5f33a2a2e816ac0c01c25f422e4efdd59ef1ed93da2610e5370fc",
"blk.4.attn_q.bias": "c2e376524ea98ac3b10d9eee19ecb1b1e261fa5149efe0232844c923dfb428fb",
"blk.4.attn_k.weight": "a4632f5ebf9321d9d08f9112a4e5dda2efe5671df4a4e67fee24845f5b14af16",
"blk.4.attn_k.bias": "a9a02ffb8b8b4f6dfe487a7e0341f1d5318c9d2b793a688f34cb1b22fc66ef60",
"blk.4.attn_v.weight": "10ad8deb81d9fa093b1e5c0f24ea82aa7df43e6aca49e260fcbea56eab8cc86a",
"blk.4.attn_v.bias": "7326813e181e021130bd33ac136293fcffccce2d1d8cb59041e5b13a8cceacf6",
"blk.4.attn_output.weight": "c92573088c7437c2b3cda51490e152c27fb19e5468df591eabba5a49d5398d44",
"blk.4.attn_output.bias": "14e10b419e5859af1eb685af5c330aee67048cd704dcead9217840c6f5393222",
"blk.4.attn_output_norm.weight": "02b6831c0e0fb0edbc579a92812a1dd972cb15d14fcd382d4427c5a7b300ac44",
"blk.4.attn_output_norm.bias": "7eed5cd503bb6bb6ceb1bc8b07cc077903a4f14fb8b9d6cdf39644815ecf1374",
"blk.4.ffn_up.weight": "8d0c91d62e74d6431321116a37cf3339e630bd50ba164d3304fc4fe8dd831223",
"blk.4.ffn_up.bias": "d325f07f73c005a273c484c7be8e7abb4d6e8a5c4fd093f5869133b97629d017",
"blk.4.ffn_down.weight": "7ba7bd81143f40537b84f938e403e19f30e4928625eb371de052b9025beb4d21",
"blk.4.ffn_down.bias": "2853d9c2a75288214a4bf4907dc19d04d01926f4913d302b1aa7bdbfcce0f7a1",
"blk.4.layer_output_norm.weight": "a4ed1885fa77b90fed5300c355ef0aa0c876a8c747151d9d790939d464d57d4f",
"blk.4.layer_output_norm.bias": "62142a81e813a9e636333b2b805d6bc3b17c5e7cd4b15adce1ada6bc9a32563c",
"blk.5.attn_q.weight": "afc1dff080a72c3daad01384b1448d476aaf789871017c8ff8e144788887995d",
"blk.5.attn_q.bias": "748a820371c1d4f872c84545b36358d239c35bf6c99e2812c237d88c3292763b",
"blk.5.attn_k.weight": "59e30c1ed8acd2cbb01de5f62e7804015b9ecf98ba157d98cab016344639eda5",
"blk.5.attn_k.bias": "f839520078f9e589496e982e86d0126c7aa14196047339abffcf49a696229f77",
"blk.5.attn_v.weight": "3e21fb874e21b90308e1f46af034a3c32d3eba1628d62ae5f2246d6af5818923",
"blk.5.attn_v.bias": "5cd4852bf95c1444d10d756750f6bf49f842c0b39e9953c7f408bb67c325ac8c",
"blk.5.attn_output.weight": "636ce6a7752895f204b9d01ba0aedd9a294f908b42f372c22a16d9dd590d7471",
"blk.5.attn_output.bias": "82d924d4b0d2b94f2bbff91619216d6967a3541ce9b1531a6a60457a67b5d219",
"blk.5.attn_output_norm.weight": "5e7bd0a8d3396080f3360d7c4700bf094a06216431bd014c4479eef72ecf4271",
"blk.5.attn_output_norm.bias": "66c6de5edda5466d029c6753780be81ccd4218bf8bc00680000e0f06856ab712",
"blk.5.ffn_up.weight": "5bbf6e7ea380e216e33f8bee06d25f2265359d3876a300e92bc6e41d48e33430",
"blk.5.ffn_up.bias": "9d795388bb36fb33ad3a37fea3ccb4937838e02800a608fb47d363cd06b47370",
"blk.5.ffn_down.weight": "2fd628974e7f075479dd227b46fbd48ae8d3ca34d735b36f391ac06410730368",
"blk.5.ffn_down.bias": "cd213ba9eaa75fa541648097fbe9c96e58077e6c3ad6ad2fb1f21f8350f44291",
"blk.5.layer_output_norm.weight": "159a9df41d15b7022d136f86a2a2631c4635f9816e957472217077b522bcf52a",
"blk.5.layer_output_norm.bias": "24c1f27ffd1eb4e5be7e3a2909943e6f0980635d761fa1efdd0c19645da23766"
}

6
convert/testdata/gemma-2-9b-it.json vendored Normal file
View File

@ -0,0 +1,6 @@
{
"general.architecture": "gemma2",
"gemma2.attention.sliding_window": "4096",
"gemma2.attn_logit_softcapping": "50",
"gemma2.final_logit_softcapping": "30"
}

View File

@ -1,7 +1,6 @@
package convert package convert
import ( import (
"cmp"
"crypto/sha256" "crypto/sha256"
"encoding/hex" "encoding/hex"
"encoding/json" "encoding/json"
@ -11,6 +10,8 @@ import (
"log/slog" "log/slog"
"os" "os"
"slices" "slices"
"golang.org/x/exp/maps"
) )
const ( const (
@ -184,32 +185,32 @@ func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
return nil, err return nil, err
} }
var tokens []token tokens := make(map[int]token, len(t.Model.Vocab))
for k, v := range t.Model.Vocab { for k, v := range t.Model.Vocab {
tokens = append(tokens, token{ tokens[v] = token{
ID: v, ID: v,
Content: k, Content: k,
}) }
} }
for _, t := range t.AddedTokens { for _, token := range t.AddedTokens {
t.UserDefined = true token.UserDefined = true
tokens = append(tokens, t) tokens[token.ID] = token
} }
slices.SortFunc(tokens, func(i, j token) int { keys := maps.Keys(tokens)
return cmp.Compare(i.ID, j.ID) slices.Sort(keys)
})
v := Vocabulary{Model: "gpt2"} v := Vocabulary{Model: "gpt2"}
for _, t := range tokens { for _, k := range keys {
v.Tokens = append(v.Tokens, t.Content) token := tokens[k]
v.Scores = append(v.Scores, float32(t.ID)) v.Tokens = append(v.Tokens, token.Content)
v.Scores = append(v.Scores, float32(token.ID))
switch { switch {
case t.Special: case token.Special:
v.Types = append(v.Types, tokenTypeControl) v.Types = append(v.Types, tokenTypeControl)
case t.UserDefined: case token.UserDefined:
v.Types = append(v.Types, tokenTypeUserDefined) v.Types = append(v.Types, tokenTypeUserDefined)
default: default:
v.Types = append(v.Types, tokenTypeNormal) v.Types = append(v.Types, tokenTypeNormal)

View File

@ -15,6 +15,11 @@ import (
) )
func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) { func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
ast, err := parseAdditionalSpecialTokens(fsys)
if err != nil {
return nil, err
}
bts, err := fs.ReadFile(fsys, "tokenizer.model") bts, err := fs.ReadFile(fsys, "tokenizer.model")
if err != nil { if err != nil {
return nil, err return nil, err
@ -37,7 +42,12 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
sentencepiece.ModelProto_SentencePiece_BYTE: sentencepiece.ModelProto_SentencePiece_BYTE:
v.Types = append(v.Types, int32(t)) v.Types = append(v.Types, int32(t))
default: default:
v.Types = append(v.Types, int32(sentencepiece.ModelProto_SentencePiece_NORMAL)) tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
if slices.Contains(ast, piece.GetPiece()) {
tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
}
v.Types = append(v.Types, tt)
} }
} }
@ -81,3 +91,23 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
return &v, nil return &v, nil
} }
func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
f, err := fsys.Open("special_tokens_map.json")
if errors.Is(err, os.ErrNotExist) {
return nil, nil
} else if err != nil {
return nil, err
}
defer f.Close()
var m struct {
AdditionalSpecialTokens []string `json:"additional_special_tokens"`
}
if err := json.NewDecoder(f).Decode(&m); err != nil {
return nil, err
}
return m.AdditionalSpecialTokens, nil
}

View File

@ -669,7 +669,7 @@ curl http://localhost:11434/api/chat -d '{
``` ```
curl http://localhost:11434/api/chat -d '{ curl http://localhost:11434/api/chat -d '{
"model": "mistral", "model": "llama3.1",
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@ -708,7 +708,7 @@ curl http://localhost:11434/api/chat -d '{
```json ```json
{ {
"model": "mistral:7b-instruct-v0.3-q4_K_M", "model": "llama3.1",
"created_at": "2024-07-22T20:33:28.123648Z", "created_at": "2024-07-22T20:33:28.123648Z",
"message": { "message": {
"role": "assistant", "role": "assistant",
@ -1175,7 +1175,10 @@ curl http://localhost:11434/api/embed -d '{
"embeddings": [[ "embeddings": [[
0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814, 0.010071029, -0.0017594862, 0.05007221, 0.04692972, 0.054916814,
0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348 0.008599704, 0.105441414, -0.025878139, 0.12958129, 0.031952348
]] ]],
"total_duration": 14143917,
"load_duration": 1019500,
"prompt_eval_count": 8
} }
``` ```

View File

@ -16,7 +16,9 @@ If the model being imported is one of these architectures, it can be imported di
- LlamaForCausalLM - LlamaForCausalLM
- MistralForCausalLM - MistralForCausalLM
- MixtralForCausalLM
- GemmaForCausalLM - GemmaForCausalLM
- Phi3ForCausalLM
```dockerfile ```dockerfile
FROM /path/to/safetensors/directory FROM /path/to/safetensors/directory

View File

@ -20,13 +20,12 @@ GPU.
## Manual install ## Manual install
### Download the `ollama` binary ### Download `ollama`
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH: Download and extract the Linux package:
```bash ```bash
sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
sudo chmod +x /usr/bin/ollama
``` ```
### Adding Ollama as a startup service (recommended) ### Adding Ollama as a startup service (recommended)
@ -96,8 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
Or by downloading the ollama binary: Or by downloading the ollama binary:
```bash ```bash
sudo curl -L https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
sudo chmod +x /usr/bin/ollama
``` ```
## Installing specific versions ## Installing specific versions

View File

@ -182,7 +182,6 @@ curl http://localhost:11434/v1/embeddings \
- [x] Reproducible outputs - [x] Reproducible outputs
- [x] Vision - [x] Vision
- [x] Tools (streaming support coming soon) - [x] Tools (streaming support coming soon)
- [ ] Vision
- [ ] Logprobs - [ ] Logprobs
#### Supported request fields #### Supported request fields

View File

@ -112,15 +112,9 @@ Keep the following tips and best practices in mind when working with Go template
ChatML is a popular template format. It can be used for models such as Databrick's DBRX, Intel's Neural Chat, and Microsoft's Orca 2. ChatML is a popular template format. It can be used for models such as Databrick's DBRX, Intel's Neural Chat, and Microsoft's Orca 2.
```gotmpl ```gotmpl
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}
{{- range .Messages }}<|im_start|>{{ .Role }} {{- range .Messages }}<|im_start|>{{ .Role }}
{{ .Content }}<|im_end|> {{ .Content }}<|im_end|>
{{ end }}<|im_start|>assistant {{ end }}<|im_start|>assistant
{{ else }}
{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
``` ```
### Example Tools ### Example Tools

View File

@ -174,7 +174,7 @@ func RunnersDir() (p string) {
defer func() { defer func() {
if p == "" { if p == "" {
slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'") slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
} }
}() }()
@ -190,17 +190,17 @@ func RunnersDir() (p string) {
} }
var paths []string var paths []string
for _, root := range []string{filepath.Dir(exe), cwd} { for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), ".."), cwd} {
paths = append(paths, paths = append(paths,
root, root,
filepath.Join(root, "windows-"+runtime.GOARCH), filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
filepath.Join(root, "dist", "windows-"+runtime.GOARCH), filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
) )
} }
// Try a few variations to improve developer experience when building from source in the local tree // Try a few variations to improve developer experience when building from source in the local tree
for _, path := range paths { for _, path := range paths {
candidate := filepath.Join(path, "ollama_runners") candidate := filepath.Join(path, "lib", "ollama", "runners")
if _, err := os.Stat(candidate); err == nil { if _, err := os.Stat(candidate); err == nil {
p = candidate p = candidate
break break

2
go.mod
View File

@ -1,6 +1,6 @@
module github.com/ollama/ollama module github.com/ollama/ollama
go 1.22.0 go 1.22.5
require ( require (
github.com/containerd/console v1.0.3 github.com/containerd/console v1.0.3

View File

@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) {
// Installer payload location if we're running the installed binary // Installer payload location if we're running the installed binary
exe, err := os.Executable() exe, err := os.Executable()
if err == nil { if err == nil {
rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm") rocmTargetDir := filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
if rocmLibUsable(rocmTargetDir) { if rocmLibUsable(rocmTargetDir) {
slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir) slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
return rocmTargetDir, nil return rocmTargetDir, nil

View File

@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
// Installer payload (if we're running from some other location) // Installer payload (if we're running from some other location)
localAppData := os.Getenv("LOCALAPPDATA") localAppData := os.Getenv("LOCALAPPDATA")
appDir := filepath.Join(localAppData, "Programs", "Ollama") appDir := filepath.Join(localAppData, "Programs", "Ollama")
rocmTargetDir := filepath.Join(appDir, "rocm") rocmTargetDir := filepath.Join(appDir, "..", "lib", "ollama")
if rocmLibUsable(rocmTargetDir) { if rocmLibUsable(rocmTargetDir) {
slog.Debug("detected ollama installed ROCm at " + rocmTargetDir) slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
return rocmTargetDir, nil return rocmTargetDir, nil

View File

@ -49,13 +49,9 @@ func PayloadsDir() (string, error) {
} }
// Track our pid so we can clean up orphaned tmpdirs // Track our pid so we can clean up orphaned tmpdirs
pidFilePath := filepath.Join(tmpDir, "ollama.pid") n := filepath.Join(tmpDir, "ollama.pid")
pidFile, err := os.OpenFile(pidFilePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm) if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
if err != nil { return "", fmt.Errorf("failed to write pid file %s: %w", n, err)
return "", err
}
if _, err := pidFile.Write([]byte(strconv.Itoa(os.Getpid()))); err != nil {
return "", err
} }
// We create a distinct subdirectory for payloads within the tmpdir // We create a distinct subdirectory for payloads within the tmpdir
@ -67,37 +63,44 @@ func PayloadsDir() (string, error) {
// Best effort to clean up prior tmpdirs // Best effort to clean up prior tmpdirs
func cleanupTmpDirs() { func cleanupTmpDirs() {
dirs, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*")) matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*", "ollama.pid"))
if err != nil { if err != nil {
return return
} }
for _, d := range dirs {
info, err := os.Stat(d) for _, match := range matches {
if err != nil || !info.IsDir() { raw, err := os.ReadFile(match)
if errors.Is(err, os.ErrNotExist) {
slog.Debug("not a ollama runtime directory, skipping", "path", match)
continue continue
} } else if err != nil {
raw, err := os.ReadFile(filepath.Join(d, "ollama.pid")) slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
if err != nil {
slog.Warn("failed to read ollama.pid", "path", d, "error", err)
// No pid, ignore this tmpdir
continue continue
} }
pid, err := strconv.Atoi(string(raw)) pid, err := strconv.Atoi(string(raw))
if err != nil { if err != nil {
slog.Warn("failed to parse pid", "path", d, "error", err) slog.Warn("invalid pid, skipping", "path", match, "error", err)
continue continue
} }
proc, err := os.FindProcess(pid) p, err := os.FindProcess(pid)
if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) { if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
slog.Warn("found running ollama", "pid", pid, "path", d) slog.Warn("process still running, skipping", "pid", pid, "path", match)
// Another running ollama, ignore this tmpdir
continue continue
} }
if err := os.Remove(d); err != nil { if err := os.Remove(match); err != nil {
slog.Warn("unable to cleanup stale tmpdir", "path", d, "error", err) slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
}
runners := filepath.Join(filepath.Dir(match), "runners")
if err := os.RemoveAll(runners); err != nil {
slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
}
if err := os.Remove(filepath.Dir(match)); err != nil {
slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
} }
} }
} }

View File

@ -1,6 +1,11 @@
package gpu package gpu
import ( import (
"os"
"path/filepath"
"runtime"
"strings"
"golang.org/x/sys/cpu" "golang.org/x/sys/cpu"
) )
@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
// else LCD // else LCD
return CPUCapabilityNone return CPUCapabilityNone
} }
func IsNUMA() bool {
if runtime.GOOS != "linux" {
// numa support in llama.cpp is linux only
return false
}
ids := map[string]interface{}{}
packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
for _, packageId := range packageIds {
id, err := os.ReadFile(packageId)
if err == nil {
ids[strings.TrimSpace(string(id))] = struct{}{}
}
}
return len(ids) > 1
}

View File

@ -4,9 +4,17 @@ package gpu
import ( import (
"log/slog" "log/slog"
"os"
"regexp"
"runtime"
"strconv"
"strings" "strings"
) )
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
ids := []string{} ids := []string{}
for _, info := range gpuInfo { for _, info := range gpuInfo {
@ -19,3 +27,38 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
} }
return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",") return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
} }
func cudaVariant(gpuInfo CudaGPUInfo) string {
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
if CudaTegra != "" {
ver := strings.Split(CudaTegra, ".")
if len(ver) > 0 {
return "jetpack" + ver[0]
}
} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
r := regexp.MustCompile(` R(\d+) `)
m := r.FindSubmatch(data)
if len(m) != 2 {
slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
} else {
if l4t, err := strconv.Atoi(string(m[1])); err == nil {
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
// https://developer.nvidia.com/embedded/jetpack-archive
switch l4t {
case 35:
return "jetpack5"
case 36:
return "jetpack6"
default:
slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
}
}
}
}
}
if gpuInfo.computeMajor < 6 || gpuInfo.DriverMajor < 12 {
return "v11"
}
return "v12"
}

View File

@ -64,10 +64,6 @@ var RocmComputeMin = 9
// TODO find a better way to detect iGPU instead of minimum memory // TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
// Note: gpuMutex must already be held // Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles { func initCudaHandles() *cudaHandles {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing // TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
@ -215,7 +211,7 @@ func GetGPUInfo() GpuInfoList {
GpuInfo: GpuInfo{ GpuInfo: GpuInfo{
memInfo: mem, memInfo: mem,
Library: "cpu", Library: "cpu",
Variant: cpuCapability, Variant: cpuCapability.String(),
ID: "0", ID: "0",
}, },
}, },
@ -229,11 +225,7 @@ func GetGPUInfo() GpuInfoList {
return GpuInfoList{cpus[0].GpuInfo} return GpuInfoList{cpus[0].GpuInfo}
} }
// On windows we bundle the nvidia library one level above the runner dir depPath := LibraryDir()
depPath := ""
if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" {
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "cuda")
}
// Load ALL libraries // Load ALL libraries
cHandles = initCudaHandles() cHandles = initCudaHandles()
@ -269,11 +261,23 @@ func GetGPUInfo() GpuInfoList {
gpuInfo.FreeMemory = uint64(memInfo.free) gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor) gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.computeMajor = int(memInfo.major)
gpuInfo.computeMinor = int(memInfo.minor)
gpuInfo.MinimumMemory = cudaMinimumMemory gpuInfo.MinimumMemory = cudaMinimumMemory
gpuInfo.DependencyPath = depPath variant := cudaVariant(gpuInfo)
if depPath != "" {
gpuInfo.DependencyPath = depPath
// Check for variant specific directory
if variant != "" {
if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
}
}
}
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = driverMajor gpuInfo.DriverMajor = driverMajor
gpuInfo.DriverMinor = driverMinor gpuInfo.DriverMinor = driverMinor
gpuInfo.Variant = variant
// query the management library as well so we can record any skew between the two // query the management library as well so we can record any skew between the two
// which represents overhead on the GPU we must set aside on subsequent updates // which represents overhead on the GPU we must set aside on subsequent updates
@ -305,38 +309,34 @@ func GetGPUInfo() GpuInfoList {
// Intel // Intel
if envconfig.IntelGPU() { if envconfig.IntelGPU() {
oHandles = initOneAPIHandles() oHandles = initOneAPIHandles()
// On windows we bundle the oneapi library one level above the runner dir if oHandles != nil && oHandles.oneapi != nil {
depPath = "" for d := range oHandles.oneapi.num_drivers {
if runtime.GOOS == "windows" && envconfig.RunnersDir() != "" { if oHandles.oneapi == nil {
depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir()), "oneapi") // shouldn't happen
} slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
continue
for d := range oHandles.oneapi.num_drivers { }
if oHandles.oneapi == nil { devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
// shouldn't happen for i := range devCount {
slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers)) gpuInfo := OneapiGPUInfo{
continue GpuInfo: GpuInfo{
} Library: "oneapi",
devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d)) },
for i := range devCount { driverIndex: int(d),
gpuInfo := OneapiGPUInfo{ gpuIndex: int(i),
GpuInfo: GpuInfo{ }
Library: "oneapi", // TODO - split bootstrapping from updating free memory
}, C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
driverIndex: int(d), // TODO - convert this to MinimumMemory based on testing...
gpuIndex: int(i), var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DependencyPath = depPath
oneapiGPUs = append(oneapiGPUs, gpuInfo)
} }
// TODO - split bootstrapping from updating free memory
C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
// TODO - convert this to MinimumMemory based on testing...
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
memInfo.free = C.uint64_t(totalFreeMem)
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DependencyPath = depPath
oneapiGPUs = append(oneapiGPUs, gpuInfo)
} }
} }
} }
@ -464,10 +464,12 @@ func GetGPUInfo() GpuInfoList {
func FindGPULibs(baseLibName string, defaultPatterns []string) []string { func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them // Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
var ldPaths []string var ldPaths []string
var patterns []string
gpuLibPaths := []string{} gpuLibPaths := []string{}
slog.Debug("Searching for GPU library", "name", baseLibName) slog.Debug("Searching for GPU library", "name", baseLibName)
// Start with our bundled libraries
patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
switch runtime.GOOS { switch runtime.GOOS {
case "windows": case "windows":
ldPaths = strings.Split(os.Getenv("PATH"), ";") ldPaths = strings.Split(os.Getenv("PATH"), ";")
@ -476,13 +478,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
default: default:
return gpuLibPaths return gpuLibPaths
} }
// Start with whatever we find in the PATH/LD_LIBRARY_PATH
// Then with whatever we find in the PATH/LD_LIBRARY_PATH
for _, ldPath := range ldPaths { for _, ldPath := range ldPaths {
d, err := filepath.Abs(ldPath) d, err := filepath.Abs(ldPath)
if err != nil { if err != nil {
continue continue
} }
patterns = append(patterns, filepath.Join(d, baseLibName+"*")) patterns = append(patterns, filepath.Join(d, baseLibName))
} }
patterns = append(patterns, defaultPatterns...) patterns = append(patterns, defaultPatterns...)
slog.Debug("gpu library search", "globs", patterns) slog.Debug("gpu library search", "globs", patterns)
@ -638,3 +641,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
return "", "" return "", ""
} }
} }
func LibraryDir() string {
// On Windows/linux we bundle the dependencies at the same level as the executable
appExe, err := os.Executable()
if err != nil {
slog.Warn("failed to lookup executable path", "error", err)
}
cwd, err := os.Getwd()
if err != nil {
slog.Warn("failed to lookup working directory", "error", err)
}
// Scan for any of our dependeices, and pick first match
for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), ".."), cwd} {
libDep := filepath.Join("lib", "ollama")
if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
return filepath.Join(root, libDep)
}
// Developer mode, local build
if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
}
if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
}
}
slog.Warn("unable to locate gpu dependency libraries")
return ""
}

View File

@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
return []GpuInfo{ return []GpuInfo{
{ {
Library: "cpu", Library: "cpu",
Variant: GetCPUCapability(), Variant: GetCPUCapability().String(),
memInfo: mem, memInfo: mem,
}, },
} }
@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
return []GpuInfo{ return []GpuInfo{
{ {
Library: "cpu", Library: "cpu",
Variant: GetCPUCapability(), Variant: GetCPUCapability().String(),
memInfo: mem, memInfo: mem,
}, },
} }

View File

@ -47,7 +47,7 @@ var (
CudartMgmtName = "libcudart.so*" CudartMgmtName = "libcudart.so*"
NvcudaMgmtName = "libcuda.so*" NvcudaMgmtName = "libcuda.so*"
NvmlMgmtName = "" // not currently wired on linux NvmlMgmtName = "" // not currently wired on linux
OneapiMgmtName = "libze_intel_gpu.so" OneapiMgmtName = "libze_intel_gpu.so*"
) )
func GetCPUMem() (memInfo, error) { func GetCPUMem() (memInfo, error) {

View File

@ -19,7 +19,7 @@ type GpuInfo struct {
Library string `json:"library,omitempty"` Library string `json:"library,omitempty"`
// Optional variant to select (e.g. versions, cpu feature flags) // Optional variant to select (e.g. versions, cpu feature flags)
Variant CPUCapability `json:"variant"` Variant string `json:"variant"`
// MinimumMemory represents the minimum memory required to use the GPU // MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory uint64 `json:"-"` MinimumMemory uint64 `json:"-"`
@ -53,8 +53,10 @@ type CPUInfo struct {
type CudaGPUInfo struct { type CudaGPUInfo struct {
GpuInfo GpuInfo
OSOverhead uint64 // Memory overhead between the driver library and management library OSOverhead uint64 // Memory overhead between the driver library and management library
index int //nolint:unused,nolintlint index int //nolint:unused,nolintlint
computeMajor int //nolint:unused,nolintlint
computeMinor int //nolint:unused,nolintlint
} }
type CudaGPUInfoList []CudaGPUInfo type CudaGPUInfoList []CudaGPUInfo
@ -81,8 +83,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
for _, info := range l { for _, info := range l {
found := false found := false
requested := info.Library requested := info.Library
if info.Variant != CPUCapabilityNone { if info.Variant != CPUCapabilityNone.String() {
requested += "_" + info.Variant.String() requested += "_" + info.Variant
} }
for i, lib := range libs { for i, lib := range libs {
if lib == requested { if lib == requested {
@ -105,6 +107,7 @@ func (l GpuInfoList) LogDetails() {
slog.Info("inference compute", slog.Info("inference compute",
"id", g.ID, "id", g.ID,
"library", g.Library, "library", g.Library,
"variant", g.Variant,
"compute", g.Compute, "compute", g.Compute,
"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor), "driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
"name", g.Name, "name", g.Name,

View File

@ -5,6 +5,7 @@ package integration
import ( import (
"context" "context"
"log/slog" "log/slog"
"os"
"strconv" "strconv"
"sync" "sync"
"testing" "testing"
@ -13,7 +14,6 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
) )
@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
}, },
} }
resp = [2][]string{ resp = [2][]string{
[]string{"sunlight"}, {"sunlight"},
[]string{"england", "english", "massachusetts", "pilgrims", "british"}, {"england", "english", "massachusetts", "pilgrims", "british"},
} }
) )
var wg sync.WaitGroup var wg sync.WaitGroup
@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
reqLimit := len(req) reqLimit := len(req)
iterLimit := 5 iterLimit := 5
vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
if vram != "" { maxVram, err := strconv.ParseUint(s, 10, 64)
max, err := strconv.ParseUint(vram, 10, 64)
require.NoError(t, err) require.NoError(t, err)
// Don't hammer on small VRAM cards... // Don't hammer on small VRAM cards...
if max < 4*1024*1024*1024 { if maxVram < 4*format.GibiByte {
reqLimit = min(reqLimit, 2) reqLimit = min(reqLimit, 2)
iterLimit = 2 iterLimit = 2
} }
@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
consumed := uint64(256 * format.MebiByte) // Assume some baseline usage consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
for i := 0; i < len(req); i++ { for i := 0; i < len(req); i++ {
// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long // Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
if i > 1 && consumed > vram { if i > 1 && consumed > maxVram {
slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
break break
} }
consumed += chosenModels[i].size consumed += chosenModels[i].size
slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed)) slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
wg.Add(1) wg.Add(1)
go func(i int) { go func(i int) {

View File

@ -70,8 +70,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0]) t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
} }
if res.PromptEvalCount != 8 { if res.PromptEvalCount != 6 {
t.Fatalf("expected 8 prompt tokens, got %d", res.PromptEvalCount) t.Fatalf("expected 6 prompt tokens, got %d", res.PromptEvalCount)
} }
} }
@ -102,8 +102,8 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0]) t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
} }
if res.PromptEvalCount != 16 { if res.PromptEvalCount != 12 {
t.Fatalf("expected 16 prompt tokens, got %d", res.PromptEvalCount) t.Fatalf("expected 12 prompt tokens, got %d", res.PromptEvalCount)
} }
} }

View File

@ -35,8 +35,8 @@ var (
}, },
} }
resp = [2][]string{ resp = [2][]string{
[]string{"sunlight"}, {"sunlight"},
[]string{"england", "english", "massachusetts", "pilgrims"}, {"england", "english", "massachusetts", "pilgrims"},
} }
) )

View File

@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits // Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
threadCount := 32 threadCount := 32
if maxQueue := envconfig.MaxQueue(); maxQueue != 0 { if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
threadCount = maxQueue threadCount = int(maxQueue)
} else { } else {
t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount)) t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
} }

View File

@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
}, },
}, },
[][]string{ [][]string{
[]string{"sunlight"}, {"sunlight"},
[]string{"soil", "organic", "earth", "black", "tan"}, {"soil", "organic", "earth", "black", "tan"},
[]string{"england", "english", "massachusetts", "pilgrims", "british"}, {"england", "english", "massachusetts", "pilgrims", "british"},
[]string{"fourth", "july", "declaration", "independence"}, {"fourth", "july", "declaration", "independence"},
[]string{"nitrogen", "oxygen", "carbon", "dioxide"}, {"nitrogen", "oxygen", "carbon", "dioxide"},
} }
} }

View File

@ -1,12 +1,13 @@
set(TARGET ollama_llama_server) set(TARGET ollama_llama_server)
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON) option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
set(LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR})
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h) add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
install(TARGETS ${TARGET} RUNTIME) install(TARGETS ${TARGET} RUNTIME)
target_compile_definitions(${TARGET} PRIVATE target_compile_definitions(${TARGET} PRIVATE
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}> SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
) )
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_SERVER_LDFLAGS})
if (WIN32) if (WIN32)
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32) TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
endif() endif()

View File

@ -44,6 +44,7 @@
#include <errhandlingapi.h> #include <errhandlingapi.h>
#endif #endif
#include <algorithm>
#include <cstddef> #include <cstddef>
#include <thread> #include <thread>
#include <chrono> #include <chrono>
@ -402,7 +403,9 @@ struct llama_server_context
} }
} }
std::tie(model, ctx) = llama_init_from_gpt_params(params); auto init_result = llama_init_from_gpt_params(params);
model = init_result.model;
ctx = init_result.context;
if (model == nullptr) if (model == nullptr)
{ {
LOG_ERROR("unable to load model", {{"model", params.model}}); LOG_ERROR("unable to load model", {{"model", params.model}});
@ -1221,7 +1224,6 @@ struct llama_server_context
res.result_json = json res.result_json = json
{ {
{"embedding", std::vector<float>(embd, embd + n_embd)}, {"embedding", std::vector<float>(embd, embd + n_embd)},
{"timings", slot.get_formated_timings()},
}; };
} }
} }
@ -1427,7 +1429,13 @@ struct llama_server_context
switch (task.type) switch (task.type)
{ {
case TASK_TYPE_COMPLETION: { case TASK_TYPE_COMPLETION: {
server_slot *slot = prefix_slot(task.data["prompt"]); server_slot *slot = nullptr;
if (task.embedding_mode) {
// Embedding seq_id (aka slot id) must always be <= token length, so always use slot 0
slot = slots[0].available() ? &slots[0] : nullptr;
} else {
slot = prefix_slot(task.data["prompt"]);
}
if (slot == nullptr) if (slot == nullptr)
{ {
// if no slot is available, we defer this task for processing later // if no slot is available, we defer this task for processing later
@ -2420,7 +2428,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
invalid_param = true; invalid_param = true;
break; break;
} }
params.lora_adapter.emplace_back(argv[i], 1.0f); params.lora_adapters.push_back({
std::string(argv[i]),
1.0,
});
params.use_mmap = false; params.use_mmap = false;
} }
else if (arg == "--lora-scaled") else if (arg == "--lora-scaled")
@ -2436,7 +2447,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
invalid_param = true; invalid_param = true;
break; break;
} }
params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i])); params.lora_adapters.push_back({
lora_adapter,
std::stof(argv[i])
});
params.use_mmap = false; params.use_mmap = false;
} }
else if (arg == "-v" || arg == "--verbose") else if (arg == "-v" || arg == "--verbose")
@ -3184,37 +3198,17 @@ int main(int argc, char **argv) {
prompt = ""; prompt = "";
} }
if (prompt.size() == 1) {
prompt = prompt[0];
}
// create and queue the task // create and queue the task
json responses; const int task_id = llama.queue_tasks.get_new_id();
{ llama.queue_results.add_waiting_task_id(task_id);
const int id_task = llama.queue_tasks.get_new_id(); llama.request_completion(task_id, {{"prompt", prompt}}, true, -1);
llama.queue_results.add_waiting_task_id(id_task);
llama.request_completion(id_task, {{"prompt", prompt}}, true, -1);
// get the result // get the result
task_result result = llama.queue_results.recv(id_task); task_result result = llama.queue_results.recv(task_id);
llama.queue_results.remove_waiting_task_id(id_task); llama.queue_results.remove_waiting_task_id(task_id);
if (result.error) {
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
}
responses = result.result_json.value("results", std::vector<json>{result.result_json}); // send the result
json embeddings = json::array(); return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
int prompt_n = 0;
for (auto & elem : responses) {
embeddings.push_back(elem.at("embedding"));
prompt_n += elem.at("timings").at("prompt_n").get<int>();
}
// send the result
json embedding_res = json{{"embedding", embeddings}, {"prompt_n", prompt_n}};
return res.set_content(embedding_res.dump(), "application/json; charset=utf-8");
}
}); });
// GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!? // GG: if I put the main loop inside a thread, it crashes on the first request when build in Debug!?

View File

@ -9,11 +9,14 @@ init_vars() {
ARCH="arm64" ARCH="arm64"
;; ;;
*) *)
ARCH=$(uname -m | sed -e "s/aarch64/arm64/g") echo "GOARCH must be set"
echo "this script is meant to be run from within go generate"
exit 1
;;
esac esac
LLAMACPP_DIR=../llama.cpp LLAMACPP_DIR=../llama.cpp
CMAKE_DEFS="" CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on"
CMAKE_TARGETS="--target ollama_llama_server" CMAKE_TARGETS="--target ollama_llama_server"
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}" CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
@ -27,6 +30,7 @@ init_vars() {
WHOLE_ARCHIVE="-Wl,-force_load" WHOLE_ARCHIVE="-Wl,-force_load"
NO_WHOLE_ARCHIVE="" NO_WHOLE_ARCHIVE=""
GCC_ARCH="-arch ${ARCH}" GCC_ARCH="-arch ${ARCH}"
DIST_BASE=../../dist/darwin-${GOARCH}/
;; ;;
"Linux") "Linux")
LIB_EXT="so" LIB_EXT="so"
@ -35,6 +39,7 @@ init_vars() {
# Cross compiling not supported on linux - Use docker # Cross compiling not supported on linux - Use docker
GCC_ARCH="" GCC_ARCH=""
DIST_BASE=../../dist/linux-${GOARCH}/
;; ;;
*) *)
;; ;;
@ -42,6 +47,7 @@ init_vars() {
if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then
CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80" CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
fi fi
GZIP=$(which pigz 2>/dev/null || echo "gzip")
} }
git_module_setup() { git_module_setup() {
@ -85,26 +91,36 @@ build() {
compress() { compress() {
echo "Compressing payloads to reduce overall binary size..." echo "Compressing payloads to reduce overall binary size..."
pids=""
rm -rf ${BUILD_DIR}/bin/*.gz rm -rf ${BUILD_DIR}/bin/*.gz
for f in ${BUILD_DIR}/bin/* ; do for f in ${BUILD_DIR}/bin/* ; do
gzip -n --best -f ${f} & ${GZIP} -n --best -f ${f} &
pids+=" $!" compress_pids+=" $!"
done done
# check for lib directory # check for lib directory
if [ -d ${BUILD_DIR}/lib ]; then if [ -d ${BUILD_DIR}/lib ]; then
for f in ${BUILD_DIR}/lib/* ; do for f in ${BUILD_DIR}/lib/* ; do
gzip -n --best -f ${f} & ${GZIP} -n --best -f ${f} &
pids+=" $!" compress_pids+=" $!"
done done
fi fi
echo echo
for pid in ${pids}; do }
wait_for_compress() {
for pid in ${compress_pids}; do
wait $pid wait $pid
done done
echo "Finished compression" echo "Finished compression"
} }
install() {
echo "Installing libraries to bin dir ${BUILD_DIR}/bin/"
for lib in $(find ${BUILD_DIR} -name \*.${LIB_EXT}); do
rm -f "${BUILD_DIR}/bin/$(basename ${lib})"
cp -af "${lib}" "${BUILD_DIR}/bin/"
done
}
# Keep the local tree clean after we're done with the build # Keep the local tree clean after we're done with the build
cleanup() { cleanup() {
(cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt) (cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt)

View File

@ -6,6 +6,7 @@
set -ex set -ex
set -o pipefail set -o pipefail
compress_pids=""
echo "Starting darwin generate script" echo "Starting darwin generate script"
source $(dirname $0)/gen_common.sh source $(dirname $0)/gen_common.sh
init_vars init_vars
@ -98,4 +99,5 @@ case "${GOARCH}" in
esac esac
cleanup cleanup
wait_for_compress
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)" echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

View File

@ -13,6 +13,7 @@
set -ex set -ex
set -o pipefail set -o pipefail
compress_pids=""
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference # See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() { amdGPUs() {
@ -51,7 +52,7 @@ if [ -z "${CUDACXX}" ]; then
export CUDACXX=$(command -v nvcc) export CUDACXX=$(command -v nvcc)
fi fi
fi fi
COMMON_CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off" COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source $(dirname $0)/gen_common.sh source $(dirname $0)/gen_common.sh
init_vars init_vars
git_module_setup git_module_setup
@ -77,10 +78,11 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
init_vars init_vars
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\"" echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}" CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
BUILD_DIR="../build/linux/${ARCH}/cpu" BUILD_DIR="../build/linux/${ARCH}/cpu"
echo "Building custom CPU" echo "Building custom CPU"
build build
install
compress compress
else else
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512 # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
@ -93,7 +95,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off" COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
# #
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta) # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
@ -103,6 +105,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR="../build/linux/${ARCH}/cpu" BUILD_DIR="../build/linux/${ARCH}/cpu"
echo "Building LCD CPU" echo "Building LCD CPU"
build build
install
compress compress
fi fi
@ -120,6 +123,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR="../build/linux/${ARCH}/cpu_avx" BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
echo "Building AVX CPU" echo "Building AVX CPU"
build build
install
compress compress
fi fi
@ -133,6 +137,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR="../build/linux/${ARCH}/cpu_avx2" BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
echo "Building AVX2 CPU" echo "Building AVX2 CPU"
build build
install
compress compress
fi fi
fi fi
@ -160,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
echo "CUDA libraries detected - building dynamic CUDA library" echo "CUDA libraries detected - building dynamic CUDA library"
init_vars init_vars
CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true) CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
if [ -n "${CUDA_MAJOR}" ]; then if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then
CUDA_VARIANT=_v${CUDA_MAJOR} CUDA_VARIANT=_v${CUDA_MAJOR}
fi fi
if [ "${ARCH}" == "arm64" ]; then if [ "${ARCH}" == "arm64" ]; then
@ -178,29 +183,19 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}" CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
echo "Building custom CUDA GPU" echo "Building custom CUDA GPU"
else else
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
fi fi
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}" export CUDAFLAGS="-t8"
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}" BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda" export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}"
build build
install
# Carry the CUDA libs as payloads to help reduce dependency burden on users echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
# mkdir -p "${CUDA_DIST_DIR}"
# TODO - in the future we may shift to packaging these separately and conditionally for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
# downloading them in the install script. cp -a "${lib}" "${CUDA_DIST_DIR}"
DEPS="$(ldd ${BUILD_DIR}/bin/ollama_llama_server )"
for lib in libcudart.so libcublas.so libcublasLt.so ; do
DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true)
if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then
cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/bin/"
elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/bin/"
elif [ -e "${CUDART_LIB_DIR}/${lib}" ]; then
cp -d ${CUDART_LIB_DIR}/${lib}* "${BUILD_DIR}/bin/"
else
cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/bin/"
fi
done done
compress compress
@ -218,21 +213,24 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
CC=icx CC=icx
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF" CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
BUILD_DIR="../build/linux/${ARCH}/oneapi" BUILD_DIR="../build/linux/${ARCH}/oneapi"
EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb" ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama"
export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
build build
# copy oneAPI dependencies # copy oneAPI dependencies
mkdir -p "${ONEAPI_DIST_DIR}"
for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
cp "${dep}" "${BUILD_DIR}/bin/" cp -a "${dep}" "${ONEAPI_DIST_DIR}"
done done
cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/" cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/" cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/" cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/" cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/" cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/" cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/" cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
install
compress compress
fi fi
@ -262,23 +260,22 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
echo "Building custom ROCM GPU" echo "Building custom ROCM GPU"
fi fi
BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}" BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu" # ROCm dependencies are too large to fit into a unified bundle
ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama"
# TODO figure out how to disable runpath (rpath)
# export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build build
# Record the ROCM dependencies # copy the ROCM dependencies
rm -f "${BUILD_DIR}/bin/deps.txt" mkdir -p "${ROCM_DIST_DIR}"
touch "${BUILD_DIR}/bin/deps.txt" for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do
for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do cp -a "${dep}"* "${ROCM_DIST_DIR}"
echo "${dep}" >> "${BUILD_DIR}/bin/deps.txt"
done done
# bomb out if for some reason we didn't get a few deps install
if [ $(cat "${BUILD_DIR}/bin/deps.txt" | wc -l ) -lt 8 ] ; then
cat "${BUILD_DIR}/bin/deps.txt"
echo "ERROR: deps file short"
exit 1
fi
compress compress
fi fi
cleanup cleanup
wait_for_compress
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)" echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

View File

@ -35,7 +35,7 @@ function init_vars {
) )
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on") $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
$script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower() $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners" $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\runners"
md "$script:DIST_BASE" -ea 0 > $null md "$script:DIST_BASE" -ea 0 > $null
if ($env:CGO_CFLAGS -contains "-g") { if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo") $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
@ -117,7 +117,7 @@ function build {
if ($cmakeDefs -contains "-G") { if ($cmakeDefs -contains "-G") {
$extra=@("-j8") $extra=@("-j8")
} else { } else {
$extra= @("--", "/p:CL_MPcount=8") $extra= @("--", "/maxCpuCount:8")
} }
write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra" write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
@ -261,7 +261,7 @@ function build_cuda() {
if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) { if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
# Then build cuda as a dynamically loaded library # Then build cuda as a dynamically loaded library
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe" $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename $script:CUDA_VERSION=((get-item ($nvcc | split-path | split-path)).Basename -Split "\.")[0]
if ($null -ne $script:CUDA_VERSION) { if ($null -ne $script:CUDA_VERSION) {
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
} }
@ -273,9 +273,9 @@ function build_cuda() {
"-DGGML_CUDA=ON", "-DGGML_CUDA=ON",
"-DGGML_AVX=on", "-DGGML_AVX=on",
"-DGGML_AVX2=off", "-DGGML_AVX2=off",
"-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_FLAGS=-t6",
"-DCMAKE_CUDA_FLAGS=-t8", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}",
"-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}" "-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH"
) )
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) { if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`"" write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
@ -286,12 +286,11 @@ function build_cuda() {
sign sign
install install
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" md "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ea 0 > $null
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
} else { } else {
write-host "Skipping CUDA generation step" write-host "Skipping CUDA generation step"
} }
@ -325,18 +324,17 @@ function build_oneapi() {
sign sign
install install
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" md "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ea 0 > $null
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
} else { } else {
Write-Host "Skipping oneAPI generation step" Write-Host "Skipping oneAPI generation step"
} }
@ -386,12 +384,11 @@ function build_rocm() {
sign sign
install install
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\" md "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\rocblas\library\" -ea 0 > $null
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\" cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\rocblas\library\"
} else { } else {
write-host "Skipping ROCm generation step" write-host "Skipping ROCm generation step"
} }

View File

@ -157,6 +157,14 @@ type Tensor struct {
io.WriterTo `json:"-"` io.WriterTo `json:"-"`
} }
func (t Tensor) block() (n int) {
if _, err := fmt.Sscanf(t.Name, "blk.%d.", &n); err != nil {
return -1
}
return
}
func (t Tensor) blockSize() uint64 { func (t Tensor) blockSize() uint64 {
switch t.Kind { switch t.Kind {
case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16 case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16

View File

@ -532,15 +532,14 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
} }
} }
slices.SortFunc(ts, func(a, b Tensor) int { slices.SortStableFunc(ts, func(a, b Tensor) int {
var i, j int if i, j := a.block(), b.block(); i < 0 && j > 0 {
if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 { return 1
return cmp.Compare(a.Name, b.Name) } else if i > 0 && j < 0 {
} else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 { return -1
return cmp.Compare(a.Name, b.Name) } else {
return cmp.Compare(i, j)
} }
return cmp.Compare(i, j)
}) })
var s uint64 var s uint64

@ -1 +1 @@
Subproject commit 6eeaeba126ff701f3e8f79f246805b7023709972 Subproject commit 1e6f6554aa11fa10160a5fda689e736c3c34169f

View File

@ -33,7 +33,6 @@ func TestEstimateGPULayers(t *testing.T) {
assert.Len(t, tensors, inputLayerCount+1) assert.Len(t, tensors, inputLayerCount+1)
err = WriteGGUF(f, KV{ err = WriteGGUF(f, KV{
"general.architecture": "llama", "general.architecture": "llama",
"general.name": "name",
"llama.context_length": uint32(32), "llama.context_length": uint32(32),
"llama.embedding_length": uint32(4096), "llama.embedding_length": uint32(4096),
"llama.block_count": uint32(inputLayerCount), "llama.block_count": uint32(inputLayerCount),

View File

@ -1,60 +0,0 @@
diff --git a/src/llama.cpp b/src/llama.cpp
index 721b8f4e..cfe7ac40 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -8420,14 +8420,14 @@ struct llm_build_context {
}
struct ggml_tensor * build_inp_mean() {
- lctx.inp_mean = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, n_tokens);
+ lctx.inp_mean = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, cparams.n_seq_max);
cb(lctx.inp_mean, "inp_mean", -1);
ggml_set_input(lctx.inp_mean);
return lctx.inp_mean;
}
struct ggml_tensor * build_inp_cls() {
- lctx.inp_cls = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
+ lctx.inp_cls = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, cparams.n_seq_max);
cb(lctx.inp_cls, "inp_cls", -1);
ggml_set_input(lctx.inp_cls);
return lctx.inp_cls;
@@ -13847,19 +13847,16 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_mean->buffer));
float * data = (float *) lctx.inp_mean->data;
- memset(lctx.inp_mean->data, 0, n_tokens * n_tokens * ggml_element_size(lctx.inp_mean));
+ memset(lctx.inp_mean->data, 0, n_tokens * cparams.n_seq_max * ggml_element_size(lctx.inp_mean));
std::vector<uint64_t> sum(n_tokens, 0);
for (int i = 0; i < n_tokens; ++i) {
const llama_seq_id seq_id = batch.seq_id[i][0];
-
- GGML_ASSERT(seq_id < n_tokens && "seq_id cannot be larger than n_tokens with pooling_type == MEAN");
-
sum[seq_id] += 1;
}
- std::vector<float> div(n_tokens, 0.0f);
- for (int i = 0; i < n_tokens; ++i) {
+ std::vector<float> div(cparams.n_seq_max, 0.0f);
+ for (uint32_t i = 0; i < cparams.n_seq_max; ++i) {
const uint64_t s = sum[i];
if (s > 0) {
div[i] = 1.0f/float(s);
@@ -13879,14 +13876,11 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_cls->buffer));
uint32_t * data = (uint32_t *) lctx.inp_cls->data;
- memset(lctx.inp_cls->data, 0, n_tokens * ggml_element_size(lctx.inp_cls));
+ memset(lctx.inp_cls->data, 0, cparams.n_seq_max * ggml_element_size(lctx.inp_cls));
for (int i = 0; i < n_tokens; ++i) {
const llama_seq_id seq_id = batch.seq_id[i][0];
const llama_pos pos = batch.pos[i];
-
- GGML_ASSERT(seq_id < n_tokens && "seq_id cannot be larger than n_tokens with pooling_type == CLS");
-
if (pos == 0) {
data[seq_id] = i;
}

View File

@ -1,40 +1,32 @@
diff --git a/common/common.cpp b/common/common.cpp diff --git a/common/common.cpp b/common/common.cpp
index dbb724fb..c26fe6ee 100644 index 2e8374d5..70d0afde 100644
--- a/common/common.cpp --- a/common/common.cpp
+++ b/common/common.cpp +++ b/common/common.cpp
@@ -2087,14 +2087,27 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par @@ -2110,9 +2110,21 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) { loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]); if (loaded_la.adapter == nullptr) {
float lora_scale = std::get<1>(params.lora_adapter[i]); fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
+
+ // try to load as gguf
auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
if (adapter == nullptr) {
- fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
- llama_free(lctx); - llama_free(lctx);
- llama_free_model(model); - llama_free_model(model);
- return std::make_tuple(nullptr, nullptr); - return iparams;
+ fprintf(stderr, "%s: error: failed to apply lora adapter, trying ggla\n", __func__);
+ +
+ // if that fails, try loading as ggla for compatibility + // if that fails, try loading as ggla for compatibility
+ int err = llama_model_apply_lora_from_file(model, + int err = llama_model_apply_lora_from_file(model,
+ lora_adapter.c_str(), + la.path.c_str(),
+ lora_scale, + la.scale,
+ nullptr, + nullptr,
+ params.n_threads); + params.n_threads);
+ if (err != 0) { + if (err != 0) {
+ fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__); + fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
+ llama_free(lctx); + llama_free(lctx);
+ llama_free_model(model); + llama_free_model(model);
+ return std::make_tuple(nullptr, nullptr); + return iparams;
+ } else {
+ break;
+ } + }
+ } else {
+ llama_lora_adapter_set(lctx, adapter, lora_scale);
} }
- llama_lora_adapter_set(lctx, adapter, lora_scale); iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
} }
if (params.ignore_eos) {
diff --git a/include/llama.h b/include/llama.h diff --git a/include/llama.h b/include/llama.h
index 93fd77ca..b0fb37a6 100644 index 93fd77ca..b0fb37a6 100644
--- a/include/llama.h --- a/include/llama.h
@ -355,4 +347,4 @@ index 80a0dd0f..9d7b0e17 100644
+ return 1; + return 1;
+ } + }
+} +}
\ No newline at end of file \ No newline at end of file

View File

@ -1,20 +0,0 @@
diff --git a/src/llama.cpp b/src/llama.cpp
index a207451f..fba6b175 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4969,6 +4969,7 @@ static void llm_load_hparams(
hparams.attn_soft_cap = true;
switch (hparams.n_layer) {
+ case 26: model.type = e_model::MODEL_2B; break;
case 42: model.type = e_model::MODEL_9B; break;
case 46: model.type = e_model::MODEL_27B; break;
default: model.type = e_model::MODEL_UNKNOWN;
@@ -11736,6 +11737,7 @@ struct llm_build_context {
// ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
switch (model.type) {
+ case e_model::MODEL_2B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
default: GGML_ABORT("fatal error");

View File

@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
// glob workDir for files that start with ollama_ // glob workDir for files that start with ollama_
availableServers := getAvailableServers() availableServers := getAvailableServers()
requested := info.Library requested := info.Library
if info.Variant != gpu.CPUCapabilityNone { if info.Variant != gpu.CPUCapabilityNone.String() {
requested += "_" + info.Variant.String() requested += "_" + info.Variant
} }
servers := []string{} servers := []string{}

View File

@ -33,7 +33,7 @@ type LlamaServer interface {
Ping(ctx context.Context) error Ping(ctx context.Context) error
WaitUntilRunning(ctx context.Context) error WaitUntilRunning(ctx context.Context) error
Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error
Embed(ctx context.Context, input []string) (*EmbedResponse, error) Embedding(ctx context.Context, input string) ([]float32, error)
Tokenize(ctx context.Context, content string) ([]int, error) Tokenize(ctx context.Context, content string) ([]int, error)
Detokenize(ctx context.Context, tokens []int) (string, error) Detokenize(ctx context.Context, tokens []int) (string, error)
Close() error Close() error
@ -44,11 +44,12 @@ type LlamaServer interface {
// llmServer is an instance of the llama.cpp server // llmServer is an instance of the llama.cpp server
type llmServer struct { type llmServer struct {
port int port int
cmd *exec.Cmd cmd *exec.Cmd
done chan error // Channel to signal when the process exits done chan error // Channel to signal when the process exits
status *StatusWriter status *StatusWriter
options api.Options options api.Options
numParallel int
estimate MemoryEstimate estimate MemoryEstimate
totalLayers uint64 totalLayers uint64
@ -124,8 +125,9 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
} }
} }
// On linux, over-allocating CPU memory will almost always result in an error // On linux and windows, over-allocating CPU memory will almost always result in an error
if runtime.GOOS == "linux" { // Darwin has fully dynamic swap so has no direct concept of free swap space
if runtime.GOOS != "darwin" {
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
available := systemFreeMemory + systemSwapFreeMemory available := systemFreeMemory + systemSwapFreeMemory
if systemMemoryRequired > available { if systemMemoryRequired > available {
@ -256,8 +258,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--mlock") params = append(params, "--mlock")
} }
if opts.UseNUMA { if gpu.IsNUMA() {
params = append(params, "--numa") numaMode := "distribute"
if runtime.GOOS == "linux" {
if _, err := exec.LookPath("numactl"); err == nil {
numaMode = "numactl"
}
}
params = append(params, "--numa", numaMode)
} }
params = append(params, "--parallel", strconv.Itoa(numParallel)) params = append(params, "--parallel", strconv.Itoa(numParallel))
@ -298,20 +306,18 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
if runtime.GOOS == "windows" { if runtime.GOOS == "windows" {
pathEnv = "PATH" pathEnv = "PATH"
} }
// prepend the server directory to LD_LIBRARY_PATH/PATH and the parent dir for common dependencies // Start with the server directory for the LD_LIBRARY_PATH/PATH
libraryPaths := []string{dir, filepath.Dir(dir)} libraryPaths := []string{dir}
if libraryPath, ok := os.LookupEnv(pathEnv); ok { if libraryPath, ok := os.LookupEnv(pathEnv); ok {
// Append our runner directory to the path // favor our bundled library dependencies over system libraries
// This will favor system libraries over our bundled library dependencies
libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...) libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
} }
// Note: we always put the dependency path first // Note: we always put the dependency path first
// since this was the exact version we verified for AMD GPUs // since this was the exact version we compiled/linked against
// and we favor what the user had in their path
if gpus[0].DependencyPath != "" { if gpus[0].DependencyPath != "" {
// TODO refine for multi-gpu support // assume gpus from the same library have the same dependency path
libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...) libraryPaths = append([]string{gpus[0].DependencyPath}, libraryPaths...)
} }
@ -337,6 +343,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
status: NewStatusWriter(os.Stderr), status: NewStatusWriter(os.Stderr),
options: opts, options: opts,
estimate: estimate, estimate: estimate,
numParallel: numParallel,
sem: semaphore.NewWeighted(int64(numParallel)), sem: semaphore.NewWeighted(int64(numParallel)),
totalLayers: ggml.KV().BlockCount() + 1, totalLayers: ggml.KV().BlockCount() + 1,
gpus: gpus, gpus: gpus,
@ -874,16 +881,15 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
return nil return nil
} }
type EmbedRequest struct { type EmbeddingRequest struct {
Content []string `json:"content"` Content string `json:"content"`
} }
type EmbedResponse struct { type EmbeddingResponse struct {
Embedding [][]float32 `json:"embedding"` Embedding []float32 `json:"embedding"`
PromptEvalCount int `json:"prompt_n"`
} }
func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse, error) { func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
if err := s.sem.Acquire(ctx, 1); err != nil { if err := s.sem.Acquire(ctx, 1); err != nil {
slog.Error("Failed to acquire semaphore", "error", err) slog.Error("Failed to acquire semaphore", "error", err)
return nil, err return nil, err
@ -898,18 +904,18 @@ func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse,
return nil, fmt.Errorf("unexpected server status: %s", status.ToString()) return nil, fmt.Errorf("unexpected server status: %s", status.ToString())
} }
data, err := json.Marshal(EmbedRequest{Content: input}) data, err := json.Marshal(EmbeddingRequest{Content: input})
if err != nil { if err != nil {
return nil, fmt.Errorf("error marshaling embed data: %w", err) return nil, fmt.Errorf("error marshaling embed data: %w", err)
} }
req, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/embedding", s.port), bytes.NewBuffer(data)) r, err := http.NewRequestWithContext(ctx, http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/embedding", s.port), bytes.NewBuffer(data))
if err != nil { if err != nil {
return nil, fmt.Errorf("error creating embed request: %w", err) return nil, fmt.Errorf("error creating embed request: %w", err)
} }
req.Header.Set("Content-Type", "application/json") r.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req) resp, err := http.DefaultClient.Do(r)
if err != nil { if err != nil {
return nil, fmt.Errorf("do embedding request: %w", err) return nil, fmt.Errorf("do embedding request: %w", err)
} }
@ -925,12 +931,12 @@ func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse,
return nil, fmt.Errorf("%s", body) return nil, fmt.Errorf("%s", body)
} }
var e EmbedResponse var e EmbeddingResponse
if err := json.Unmarshal(body, &e); err != nil { if err := json.Unmarshal(body, &e); err != nil {
return nil, fmt.Errorf("unmarshal tokenize response: %w", err) return nil, fmt.Errorf("unmarshal tokenize response: %w", err)
} }
return &e, nil return e.Embedding, nil
} }
type TokenizeRequest struct { type TokenizeRequest struct {

View File

@ -26,6 +26,7 @@ var errorPrefixes = []string{
"cudaMalloc failed", "cudaMalloc failed",
"\"ERR\"", "\"ERR\"",
"error loading model", "error loading model",
"GGML_ASSERT",
} }
func (w *StatusWriter) Write(b []byte) (int, error) { func (w *StatusWriter) Write(b []byte) (int, error) {

View File

@ -7,27 +7,22 @@ import (
"io" "io"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"reflect"
"strings" "strings"
"testing" "testing"
"time" "time"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/stretchr/testify/assert"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
) )
const ( const (
prefix = `data:image/jpeg;base64,` prefix = `data:image/jpeg;base64,`
image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=` image = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
imageURL = prefix + image
) )
func prepareRequest(req *http.Request, body any) { var False = false
bodyBytes, _ := json.Marshal(body)
req.Body = io.NopCloser(bytes.NewReader(bodyBytes))
req.Header.Set("Content-Type", "application/json")
}
func captureRequestMiddleware(capturedRequest any) gin.HandlerFunc { func captureRequestMiddleware(capturedRequest any) gin.HandlerFunc {
return func(c *gin.Context) { return func(c *gin.Context) {
@ -43,134 +38,136 @@ func captureRequestMiddleware(capturedRequest any) gin.HandlerFunc {
func TestChatMiddleware(t *testing.T) { func TestChatMiddleware(t *testing.T) {
type testCase struct { type testCase struct {
Name string name string
Setup func(t *testing.T, req *http.Request) body string
Expected func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) req api.ChatRequest
err ErrorResponse
} }
var capturedRequest *api.ChatRequest var capturedRequest *api.ChatRequest
testCases := []testCase{ testCases := []testCase{
{ {
Name: "chat handler", name: "chat handler",
Setup: func(t *testing.T, req *http.Request) { body: `{
body := ChatCompletionRequest{ "model": "test-model",
Model: "test-model", "messages": [
Messages: []Message{{Role: "user", Content: "Hello"}}, {"role": "user", "content": "Hello"}
} ]
prepareRequest(req, body) }`,
}, req: api.ChatRequest{
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) { Model: "test-model",
if resp.Code != http.StatusOK { Messages: []api.Message{
t.Fatalf("expected 200, got %d", resp.Code) {
} Role: "user",
Content: "Hello",
if req.Messages[0].Role != "user" { },
t.Fatalf("expected 'user', got %s", req.Messages[0].Role) },
} Options: map[string]any{
"temperature": 1.0,
if req.Messages[0].Content != "Hello" { "top_p": 1.0,
t.Fatalf("expected 'Hello', got %s", req.Messages[0].Content) },
} Stream: &False,
}, },
}, },
{ {
Name: "chat handler with image content", name: "chat handler with image content",
Setup: func(t *testing.T, req *http.Request) { body: `{
body := ChatCompletionRequest{ "model": "test-model",
Model: "test-model", "messages": [
Messages: []Message{ {
{ "role": "user",
Role: "user", Content: []map[string]any{ "content": [
{"type": "text", "text": "Hello"}, {
{"type": "image_url", "image_url": map[string]string{"url": imageURL}}, "type": "text",
"text": "Hello"
},
{
"type": "image_url",
"image_url": {
"url": "` + prefix + image + `"
}
}
]
}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{
Role: "user",
Content: "Hello",
},
{
Role: "user",
Images: []api.ImageData{
func() []byte {
img, _ := base64.StdEncoding.DecodeString(image)
return img
}(),
},
},
},
Options: map[string]any{
"temperature": 1.0,
"top_p": 1.0,
},
Stream: &False,
},
},
{
name: "chat handler with tools",
body: `{
"model": "test-model",
"messages": [
{"role": "user", "content": "What's the weather like in Paris Today?"},
{"role": "assistant", "tool_calls": [{"id": "id", "type": "function", "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Paris, France\", \"format\": \"celsius\"}"}}]}
]
}`,
req: api.ChatRequest{
Model: "test-model",
Messages: []api.Message{
{
Role: "user",
Content: "What's the weather like in Paris Today?",
},
{
Role: "assistant",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get_current_weather",
Arguments: map[string]interface{}{
"location": "Paris, France",
"format": "celsius",
},
},
}, },
}, },
}, },
} },
prepareRequest(req, body) Options: map[string]any{
}, "temperature": 1.0,
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) { "top_p": 1.0,
if resp.Code != http.StatusOK { },
t.Fatalf("expected 200, got %d", resp.Code) Stream: &False,
}
if req.Messages[0].Role != "user" {
t.Fatalf("expected 'user', got %s", req.Messages[0].Role)
}
if req.Messages[0].Content != "Hello" {
t.Fatalf("expected 'Hello', got %s", req.Messages[0].Content)
}
img, _ := base64.StdEncoding.DecodeString(imageURL[len(prefix):])
if req.Messages[1].Role != "user" {
t.Fatalf("expected 'user', got %s", req.Messages[1].Role)
}
if !bytes.Equal(req.Messages[1].Images[0], img) {
t.Fatalf("expected image encoding, got %s", req.Messages[1].Images[0])
}
}, },
}, },
{ {
Name: "chat handler with tools", name: "chat handler error forwarding",
Setup: func(t *testing.T, req *http.Request) { body: `{
body := ChatCompletionRequest{ "model": "test-model",
Model: "test-model", "messages": [
Messages: []Message{ {"role": "user", "content": 2}
{Role: "user", Content: "What's the weather like in Paris Today?"}, ]
{Role: "assistant", ToolCalls: []ToolCall{{ }`,
ID: "id", err: ErrorResponse{
Type: "function", Error: Error{
Function: struct { Message: "invalid message content type: float64",
Name string `json:"name"` Type: "invalid_request_error",
Arguments string `json:"arguments"` },
}{
Name: "get_current_weather",
Arguments: "{\"location\": \"Paris, France\", \"format\": \"celsius\"}",
},
}}},
},
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
if resp.Code != 200 {
t.Fatalf("expected 200, got %d", resp.Code)
}
if req.Messages[0].Content != "What's the weather like in Paris Today?" {
t.Fatalf("expected What's the weather like in Paris Today?, got %s", req.Messages[0].Content)
}
if req.Messages[1].ToolCalls[0].Function.Arguments["location"] != "Paris, France" {
t.Fatalf("expected 'Paris, France', got %v", req.Messages[1].ToolCalls[0].Function.Arguments["location"])
}
if req.Messages[1].ToolCalls[0].Function.Arguments["format"] != "celsius" {
t.Fatalf("expected celsius, got %v", req.Messages[1].ToolCalls[0].Function.Arguments["format"])
}
},
},
{
Name: "chat handler error forwarding",
Setup: func(t *testing.T, req *http.Request) {
body := ChatCompletionRequest{
Model: "test-model",
Messages: []Message{{Role: "user", Content: 2}},
}
prepareRequest(req, body)
},
Expected: func(t *testing.T, req *api.ChatRequest, resp *httptest.ResponseRecorder) {
if resp.Code != http.StatusBadRequest {
t.Fatalf("expected 400, got %d", resp.Code)
}
if !strings.Contains(resp.Body.String(), "invalid message content type") {
t.Fatalf("error was not forwarded")
}
}, },
}, },
} }
@ -185,16 +182,26 @@ func TestChatMiddleware(t *testing.T) {
router.Handle(http.MethodPost, "/api/chat", endpoint) router.Handle(http.MethodPost, "/api/chat", endpoint)
for _, tc := range testCases { for _, tc := range testCases {
t.Run(tc.Name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/chat", nil) req, _ := http.NewRequest(http.MethodPost, "/api/chat", strings.NewReader(tc.body))
req.Header.Set("Content-Type", "application/json")
tc.Setup(t, req)
resp := httptest.NewRecorder() resp := httptest.NewRecorder()
router.ServeHTTP(resp, req) router.ServeHTTP(resp, req)
tc.Expected(t, capturedRequest, resp) var errResp ErrorResponse
if resp.Code != http.StatusOK {
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatal(err)
}
}
if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
t.Fatal("requests did not match")
}
if !reflect.DeepEqual(tc.err, errResp) {
t.Fatal("errors did not match")
}
capturedRequest = nil capturedRequest = nil
}) })
} }
@ -202,71 +209,52 @@ func TestChatMiddleware(t *testing.T) {
func TestCompletionsMiddleware(t *testing.T) { func TestCompletionsMiddleware(t *testing.T) {
type testCase struct { type testCase struct {
Name string name string
Setup func(t *testing.T, req *http.Request) body string
Expected func(t *testing.T, req *api.GenerateRequest, resp *httptest.ResponseRecorder) req api.GenerateRequest
err ErrorResponse
} }
var capturedRequest *api.GenerateRequest var capturedRequest *api.GenerateRequest
testCases := []testCase{ testCases := []testCase{
{ {
Name: "completions handler", name: "completions handler",
Setup: func(t *testing.T, req *http.Request) { body: `{
temp := float32(0.8) "model": "test-model",
body := CompletionRequest{ "prompt": "Hello",
Model: "test-model", "temperature": 0.8,
Prompt: "Hello", "stop": ["\n", "stop"],
Temperature: &temp, "suffix": "suffix"
Stop: []string{"\n", "stop"}, }`,
Suffix: "suffix", req: api.GenerateRequest{
} Model: "test-model",
prepareRequest(req, body) Prompt: "Hello",
}, Options: map[string]any{
Expected: func(t *testing.T, req *api.GenerateRequest, resp *httptest.ResponseRecorder) { "frequency_penalty": 0.0,
if req.Prompt != "Hello" { "presence_penalty": 0.0,
t.Fatalf("expected 'Hello', got %s", req.Prompt) "temperature": 1.6,
} "top_p": 1.0,
"stop": []any{"\n", "stop"},
if req.Options["temperature"] != 1.6 { },
t.Fatalf("expected 1.6, got %f", req.Options["temperature"]) Suffix: "suffix",
} Stream: &False,
stopTokens, ok := req.Options["stop"].([]any)
if !ok {
t.Fatalf("expected stop tokens to be a list")
}
if stopTokens[0] != "\n" || stopTokens[1] != "stop" {
t.Fatalf("expected ['\\n', 'stop'], got %v", stopTokens)
}
if req.Suffix != "suffix" {
t.Fatalf("expected 'suffix', got %s", req.Suffix)
}
}, },
}, },
{ {
Name: "completions handler error forwarding", name: "completions handler error forwarding",
Setup: func(t *testing.T, req *http.Request) { body: `{
body := CompletionRequest{ "model": "test-model",
Model: "test-model", "prompt": "Hello",
Prompt: "Hello", "temperature": null,
Temperature: nil, "stop": [1, 2],
Stop: []int{1, 2}, "suffix": "suffix"
Suffix: "suffix", }`,
} err: ErrorResponse{
prepareRequest(req, body) Error: Error{
}, Message: "invalid type for 'stop' field: float64",
Expected: func(t *testing.T, req *api.GenerateRequest, resp *httptest.ResponseRecorder) { Type: "invalid_request_error",
if resp.Code != http.StatusBadRequest { },
t.Fatalf("expected 400, got %d", resp.Code)
}
if !strings.Contains(resp.Body.String(), "invalid type for 'stop' field") {
t.Fatalf("error was not forwarded")
}
}, },
}, },
} }
@ -281,15 +269,27 @@ func TestCompletionsMiddleware(t *testing.T) {
router.Handle(http.MethodPost, "/api/generate", endpoint) router.Handle(http.MethodPost, "/api/generate", endpoint)
for _, tc := range testCases { for _, tc := range testCases {
t.Run(tc.Name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/generate", nil) req, _ := http.NewRequest(http.MethodPost, "/api/generate", strings.NewReader(tc.body))
req.Header.Set("Content-Type", "application/json")
tc.Setup(t, req)
resp := httptest.NewRecorder() resp := httptest.NewRecorder()
router.ServeHTTP(resp, req) router.ServeHTTP(resp, req)
tc.Expected(t, capturedRequest, resp) var errResp ErrorResponse
if resp.Code != http.StatusOK {
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatal(err)
}
}
if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
t.Fatal("requests did not match")
}
if !reflect.DeepEqual(tc.err, errResp) {
t.Fatal("errors did not match")
}
capturedRequest = nil capturedRequest = nil
}) })
@ -298,78 +298,47 @@ func TestCompletionsMiddleware(t *testing.T) {
func TestEmbeddingsMiddleware(t *testing.T) { func TestEmbeddingsMiddleware(t *testing.T) {
type testCase struct { type testCase struct {
Name string name string
Setup func(t *testing.T, req *http.Request) body string
Expected func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) req api.EmbedRequest
err ErrorResponse
} }
var capturedRequest *api.EmbedRequest var capturedRequest *api.EmbedRequest
testCases := []testCase{ testCases := []testCase{
{ {
Name: "embed handler single input", name: "embed handler single input",
Setup: func(t *testing.T, req *http.Request) { body: `{
body := EmbedRequest{ "input": "Hello",
Input: "Hello", "model": "test-model"
Model: "test-model", }`,
} req: api.EmbedRequest{
prepareRequest(req, body) Input: "Hello",
}, Model: "test-model",
Expected: func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) {
if req.Input != "Hello" {
t.Fatalf("expected 'Hello', got %s", req.Input)
}
if req.Model != "test-model" {
t.Fatalf("expected 'test-model', got %s", req.Model)
}
}, },
}, },
{ {
Name: "embed handler batch input", name: "embed handler batch input",
Setup: func(t *testing.T, req *http.Request) { body: `{
body := EmbedRequest{ "input": ["Hello", "World"],
Input: []string{"Hello", "World"}, "model": "test-model"
Model: "test-model", }`,
} req: api.EmbedRequest{
prepareRequest(req, body) Input: []any{"Hello", "World"},
}, Model: "test-model",
Expected: func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) {
input, ok := req.Input.([]any)
if !ok {
t.Fatalf("expected input to be a list")
}
if input[0].(string) != "Hello" {
t.Fatalf("expected 'Hello', got %s", input[0])
}
if input[1].(string) != "World" {
t.Fatalf("expected 'World', got %s", input[1])
}
if req.Model != "test-model" {
t.Fatalf("expected 'test-model', got %s", req.Model)
}
}, },
}, },
{ {
Name: "embed handler error forwarding", name: "embed handler error forwarding",
Setup: func(t *testing.T, req *http.Request) { body: `{
body := EmbedRequest{ "model": "test-model"
Model: "test-model", }`,
} err: ErrorResponse{
prepareRequest(req, body) Error: Error{
}, Message: "invalid input",
Expected: func(t *testing.T, req *api.EmbedRequest, resp *httptest.ResponseRecorder) { Type: "invalid_request_error",
if resp.Code != http.StatusBadRequest { },
t.Fatalf("expected 400, got %d", resp.Code)
}
if !strings.Contains(resp.Body.String(), "invalid input") {
t.Fatalf("error was not forwarded")
}
}, },
}, },
} }
@ -384,116 +353,167 @@ func TestEmbeddingsMiddleware(t *testing.T) {
router.Handle(http.MethodPost, "/api/embed", endpoint) router.Handle(http.MethodPost, "/api/embed", endpoint)
for _, tc := range testCases { for _, tc := range testCases {
t.Run(tc.Name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
req, _ := http.NewRequest(http.MethodPost, "/api/embed", nil) req, _ := http.NewRequest(http.MethodPost, "/api/embed", strings.NewReader(tc.body))
req.Header.Set("Content-Type", "application/json")
tc.Setup(t, req)
resp := httptest.NewRecorder() resp := httptest.NewRecorder()
router.ServeHTTP(resp, req) router.ServeHTTP(resp, req)
tc.Expected(t, capturedRequest, resp) var errResp ErrorResponse
if resp.Code != http.StatusOK {
if err := json.Unmarshal(resp.Body.Bytes(), &errResp); err != nil {
t.Fatal(err)
}
}
if capturedRequest != nil && !reflect.DeepEqual(tc.req, *capturedRequest) {
t.Fatal("requests did not match")
}
if !reflect.DeepEqual(tc.err, errResp) {
t.Fatal("errors did not match")
}
capturedRequest = nil capturedRequest = nil
}) })
} }
} }
func TestMiddlewareResponses(t *testing.T) { func TestListMiddleware(t *testing.T) {
type testCase struct { type testCase struct {
Name string name string
Method string endpoint func(c *gin.Context)
Path string resp string
TestPath string
Handler func() gin.HandlerFunc
Endpoint func(c *gin.Context)
Setup func(t *testing.T, req *http.Request)
Expected func(t *testing.T, resp *httptest.ResponseRecorder)
} }
testCases := []testCase{ testCases := []testCase{
{ {
Name: "list handler", name: "list handler",
Method: http.MethodGet, endpoint: func(c *gin.Context) {
Path: "/api/tags",
TestPath: "/api/tags",
Handler: ListMiddleware,
Endpoint: func(c *gin.Context) {
c.JSON(http.StatusOK, api.ListResponse{ c.JSON(http.StatusOK, api.ListResponse{
Models: []api.ListModelResponse{ Models: []api.ListModelResponse{
{ {
Name: "Test Model", Name: "test-model",
ModifiedAt: time.Unix(int64(1686935002), 0).UTC(),
}, },
}, },
}) })
}, },
Expected: func(t *testing.T, resp *httptest.ResponseRecorder) { resp: `{
var listResp ListCompletion "object": "list",
if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil { "data": [
t.Fatal(err) {
} "id": "test-model",
"object": "model",
if listResp.Object != "list" { "created": 1686935002,
t.Fatalf("expected list, got %s", listResp.Object) "owned_by": "library"
} }
]
if len(listResp.Data) != 1 { }`,
t.Fatalf("expected 1, got %d", len(listResp.Data))
}
if listResp.Data[0].Id != "Test Model" {
t.Fatalf("expected Test Model, got %s", listResp.Data[0].Id)
}
},
}, },
{ {
Name: "retrieve model", name: "list handler empty output",
Method: http.MethodGet, endpoint: func(c *gin.Context) {
Path: "/api/show/:model", c.JSON(http.StatusOK, api.ListResponse{})
TestPath: "/api/show/test-model",
Handler: RetrieveMiddleware,
Endpoint: func(c *gin.Context) {
c.JSON(http.StatusOK, api.ShowResponse{
ModifiedAt: time.Date(2024, 6, 17, 13, 45, 0, 0, time.UTC),
})
},
Expected: func(t *testing.T, resp *httptest.ResponseRecorder) {
var retrieveResp Model
if err := json.NewDecoder(resp.Body).Decode(&retrieveResp); err != nil {
t.Fatal(err)
}
if retrieveResp.Object != "model" {
t.Fatalf("Expected object to be model, got %s", retrieveResp.Object)
}
if retrieveResp.Id != "test-model" {
t.Fatalf("Expected id to be test-model, got %s", retrieveResp.Id)
}
}, },
resp: `{
"object": "list",
"data": null
}`,
}, },
} }
gin.SetMode(gin.TestMode) gin.SetMode(gin.TestMode)
router := gin.New()
for _, tc := range testCases { for _, tc := range testCases {
t.Run(tc.Name, func(t *testing.T) { router := gin.New()
router = gin.New() router.Use(ListMiddleware())
router.Use(tc.Handler()) router.Handle(http.MethodGet, "/api/tags", tc.endpoint)
router.Handle(tc.Method, tc.Path, tc.Endpoint) req, _ := http.NewRequest(http.MethodGet, "/api/tags", nil)
req, _ := http.NewRequest(tc.Method, tc.TestPath, nil)
if tc.Setup != nil { resp := httptest.NewRecorder()
tc.Setup(t, req) router.ServeHTTP(resp, req)
}
resp := httptest.NewRecorder() var expected, actual map[string]any
router.ServeHTTP(resp, req) err := json.Unmarshal([]byte(tc.resp), &expected)
if err != nil {
t.Fatalf("failed to unmarshal expected response: %v", err)
}
assert.Equal(t, http.StatusOK, resp.Code) err = json.Unmarshal(resp.Body.Bytes(), &actual)
if err != nil {
t.Fatalf("failed to unmarshal actual response: %v", err)
}
tc.Expected(t, resp) if !reflect.DeepEqual(expected, actual) {
}) t.Errorf("responses did not match\nExpected: %+v\nActual: %+v", expected, actual)
}
}
}
func TestRetrieveMiddleware(t *testing.T) {
type testCase struct {
name string
endpoint func(c *gin.Context)
resp string
}
testCases := []testCase{
{
name: "retrieve handler",
endpoint: func(c *gin.Context) {
c.JSON(http.StatusOK, api.ShowResponse{
ModifiedAt: time.Unix(int64(1686935002), 0).UTC(),
})
},
resp: `{
"id":"test-model",
"object":"model",
"created":1686935002,
"owned_by":"library"}
`,
},
{
name: "retrieve handler error forwarding",
endpoint: func(c *gin.Context) {
c.JSON(http.StatusBadRequest, gin.H{"error": "model not found"})
},
resp: `{
"error": {
"code": null,
"message": "model not found",
"param": null,
"type": "api_error"
}
}`,
},
}
gin.SetMode(gin.TestMode)
for _, tc := range testCases {
router := gin.New()
router.Use(RetrieveMiddleware())
router.Handle(http.MethodGet, "/api/show/:model", tc.endpoint)
req, _ := http.NewRequest(http.MethodGet, "/api/show/test-model", nil)
resp := httptest.NewRecorder()
router.ServeHTTP(resp, req)
var expected, actual map[string]any
err := json.Unmarshal([]byte(tc.resp), &expected)
if err != nil {
t.Fatalf("failed to unmarshal expected response: %v", err)
}
err = json.Unmarshal(resp.Body.Bytes(), &actual)
if err != nil {
t.Fatalf("failed to unmarshal actual response: %v", err)
}
if !reflect.DeepEqual(expected, actual) {
t.Errorf("responses did not match\nExpected: %+v\nActual: %+v", expected, actual)
}
} }
} }

View File

@ -3,11 +3,12 @@ package progress
import ( import (
"fmt" "fmt"
"strings" "strings"
"sync/atomic"
"time" "time"
) )
type Spinner struct { type Spinner struct {
message string message atomic.Value
messageWidth int messageWidth int
parts []string parts []string
@ -21,20 +22,25 @@ type Spinner struct {
func NewSpinner(message string) *Spinner { func NewSpinner(message string) *Spinner {
s := &Spinner{ s := &Spinner{
message: message,
parts: []string{ parts: []string{
"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏", "⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏",
}, },
started: time.Now(), started: time.Now(),
} }
s.SetMessage(message)
go s.start() go s.start()
return s return s
} }
func (s *Spinner) SetMessage(message string) {
s.message.Store(message)
}
func (s *Spinner) String() string { func (s *Spinner) String() string {
var sb strings.Builder var sb strings.Builder
if len(s.message) > 0 {
message := strings.TrimSpace(s.message) if message, ok := s.message.Load().(string); ok && len(message) > 0 {
message := strings.TrimSpace(message)
if s.messageWidth > 0 && len(message) > s.messageWidth { if s.messageWidth > 0 && len(message) > s.messageWidth {
message = message[:s.messageWidth] message = message[:s.messageWidth]
} }

View File

@ -62,7 +62,7 @@ func (b *Buffer) MoveLeft() {
rLength := runewidth.RuneWidth(r) rLength := runewidth.RuneWidth(r)
if b.DisplayPos%b.LineWidth == 0 { if b.DisplayPos%b.LineWidth == 0 {
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width)) fmt.Print(CursorUp + CursorBOL + CursorRightN(b.Width))
if rLength == 2 { if rLength == 2 {
fmt.Print(CursorLeft) fmt.Print(CursorLeft)
} }
@ -74,7 +74,7 @@ func (b *Buffer) MoveLeft() {
fmt.Print(CursorLeft) fmt.Print(CursorLeft)
} }
} else { } else {
fmt.Print(cursorLeftN(rLength)) fmt.Print(CursorLeftN(rLength))
} }
b.Pos -= 1 b.Pos -= 1
@ -115,15 +115,15 @@ func (b *Buffer) MoveRight() {
b.DisplayPos += rLength b.DisplayPos += rLength
if b.DisplayPos%b.LineWidth == 0 { if b.DisplayPos%b.LineWidth == 0 {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt()))) fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())))
} else if (b.DisplayPos-rLength)%b.LineWidth == b.LineWidth-1 && hasSpace { } else if (b.DisplayPos-rLength)%b.LineWidth == b.LineWidth-1 && hasSpace {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())+rLength)) fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())+rLength))
b.DisplayPos += 1 b.DisplayPos += 1
} else if b.LineHasSpace.Size() > 0 && b.DisplayPos%b.LineWidth == b.LineWidth-1 && hasSpace { } else if b.LineHasSpace.Size() > 0 && b.DisplayPos%b.LineWidth == b.LineWidth-1 && hasSpace {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt()))) fmt.Print(CursorDown + CursorBOL + CursorRightN(len(b.Prompt.prompt())))
b.DisplayPos += 1 b.DisplayPos += 1
} else { } else {
fmt.Print(cursorRightN(rLength)) fmt.Print(CursorRightN(rLength))
} }
} }
} }
@ -154,7 +154,7 @@ func (b *Buffer) MoveToStart() {
fmt.Print(CursorUp) fmt.Print(CursorUp)
} }
} }
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt()))) fmt.Print(CursorBOL + CursorRightN(len(b.Prompt.prompt())))
b.Pos = 0 b.Pos = 0
b.DisplayPos = 0 b.DisplayPos = 0
} }
@ -169,9 +169,9 @@ func (b *Buffer) MoveToEnd() {
fmt.Print(CursorDown) fmt.Print(CursorDown)
} }
remainder := b.DisplaySize() % b.LineWidth remainder := b.DisplaySize() % b.LineWidth
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())+remainder)) fmt.Print(CursorBOL + CursorRightN(len(b.Prompt.prompt())+remainder))
} else { } else {
fmt.Print(cursorRightN(b.DisplaySize() - b.DisplayPos)) fmt.Print(CursorRightN(b.DisplaySize() - b.DisplayPos))
} }
b.Pos = b.Buf.Size() b.Pos = b.Buf.Size()
@ -286,8 +286,7 @@ func (b *Buffer) drawRemaining() {
remLength := runewidth.StringWidth(remainingText) remLength := runewidth.StringWidth(remainingText)
if len(currLine) > 0 { if len(currLine) > 0 {
fmt.Printf(ClearToEOL + currLine) fmt.Print(ClearToEOL + currLine + CursorLeftN(currLineSpace))
fmt.Print(cursorLeftN(currLineSpace))
} else { } else {
fmt.Print(ClearToEOL) fmt.Print(ClearToEOL)
} }
@ -301,9 +300,9 @@ func (b *Buffer) drawRemaining() {
} }
if (b.DisplayPos+currLineSpace)%b.LineWidth == 0 && currLine == remainingText { if (b.DisplayPos+currLineSpace)%b.LineWidth == 0 && currLine == remainingText {
fmt.Print(cursorRightN(currLineSpace)) fmt.Print(CursorRightN(currLineSpace))
fmt.Printf("\n%s", b.Prompt.AltPrompt) fmt.Printf("\n%s", b.Prompt.AltPrompt)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width-currLineSpace)) fmt.Print(CursorUp + CursorBOL + CursorRightN(b.Width-currLineSpace))
} }
// render the other lines // render the other lines
@ -333,9 +332,7 @@ func (b *Buffer) drawRemaining() {
lineLength += runewidth.RuneWidth(c) lineLength += runewidth.RuneWidth(c)
fmt.Printf("%c", c) fmt.Printf("%c", c)
} }
fmt.Print(ClearToEOL) fmt.Print(ClearToEOL + CursorUpN(totalLines) + CursorBOL + CursorRightN(b.Width-currLineSpace))
fmt.Print(cursorUpN(totalLines))
fmt.Printf(CursorBOL + cursorRightN(b.Width-currLineSpace))
hasSpace := b.GetLineSpacing(b.DisplayPos / b.LineWidth) hasSpace := b.GetLineSpacing(b.DisplayPos / b.LineWidth)
@ -357,8 +354,7 @@ func (b *Buffer) Remove() {
if b.DisplayPos%b.LineWidth == 0 { if b.DisplayPos%b.LineWidth == 0 {
// if the user backspaces over the word boundary, do this magic to clear the line // if the user backspaces over the word boundary, do this magic to clear the line
// and move to the end of the previous line // and move to the end of the previous line
fmt.Printf(CursorBOL + ClearToEOL) fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + CursorRightN(b.Width))
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
if b.DisplaySize()%b.LineWidth < (b.DisplaySize()-rLength)%b.LineWidth { if b.DisplaySize()%b.LineWidth < (b.DisplaySize()-rLength)%b.LineWidth {
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1) b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
@ -370,24 +366,23 @@ func (b *Buffer) Remove() {
} }
if rLength == 2 { if rLength == 2 {
fmt.Print(CursorLeft + " " + cursorLeftN(2)) fmt.Print(CursorLeft + " " + CursorLeftN(2))
} else { } else {
fmt.Print(" " + CursorLeft) fmt.Print(" " + CursorLeft)
} }
} else if (b.DisplayPos-rLength)%b.LineWidth == 0 && hasSpace { } else if (b.DisplayPos-rLength)%b.LineWidth == 0 && hasSpace {
fmt.Printf(CursorBOL + ClearToEOL) fmt.Print(CursorBOL + ClearToEOL + CursorUp + CursorBOL + CursorRightN(b.Width))
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
if b.Pos == b.Buf.Size() { if b.Pos == b.Buf.Size() {
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1) b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
} }
b.DisplayPos -= 1 b.DisplayPos -= 1
} else { } else {
fmt.Print(cursorLeftN(rLength)) fmt.Print(CursorLeftN(rLength))
for range rLength { for range rLength {
fmt.Print(" ") fmt.Print(" ")
} }
fmt.Print(cursorLeftN(rLength)) fmt.Print(CursorLeftN(rLength))
} }
var eraseExtraLine bool var eraseExtraLine bool
@ -405,9 +400,9 @@ func (b *Buffer) Remove() {
// are trailing characters which go over the line width boundary // are trailing characters which go over the line width boundary
if eraseExtraLine { if eraseExtraLine {
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
fmt.Printf(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL) fmt.Print(CursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
place := b.DisplayPos % b.LineWidth place := b.DisplayPos % b.LineWidth
fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.prompt()))) fmt.Print(CursorUpN(remainingLines+1) + CursorRightN(place+len(b.Prompt.prompt())))
} }
} }
} }
@ -422,9 +417,9 @@ func (b *Buffer) Delete() {
if b.DisplaySize()%b.LineWidth == 0 { if b.DisplaySize()%b.LineWidth == 0 {
if b.DisplayPos != b.DisplaySize() { if b.DisplayPos != b.DisplaySize() {
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
fmt.Printf(cursorDownN(remainingLines) + CursorBOL + ClearToEOL) fmt.Print(CursorDownN(remainingLines) + CursorBOL + ClearToEOL)
place := b.DisplayPos % b.LineWidth place := b.DisplayPos % b.LineWidth
fmt.Printf(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.prompt()))) fmt.Print(CursorUpN(remainingLines) + CursorRightN(place+len(b.Prompt.prompt())))
} }
} }
} }
@ -471,17 +466,17 @@ func (b *Buffer) DeleteWord() {
} }
func (b *Buffer) ClearScreen() { func (b *Buffer) ClearScreen() {
fmt.Printf(ClearScreen + CursorReset + b.Prompt.prompt()) fmt.Print(ClearScreen + CursorReset + b.Prompt.prompt())
if b.IsEmpty() { if b.IsEmpty() {
ph := b.Prompt.placeholder() ph := b.Prompt.placeholder()
fmt.Printf(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault) fmt.Print(ColorGrey + ph + CursorLeftN(len(ph)) + ColorDefault)
} else { } else {
currPos := b.DisplayPos currPos := b.DisplayPos
currIndex := b.Pos currIndex := b.Pos
b.Pos = 0 b.Pos = 0
b.DisplayPos = 0 b.DisplayPos = 0
b.drawRemaining() b.drawRemaining()
fmt.Printf(CursorReset + cursorRightN(len(b.Prompt.prompt()))) fmt.Print(CursorReset + CursorRightN(len(b.Prompt.prompt())))
if currPos > 0 { if currPos > 0 {
targetLine := currPos / b.LineWidth targetLine := currPos / b.LineWidth
if targetLine > 0 { if targetLine > 0 {
@ -491,10 +486,10 @@ func (b *Buffer) ClearScreen() {
} }
remainder := currPos % b.LineWidth remainder := currPos % b.LineWidth
if remainder > 0 { if remainder > 0 {
fmt.Print(cursorRightN(remainder)) fmt.Print(CursorRightN(remainder))
} }
if currPos%b.LineWidth == 0 { if currPos%b.LineWidth == 0 {
fmt.Printf(CursorBOL + b.Prompt.AltPrompt) fmt.Print(CursorBOL + b.Prompt.AltPrompt)
} }
} }
b.Pos = currIndex b.Pos = currIndex
@ -513,13 +508,13 @@ func (b *Buffer) Replace(r []rune) {
b.Buf.Clear() b.Buf.Clear()
fmt.Printf(CursorBOL + ClearToEOL) fmt.Print(CursorBOL + ClearToEOL)
for range lineNums { for range lineNums {
fmt.Print(CursorUp + CursorBOL + ClearToEOL) fmt.Print(CursorUp + CursorBOL + ClearToEOL)
} }
fmt.Printf(CursorBOL + b.Prompt.prompt()) fmt.Print(CursorBOL + b.Prompt.prompt())
for _, c := range r { for _, c := range r {
b.Add(c) b.Add(c)
@ -545,19 +540,3 @@ func (b *Buffer) StringNM(n, m int) string {
} }
return s return s
} }
func cursorLeftN(n int) string {
return fmt.Sprintf(CursorLeftN, n)
}
func cursorRightN(n int) string {
return fmt.Sprintf(CursorRightN, n)
}
func cursorUpN(n int) string {
return fmt.Sprintf(CursorUpN, n)
}
func cursorDownN(n int) string {
return fmt.Sprintf(CursorDownN, n)
}

View File

@ -98,7 +98,7 @@ func (i *Instance) Readline() (string, error) {
showPlaceholder := !i.Pasting || i.Prompt.UseAlt showPlaceholder := !i.Pasting || i.Prompt.UseAlt
if buf.IsEmpty() && showPlaceholder { if buf.IsEmpty() && showPlaceholder {
ph := i.Prompt.placeholder() ph := i.Prompt.placeholder()
fmt.Printf(ColorGrey + ph + fmt.Sprintf(CursorLeftN, len(ph)) + ColorDefault) fmt.Print(ColorGrey + ph + CursorLeftN(len(ph)) + ColorDefault)
} }
r, err := i.Terminal.Read() r, err := i.Terminal.Read()

View File

@ -1,5 +1,7 @@
package readline package readline
import "strconv"
const ( const (
CharNull = 0 CharNull = 0
CharLineStart = 1 CharLineStart = 1
@ -41,34 +43,49 @@ const (
) )
const ( const (
CursorUp = "\033[1A" Esc = "\x1b"
CursorDown = "\033[1B"
CursorRight = "\033[1C"
CursorLeft = "\033[1D"
CursorSave = "\033[s" CursorSave = Esc + "[s"
CursorRestore = "\033[u" CursorRestore = Esc + "[u"
CursorUpN = "\033[%dA" CursorEOL = Esc + "[E"
CursorDownN = "\033[%dB" CursorBOL = Esc + "[1G"
CursorRightN = "\033[%dC" CursorHide = Esc + "[?25l"
CursorLeftN = "\033[%dD" CursorShow = Esc + "[?25h"
CursorEOL = "\033[E" ClearToEOL = Esc + "[K"
CursorBOL = "\033[1G" ClearLine = Esc + "[2K"
CursorHide = "\033[?25l" ClearScreen = Esc + "[2J"
CursorShow = "\033[?25h" CursorReset = Esc + "[0;0f"
ClearToEOL = "\033[K" ColorGrey = Esc + "[38;5;245m"
ClearLine = "\033[2K" ColorDefault = Esc + "[0m"
ClearScreen = "\033[2J"
CursorReset = "\033[0;0f"
ColorGrey = "\033[38;5;245m" StartBracketedPaste = Esc + "[?2004h"
ColorDefault = "\033[0m" EndBracketedPaste = Esc + "[?2004l"
)
StartBracketedPaste = "\033[?2004h" func CursorUpN(n int) string {
EndBracketedPaste = "\033[?2004l" return Esc + "[" + strconv.Itoa(n) + "A"
}
func CursorDownN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "B"
}
func CursorRightN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "C"
}
func CursorLeftN(n int) string {
return Esc + "[" + strconv.Itoa(n) + "D"
}
var (
CursorUp = CursorUpN(1)
CursorDown = CursorDownN(1)
CursorRight = CursorRightN(1)
CursorLeft = CursorLeftN(1)
) )
const ( const (

View File

@ -4,6 +4,7 @@ set -eu
export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")} export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'" export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
GZIP=$(which pigz 2>/dev/null || echo "gzip")
BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"} BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""} export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""}
@ -21,11 +22,16 @@ for TARGETARCH in ${BUILD_ARCH}; do
-t builder:$TARGETARCH \ -t builder:$TARGETARCH \
. .
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/ollama ./dist/ollama-linux-$TARGETARCH rm -rf ./dist/linux-$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist
if [ "$TARGETARCH" = "amd64" ]; then if echo ${TARGETARCH} | grep "amd64" > /dev/null; then
docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/deps/ ./dist/ docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH-rocm ./dist
fi fi
docker rm builder-$TARGETARCH docker rm builder-$TARGETARCH
echo "Compressing final linux bundle..."
rm -f ./dist/ollama-linux-$TARGETARCH.tgz
(cd dist/linux-$TARGETARCH && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH.tgz )
if [ -d dist/linux-$TARGETARCH-rocm ]; then
(cd dist/linux-$TARGETARCH-rocm && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH-rocm.tgz )
fi
done done

View File

@ -7,6 +7,7 @@
$ErrorActionPreference = "Stop" $ErrorActionPreference = "Stop"
function checkEnv() { function checkEnv() {
$script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
$script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower() $script:TARGET_ARCH=$Env:PROCESSOR_ARCHITECTURE.ToLower()
Write-host "Building for ${script:TARGET_ARCH}" Write-host "Building for ${script:TARGET_ARCH}"
write-host "Locating required tools and paths" write-host "Locating required tools and paths"
@ -15,26 +16,23 @@ function checkEnv() {
$MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation $MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation
$env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0] $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
} }
# Try to find the CUDA dir # Locate CUDA versions
if ($null -eq $env:NVIDIA_DIR) { # Note: this assumes every version found will be built
$cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
if ($cudaList.length -eq 0) {
$d=(get-command -ea 'silentlycontinue' nvcc).path $d=(get-command -ea 'silentlycontinue' nvcc).path
if ($d -ne $null) { if ($null -ne $d) {
$script:NVIDIA_DIR=($d| split-path -parent) $script:CUDA_DIRS=@($d| split-path -parent)
} else {
$cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
if ($cudaList.length > 0) {
$script:NVIDIA_DIR=$cudaList[0]
}
} }
} else { } else {
$script:NVIDIA_DIR=$env:NVIDIA_DIR $script:CUDA_DIRS=$cudaList
} }
$script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0] $script:INNO_SETUP_DIR=(get-item "C:\Program Files*\Inno Setup*\")[0]
$script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}" $script:DEPS_DIR="${script:SRC_DIR}\dist\windows-${script:TARGET_ARCH}"
$env:CGO_ENABLED="1" $env:CGO_ENABLED="1"
echo "Checking version" Write-Output "Checking version"
if (!$env:VERSION) { if (!$env:VERSION) {
$data=(git describe --tags --first-parent --abbrev=7 --long --dirty --always) $data=(git describe --tags --first-parent --abbrev=7 --long --dirty --always)
$pattern="v(.+)" $pattern="v(.+)"
@ -71,7 +69,48 @@ function checkEnv() {
function buildOllama() { function buildOllama() {
write-host "Building ollama CLI" write-host "Building ollama CLI"
if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) { if ($null -eq ${env:OLLAMA_SKIP_GENERATE}) {
& go generate ./... Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
# TODO - consider trying to parallelize this with Start-ThreadJob, but env vars can't be used to toggle
# which targets to build
# Start by skipping CUDA to build everything else
pwsh -Command { $env:OLLAMA_SKIP_CUDA_GENERATE="1"; & go generate ./... }
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# Then skip everyhting else and build all the CUDA variants
foreach ($env:CUDA_LIB_DIR in $script:CUDA_DIRS) {
write-host "Building CUDA ${env:CUDA_LIB_DIR}"
if ($env:CUDA_LIB_DIR.Contains("v12")) {
pwsh -Command {
$env:OLLAMA_SKIP_CUDA_GENERATE=""
$env:OLLAMA_SKIP_STATIC_GENERATE="1"
$env:OLLAMA_SKIP_CPU_GENERATE="1"
$env:OLLAMA_SKIP_ONEAPI_GENERATE="1"
$env:OLLAMA_SKIP_ROCM_GENERATE="1"
$env:CMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
$env:OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on"
$env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent
$env:PATH="$envs:CUDA_LIB_DIR;$env:PATH"
& go generate ./...
}
} else {
pwsh -Command {
$env:OLLAMA_SKIP_CUDA_GENERATE=""
$env:OLLAMA_SKIP_STATIC_GENERATE="1"
$env:OLLAMA_SKIP_CPU_GENERATE="1"
$env:OLLAMA_SKIP_ONEAPI_GENERATE="1"
$env:OLLAMA_SKIP_ROCM_GENERATE="1"
$env:CMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
$env:OLLAMA_CUSTOM_CUDA_DEFS=""
$env:CUDA_PATH=split-path -path $env:CUDA_LIB_DIR -parent
$env:PATH="$envs:CUDA_LIB_DIR;$env:PATH"
& go generate ./...
}
}
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
} else { } else {
write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set" write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
@ -83,8 +122,8 @@ function buildOllama() {
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe /csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} ollama.exe
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
} }
New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\ -Force New-Item -ItemType Directory -Path .\dist\windows-${script:TARGET_ARCH}\bin\ -Force
cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\ cp .\ollama.exe .\dist\windows-${script:TARGET_ARCH}\bin\
} }
function buildApp() { function buildApp() {
@ -103,22 +142,22 @@ function buildApp() {
function gatherDependencies() { function gatherDependencies() {
write-host "Gathering runtime dependencies" write-host "Gathering runtime dependencies"
cd "${script:SRC_DIR}" cd "${script:SRC_DIR}"
md "${script:DEPS_DIR}\ollama_runners" -ea 0 > $null md "${script:DEPS_DIR}\lib\ollama" -ea 0 > $null
# TODO - this varies based on host build system and MSVC version - drive from dumpbin output # TODO - this varies based on host build system and MSVC version - drive from dumpbin output
# currently works for Win11 + MSVC 2019 + Cuda V11 # currently works for Win11 + MSVC 2019 + Cuda V11
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\ollama_runners\" cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\msvcp140*.dll" "${script:DEPS_DIR}\lib\ollama\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\ollama_runners\" cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140.dll" "${script:DEPS_DIR}\lib\ollama\"
cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\ollama_runners\" cp "${env:VCToolsRedistDir}\x64\Microsoft.VC*.CRT\vcruntime140_1.dll" "${script:DEPS_DIR}\lib\ollama\"
foreach ($part in $("runtime", "stdio", "filesystem", "math", "convert", "heap", "string", "time", "locale", "environment")) { foreach ($part in $("runtime", "stdio", "filesystem", "math", "convert", "heap", "string", "time", "locale", "environment")) {
cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\ollama_runners\" cp "$env:VCToolsRedistDir\..\..\..\Tools\Llvm\x64\bin\api-ms-win-crt-${part}*.dll" "${script:DEPS_DIR}\lib\ollama\"
} }
cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\" cp "${script:SRC_DIR}\app\ollama_welcome.ps1" "${script:SRC_DIR}\dist\"
if ("${env:KEY_CONTAINER}") { if ("${env:KEY_CONTAINER}") {
write-host "about to sign" write-host "about to sign"
foreach ($file in (get-childitem "${script:DEPS_DIR}\cuda\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){ foreach ($file in (get-childitem "${script:DEPS_DIR}\lib\ollama\cu*.dll") + @("${script:SRC_DIR}\dist\ollama_welcome.ps1")){
write-host "signing $file" write-host "signing $file"
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" ` & "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
/csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} $file /csp "Google Cloud KMS Provider" /kc ${env:KEY_CONTAINER} $file

View File

@ -63,16 +63,36 @@ if [ -n "$NEEDS" ]; then
exit 1 exit 1
fi fi
status "Downloading ollama..."
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/ollama "https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}"
for BINDIR in /usr/local/bin /usr/bin /bin; do for BINDIR in /usr/local/bin /usr/bin /bin; do
echo $PATH | grep -q $BINDIR && break || continue echo $PATH | grep -q $BINDIR && break || continue
done done
OLLAMA_INSTALL_DIR=$(dirname ${BINDIR})
status "Installing ollama to $BINDIR..." status "Installing ollama to $OLLAMA_INSTALL_DIR"
$SUDO install -o0 -g0 -m755 -d $BINDIR $SUDO install -o0 -g0 -m755 -d $BINDIR
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama $SUDO install -o0 -g0 -m755 -d "$OLLAMA_INSTALL_DIR"
if curl -I --silent --fail --location "https://ollama.com/download/ollama-linux-${ARCH}.tgz${VER_PARAM}" >/dev/null ; then
status "Downloading Linux ${ARCH} bundle"
curl --fail --show-error --location --progress-bar \
"https://ollama.com/download/ollama-linux-${ARCH}.tgz${VER_PARAM}" | \
$SUDO tar -xzf - -C "$OLLAMA_INSTALL_DIR"
BUNDLE=1
if [ "$OLLAMA_INSTALL_DIR/bin/ollama" != "$BINDIR/ollama" ] ; then
status "Making ollama accessible in the PATH in $BINDIR"
$SUDO ln -sf "$OLLAMA_INSTALL_DIR/ollama" "$BINDIR/ollama"
fi
else
status "Downloading Linux ${ARCH} CLI"
curl --fail --show-error --location --progress-bar -o "$TEMP_DIR/ollama"\
"https://ollama.com/download/ollama-linux-${ARCH}${VER_PARAM}"
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $OLLAMA_INSTALL_DIR/ollama
BUNDLE=0
if [ "$OLLAMA_INSTALL_DIR/ollama" != "$BINDIR/ollama" ] ; then
status "Making ollama accessible in the PATH in $BINDIR"
$SUDO ln -sf "$OLLAMA_INSTALL_DIR/ollama" "$BINDIR/ollama"
fi
fi
install_success() { install_success() {
status 'The Ollama API is now available at 127.0.0.1:11434.' status 'The Ollama API is now available at 127.0.0.1:11434.'
@ -178,6 +198,16 @@ if ! check_gpu lspci nvidia && ! check_gpu lshw nvidia && ! check_gpu lspci amdg
fi fi
if check_gpu lspci amdgpu || check_gpu lshw amdgpu; then if check_gpu lspci amdgpu || check_gpu lshw amdgpu; then
if [ $BUNDLE -ne 0 ]; then
status "Downloading Linux ROCm ${ARCH} bundle"
curl --fail --show-error --location --progress-bar \
"https://ollama.com/download/ollama-linux-${ARCH}-rocm.tgz${VER_PARAM}" | \
$SUDO tar -xzf - -C "$OLLAMA_INSTALL_DIR"
install_success
status "AMD GPU ready."
exit 0
fi
# Look for pre-existing ROCm v6 before downloading the dependencies # Look for pre-existing ROCm v6 before downloading the dependencies
for search in "${HIP_PATH:-''}" "${ROCM_PATH:-''}" "/opt/rocm" "/usr/lib64"; do for search in "${HIP_PATH:-''}" "${ROCM_PATH:-''}" "/opt/rocm" "/usr/lib64"; do
if [ -n "${search}" ] && [ -e "${search}/libhipblas.so.2" -o -e "${search}/lib/libhipblas.so.2" ]; then if [ -n "${search}" ] && [ -e "${search}/libhipblas.so.2" -o -e "${search}/lib/libhipblas.so.2" ]; then
@ -209,15 +239,15 @@ install_cuda_driver_yum() {
case $PACKAGE_MANAGER in case $PACKAGE_MANAGER in
yum) yum)
$SUDO $PACKAGE_MANAGER -y install yum-utils $SUDO $PACKAGE_MANAGER -y install yum-utils
if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo" >/dev/null ; then if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-$1$2.repo" >/dev/null ; then
$SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo $SUDO $PACKAGE_MANAGER-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-$1$2.repo
else else
error $CUDA_REPO_ERR_MSG error $CUDA_REPO_ERR_MSG
fi fi
;; ;;
dnf) dnf)
if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo" >/dev/null ; then if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-$1$2.repo" >/dev/null ; then
$SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-$1$2.repo $SUDO $PACKAGE_MANAGER config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-$1$2.repo
else else
error $CUDA_REPO_ERR_MSG error $CUDA_REPO_ERR_MSG
fi fi
@ -245,8 +275,8 @@ install_cuda_driver_yum() {
# ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian # ref: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#debian
install_cuda_driver_apt() { install_cuda_driver_apt() {
status 'Installing NVIDIA repository...' status 'Installing NVIDIA repository...'
if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb" >/dev/null ; then if curl -I --silent --fail --location "https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-keyring_1.1-1_all.deb" >/dev/null ; then
curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m)/cuda-keyring_1.1-1_all.deb curl -fsSL -o $TEMP_DIR/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/$1$2/$(uname -m | sed -e 's/aarch64/sbsa/')/cuda-keyring_1.1-1_all.deb
else else
error $CUDA_REPO_ERR_MSG error $CUDA_REPO_ERR_MSG
fi fi

View File

@ -3,6 +3,7 @@
# Script for common Dockerfile dependency installation in redhat linux based images # Script for common Dockerfile dependency installation in redhat linux based images
set -ex set -ex
set -o pipefail
MACHINE=$(uname -m) MACHINE=$(uname -m)
if grep -i "centos" /etc/system-release >/dev/null; then if grep -i "centos" /etc/system-release >/dev/null; then
@ -29,7 +30,7 @@ if grep -i "centos" /etc/system-release >/dev/null; then
dnf install -y rh-git227-git dnf install -y rh-git227-git
ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git ln -s /opt/rh/rh-git227/root/usr/bin/git /usr/local/bin/git
fi fi
dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ dnf install -y devtoolset-10-gcc devtoolset-10-gcc-c++ pigz
elif grep -i "rocky" /etc/system-release >/dev/null; then elif grep -i "rocky" /etc/system-release >/dev/null; then
# Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC) # Temporary workaround until rocky 8 AppStream ships GCC 10.4 (10.3 is incompatible with NVCC)
cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo cat << EOF > /etc/yum.repos.d/Rocky-Vault.repo
@ -43,12 +44,21 @@ gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-rockyofficial
EOF EOF
dnf install -y git \ dnf install -y git \
gcc-toolset-10-gcc-10.2.1-8.2.el8 \ gcc-toolset-10-gcc-10.2.1-8.2.el8 \
gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
pigz
else else
echo "ERROR Unexpected distro" echo "ERROR Unexpected distro"
exit 1 exit 1
fi fi
if [ "${MACHINE}" = "x86_64" ] ; then
curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /tmp --strip-components 1 && \
mv /tmp/ccache /usr/local/bin/
else
yum -y install epel-release
yum install -y ccache
fi
if [ -n "${CMAKE_VERSION}" ]; then if [ -n "${CMAKE_VERSION}" ]; then
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
fi fi

View File

@ -94,7 +94,7 @@ func (p *blobDownloadPart) UnmarshalJSON(b []byte) error {
} }
const ( const (
numDownloadParts = 64 numDownloadParts = 16
minDownloadPartSize int64 = 100 * format.MegaByte minDownloadPartSize int64 = 100 * format.MegaByte
maxDownloadPartSize int64 = 1000 * format.MegaByte maxDownloadPartSize int64 = 1000 * format.MegaByte
) )
@ -216,6 +216,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
return err return err
} }
defer file.Close() defer file.Close()
setSparse(file)
_ = file.Truncate(b.Total) _ = file.Truncate(b.Total)
@ -232,7 +233,7 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *regis
newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error { newOpts.CheckRedirect = func(req *http.Request, via []*http.Request) error {
if len(via) > 10 { if len(via) > 10 {
return errors.New("maxium redirects exceeded (10) for directURL") return errors.New("maximum redirects exceeded (10) for directURL")
} }
// if the hostname is the same, allow the redirect // if the hostname is the same, allow the redirect

View File

@ -215,25 +215,20 @@ func GetManifest(mp ModelPath) (*Manifest, string, error) {
return nil, "", err return nil, "", err
} }
if _, err = os.Stat(fp); err != nil { f, err := os.Open(fp)
return nil, "", err
}
var manifest *Manifest
bts, err := os.ReadFile(fp)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("couldn't open file '%s'", fp) return nil, "", err
} }
defer f.Close()
shaSum := sha256.Sum256(bts) sha256sum := sha256.New()
shaStr := hex.EncodeToString(shaSum[:])
if err := json.Unmarshal(bts, &manifest); err != nil { var manifest Manifest
if err := json.NewDecoder(io.TeeReader(f, sha256sum)).Decode(&manifest); err != nil {
return nil, "", err return nil, "", err
} }
return manifest, shaStr, nil return &manifest, hex.EncodeToString(sha256sum.Sum(nil)), nil
} }
func GetModel(name string) (*Model, error) { func GetModel(name string) (*Model, error) {
@ -250,19 +245,21 @@ func GetModel(name string) (*Model, error) {
Template: template.DefaultTemplate, Template: template.DefaultTemplate,
} }
filename, err := GetBlobsPath(manifest.Config.Digest) if manifest.Config.Digest != "" {
if err != nil { filename, err := GetBlobsPath(manifest.Config.Digest)
return nil, err if err != nil {
} return nil, err
}
configFile, err := os.Open(filename) configFile, err := os.Open(filename)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer configFile.Close() defer configFile.Close()
if err := json.NewDecoder(configFile).Decode(&model.Config); err != nil { if err := json.NewDecoder(configFile).Decode(&model.Config); err != nil {
return nil, err return nil, err
}
} }
for _, layer := range manifest.Layers { for _, layer := range manifest.Layers {
@ -371,7 +368,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
var messages []*api.Message var messages []*api.Message
parameters := make(map[string]any) parameters := make(map[string]any)
var layers []*Layer var layers []Layer
for _, c := range modelfile.Commands { for _, c := range modelfile.Commands {
mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name) mediatype := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
@ -497,7 +494,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
if c.Name != "license" { if c.Name != "license" {
// replace // replace
layers = slices.DeleteFunc(layers, func(layer *Layer) bool { layers = slices.DeleteFunc(layers, func(layer Layer) bool {
if layer.MediaType != mediatype { if layer.MediaType != mediatype {
return false return false
} }
@ -543,7 +540,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
} }
var err2 error var err2 error
layers = slices.DeleteFunc(layers, func(layer *Layer) bool { layers = slices.DeleteFunc(layers, func(layer Layer) bool {
switch layer.MediaType { switch layer.MediaType {
case "application/vnd.ollama.image.message": case "application/vnd.ollama.image.message":
// if there are new messages, remove the inherited ones // if there are new messages, remove the inherited ones
@ -623,12 +620,12 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
return err return err
} }
layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json") configLayer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
if err != nil { if err != nil {
return err return err
} }
for _, layer := range append(layers, layer) { for _, layer := range append(layers, configLayer) {
if layer.status != "" { if layer.status != "" {
fn(api.ProgressResponse{Status: layer.status}) fn(api.ProgressResponse{Status: layer.status})
} }
@ -637,7 +634,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
old, _ := ParseNamedManifest(name) old, _ := ParseNamedManifest(name)
fn(api.ProgressResponse{Status: "writing manifest"}) fn(api.ProgressResponse{Status: "writing manifest"})
if err := WriteManifest(name, layer, layers); err != nil { if err := WriteManifest(name, configLayer, layers); err != nil {
return err return err
} }
@ -690,44 +687,18 @@ func CopyModel(src, dst model.Name) error {
return err return err
} }
func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{}) error { func deleteUnusedLayers(deleteMap map[string]struct{}) error {
fp, err := GetManifestPath() manifests, err := Manifests()
if err != nil { if err != nil {
return err return err
} }
walkFunc := func(path string, info os.FileInfo, _ error) error { for _, manifest := range manifests {
if info.IsDir() {
return nil
}
dir, file := filepath.Split(path)
dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
tag := strings.Join([]string{dir, file}, ":")
fmp := ParseModelPath(tag)
// skip the manifest we're trying to delete
if skipModelPath != nil && skipModelPath.GetFullTagname() == fmp.GetFullTagname() {
return nil
}
// save (i.e. delete from the deleteMap) any files used in other manifests
manifest, _, err := GetManifest(fmp)
if err != nil {
//nolint:nilerr
return nil
}
for _, layer := range manifest.Layers { for _, layer := range manifest.Layers {
delete(deleteMap, layer.Digest) delete(deleteMap, layer.Digest)
} }
delete(deleteMap, manifest.Config.Digest) delete(deleteMap, manifest.Config.Digest)
return nil
}
if err := filepath.Walk(fp, walkFunc); err != nil {
return err
} }
// only delete the files which are still in the deleteMap // only delete the files which are still in the deleteMap
@ -780,9 +751,9 @@ func PruneLayers() error {
slog.Info(fmt.Sprintf("total blobs: %d", len(deleteMap))) slog.Info(fmt.Sprintf("total blobs: %d", len(deleteMap)))
err = deleteUnusedLayers(nil, deleteMap) if err := deleteUnusedLayers(deleteMap); err != nil {
if err != nil { slog.Error(fmt.Sprintf("couldn't remove unused layers: %v", err))
return err return nil
} }
slog.Info(fmt.Sprintf("total unused blobs removed: %d", len(deleteMap))) slog.Info(fmt.Sprintf("total unused blobs removed: %d", len(deleteMap)))
@ -837,9 +808,11 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return err return err
} }
var layers []*Layer var layers []Layer
layers = append(layers, manifest.Layers...) layers = append(layers, manifest.Layers...)
layers = append(layers, manifest.Config) if manifest.Config.Digest != "" {
layers = append(layers, manifest.Config)
}
for _, layer := range layers { for _, layer := range layers {
if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil { if err := uploadBlob(ctx, mp, layer, regOpts, fn); err != nil {
@ -873,23 +846,18 @@ func PushModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error { func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn func(api.ProgressResponse)) error {
mp := ParseModelPath(name) mp := ParseModelPath(name)
var manifest *Manifest
var err error
var noprune string
// build deleteMap to prune unused layers // build deleteMap to prune unused layers
deleteMap := make(map[string]struct{}) deleteMap := make(map[string]struct{})
manifest, _, err := GetManifest(mp)
if !envconfig.NoPrune() { if errors.Is(err, os.ErrNotExist) {
manifest, _, err = GetManifest(mp) // noop
if err != nil && !errors.Is(err, os.ErrNotExist) { } else if err != nil && !errors.Is(err, os.ErrNotExist) {
return err return err
} else {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = struct{}{}
} }
if manifest.Config.Digest != "" {
if manifest != nil {
for _, l := range manifest.Layers {
deleteMap[l.Digest] = struct{}{}
}
deleteMap[manifest.Config.Digest] = struct{}{} deleteMap[manifest.Config.Digest] = struct{}{}
} }
} }
@ -905,9 +873,11 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return fmt.Errorf("pull model manifest: %s", err) return fmt.Errorf("pull model manifest: %s", err)
} }
var layers []*Layer var layers []Layer
layers = append(layers, manifest.Layers...) layers = append(layers, manifest.Layers...)
layers = append(layers, manifest.Config) if manifest.Config.Digest != "" {
layers = append(layers, manifest.Config)
}
skipVerify := make(map[string]bool) skipVerify := make(map[string]bool)
for _, layer := range layers { for _, layer := range layers {
@ -967,11 +937,10 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
return err return err
} }
if noprune == "" { if !envconfig.NoPrune() && len(deleteMap) > 0 {
fn(api.ProgressResponse{Status: "removing any unused layers"}) fn(api.ProgressResponse{Status: "removing unused layers"})
err = deleteUnusedLayers(nil, deleteMap) if err := deleteUnusedLayers(deleteMap); err != nil {
if err != nil { fn(api.ProgressResponse{Status: fmt.Sprintf("couldn't remove unused layers: %v", err)})
return err
} }
} }
@ -991,12 +960,12 @@ func pullModelManifest(ctx context.Context, mp ModelPath, regOpts *registryOptio
} }
defer resp.Body.Close() defer resp.Body.Close()
var m *Manifest var m Manifest
if err := json.NewDecoder(resp.Body).Decode(&m); err != nil { if err := json.NewDecoder(resp.Body).Decode(&m); err != nil {
return nil, err return nil, err
} }
return m, err return &m, err
} }
// GetSHA256Digest returns the SHA256 hash of a given buffer and returns it, and the size of buffer // GetSHA256Digest returns the SHA256 hash of a given buffer and returns it, and the size of buffer

View File

@ -2,6 +2,7 @@ package server
import ( import (
"crypto/sha256" "crypto/sha256"
"errors"
"fmt" "fmt"
"io" "io"
"os" "os"
@ -15,15 +16,15 @@ type Layer struct {
status string status string
} }
func NewLayer(r io.Reader, mediatype string) (*Layer, error) { func NewLayer(r io.Reader, mediatype string) (Layer, error) {
blobs, err := GetBlobsPath("") blobs, err := GetBlobsPath("")
if err != nil { if err != nil {
return nil, err return Layer{}, err
} }
temp, err := os.CreateTemp(blobs, "sha256-") temp, err := os.CreateTemp(blobs, "sha256-")
if err != nil { if err != nil {
return nil, err return Layer{}, err
} }
defer temp.Close() defer temp.Close()
defer os.Remove(temp.Name()) defer os.Remove(temp.Name())
@ -31,28 +32,31 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
sha256sum := sha256.New() sha256sum := sha256.New()
n, err := io.Copy(io.MultiWriter(temp, sha256sum), r) n, err := io.Copy(io.MultiWriter(temp, sha256sum), r)
if err != nil { if err != nil {
return nil, err return Layer{}, err
} }
if err := temp.Close(); err != nil { if err := temp.Close(); err != nil {
return nil, err return Layer{}, err
} }
digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil)) digest := fmt.Sprintf("sha256:%x", sha256sum.Sum(nil))
blob, err := GetBlobsPath(digest) blob, err := GetBlobsPath(digest)
if err != nil { if err != nil {
return nil, err return Layer{}, err
} }
status := "using existing layer" status := "using existing layer"
if _, err := os.Stat(blob); err != nil { if _, err := os.Stat(blob); err != nil {
status = "creating new layer" status = "creating new layer"
if err := os.Rename(temp.Name(), blob); err != nil { if err := os.Rename(temp.Name(), blob); err != nil {
return nil, err return Layer{}, err
}
if err := os.Chmod(blob, 0o644); err != nil {
return Layer{}, err
} }
} }
return &Layer{ return Layer{
MediaType: mediatype, MediaType: mediatype,
Digest: digest, Digest: digest,
Size: n, Size: n,
@ -60,18 +64,22 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
}, nil }, nil
} }
func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) { func NewLayerFromLayer(digest, mediatype, from string) (Layer, error) {
if digest == "" {
return Layer{}, errors.New("creating new layer from layer with empty digest")
}
blob, err := GetBlobsPath(digest) blob, err := GetBlobsPath(digest)
if err != nil { if err != nil {
return nil, err return Layer{}, err
} }
fi, err := os.Stat(blob) fi, err := os.Stat(blob)
if err != nil { if err != nil {
return nil, err return Layer{}, err
} }
return &Layer{ return Layer{
MediaType: mediatype, MediaType: mediatype,
Digest: digest, Digest: digest,
Size: fi.Size(), Size: fi.Size(),
@ -81,6 +89,10 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
} }
func (l *Layer) Open() (io.ReadSeekCloser, error) { func (l *Layer) Open() (io.ReadSeekCloser, error) {
if l.Digest == "" {
return nil, errors.New("opening layer with empty digest")
}
blob, err := GetBlobsPath(l.Digest) blob, err := GetBlobsPath(l.Digest)
if err != nil { if err != nil {
return nil, err return nil, err
@ -90,6 +102,10 @@ func (l *Layer) Open() (io.ReadSeekCloser, error) {
} }
func (l *Layer) Remove() error { func (l *Layer) Remove() error {
if l.Digest == "" {
return nil
}
ms, err := Manifests() ms, err := Manifests()
if err != nil { if err != nil {
return err return err

View File

@ -5,6 +5,7 @@ import (
"encoding/hex" "encoding/hex"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt"
"io" "io"
"log/slog" "log/slog"
"os" "os"
@ -14,10 +15,10 @@ import (
) )
type Manifest struct { type Manifest struct {
SchemaVersion int `json:"schemaVersion"` SchemaVersion int `json:"schemaVersion"`
MediaType string `json:"mediaType"` MediaType string `json:"mediaType"`
Config *Layer `json:"config"` Config Layer `json:"config"`
Layers []*Layer `json:"layers"` Layers []Layer `json:"layers"`
filepath string filepath string
fi os.FileInfo fi os.FileInfo
@ -47,10 +48,12 @@ func (m *Manifest) Remove() error {
func (m *Manifest) RemoveLayers() error { func (m *Manifest) RemoveLayers() error {
for _, layer := range append(m.Layers, m.Config) { for _, layer := range append(m.Layers, m.Config) {
if err := layer.Remove(); errors.Is(err, os.ErrNotExist) { if layer.Digest != "" {
slog.Debug("layer does not exist", "digest", layer.Digest) if err := layer.Remove(); errors.Is(err, os.ErrNotExist) {
} else if err != nil { slog.Debug("layer does not exist", "digest", layer.Digest)
return err } else if err != nil {
return err
}
} }
} }
@ -93,7 +96,7 @@ func ParseNamedManifest(n model.Name) (*Manifest, error) {
return &m, nil return &m, nil
} }
func WriteManifest(name model.Name, config *Layer, layers []*Layer) error { func WriteManifest(name model.Name, config Layer, layers []Layer) error {
manifests, err := GetManifestPath() manifests, err := GetManifestPath()
if err != nil { if err != nil {
return err return err
@ -148,14 +151,16 @@ func Manifests() (map[model.Name]*Manifest, error) {
n := model.ParseNameFromFilepath(rel) n := model.ParseNameFromFilepath(rel)
if !n.IsValid() { if !n.IsValid() {
slog.Warn("bad manifest name", "path", rel, "error", err) slog.Warn("bad manifest name", "path", rel)
continue continue
} }
m, err := ParseNamedManifest(n) m, err := ParseNamedManifest(n)
if err != nil { if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
slog.Warn("bad manifest", "name", n, "error", err) slog.Warn("bad manifest", "name", n, "error", err)
continue continue
} else if err != nil {
return nil, fmt.Errorf("%s: %w", n, err)
} }
ms[n] = m ms[n] = m

View File

@ -26,7 +26,7 @@ import (
var intermediateBlobs map[string]string = make(map[string]string) var intermediateBlobs map[string]string = make(map[string]string)
type layerGGML struct { type layerGGML struct {
*Layer Layer
*llm.GGML *llm.GGML
} }
@ -176,9 +176,20 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
mediatype = "application/vnd.ollama.image.projector" mediatype = "application/vnd.ollama.image.projector"
} }
layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype) var layer Layer
if err != nil { if digest != "" && n == stat.Size() && offset == 0 {
return nil, err layer, err = NewLayerFromLayer(digest, mediatype, file.Name())
if err != nil {
slog.Debug("could not create new layer from layer", "error", err)
}
}
// Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size())
if layer.Digest == "" {
layer, err = NewLayer(io.NewSectionReader(file, offset, n), mediatype)
if err != nil {
return nil, err
}
} }
layers = append(layers, &layerGGML{layer, ggml}) layers = append(layers, &layerGGML{layer, ggml})

View File

@ -2,8 +2,10 @@ package server
import ( import (
"bytes" "bytes"
"context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"os" "os"
"path/filepath" "path/filepath"
"testing" "testing"
@ -11,6 +13,7 @@ import (
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/template" "github.com/ollama/ollama/template"
) )
@ -133,3 +136,82 @@ The temperature in San Francisco, CA is 70°F and in Toronto, Canada is 20°C.`,
}) })
} }
} }
func TestParseFromFileFromLayer(t *testing.T) {
tempModels := t.TempDir()
file, err := os.CreateTemp(tempModels, "")
if err != nil {
t.Fatalf("failed to open file: %v", err)
}
defer file.Close()
if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil {
t.Fatalf("failed to write gguf: %v", err)
}
if _, err := file.Seek(0, io.SeekStart); err != nil {
t.Fatalf("failed to seek to start: %v", err)
}
layers, err := parseFromFile(context.Background(), file, "", func(api.ProgressResponse) {})
if err != nil {
t.Fatalf("failed to parse from file: %v", err)
}
if len(layers) != 1 {
t.Fatalf("got %d != want 1", len(layers))
}
if _, err := file.Seek(0, io.SeekStart); err != nil {
t.Fatalf("failed to seek to start: %v", err)
}
layers2, err := parseFromFile(context.Background(), file, layers[0].Digest, func(api.ProgressResponse) {})
if err != nil {
t.Fatalf("failed to parse from file: %v", err)
}
if len(layers2) != 1 {
t.Fatalf("got %d != want 1", len(layers2))
}
if layers[0].Digest != layers2[0].Digest {
t.Fatalf("got %s != want %s", layers[0].Digest, layers2[0].Digest)
}
if layers[0].Size != layers2[0].Size {
t.Fatalf("got %d != want %d", layers[0].Size, layers2[0].Size)
}
if layers[0].MediaType != layers2[0].MediaType {
t.Fatalf("got %v != want %v", layers[0].MediaType, layers2[0].MediaType)
}
}
func TestParseLayerFromCopy(t *testing.T) {
tempModels := t.TempDir()
file2, err := os.CreateTemp(tempModels, "")
if err != nil {
t.Fatalf("failed to open file: %v", err)
}
defer file2.Close()
for range 5 {
if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil {
t.Fatalf("failed to write gguf: %v", err)
}
}
if _, err := file2.Seek(0, io.SeekStart); err != nil {
t.Fatalf("failed to seek to start: %v", err)
}
layers, err := parseFromFile(context.Background(), file2, "", func(api.ProgressResponse) {})
if err != nil {
t.Fatalf("failed to parse from file: %v", err)
}
if len(layers) != 5 {
t.Fatalf("got %d != want 5", len(layers))
}
}

View File

@ -23,6 +23,7 @@ import (
"github.com/gin-contrib/cors" "github.com/gin-contrib/cors"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"golang.org/x/sync/errgroup"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/envconfig"
@ -323,13 +324,10 @@ func (s *Server) EmbedHandler(c *gin.Context) {
input = append(input, v.(string)) input = append(input, v.(string))
} }
default: default:
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"}) if req.Input != nil {
return c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
} return
}
if len(input) == 0 {
c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
return
} }
r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive) r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
@ -340,12 +338,18 @@ func (s *Server) EmbedHandler(c *gin.Context) {
checkpointLoaded := time.Now() checkpointLoaded := time.Now()
if len(input) == 0 {
c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
return
}
kvData, err := getKVData(m.ModelPath, false) kvData, err := getKVData(m.ModelPath, false)
if err != nil { if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
} }
var count int
for i, s := range input { for i, s := range input {
tokens, err := r.Tokenize(c.Request.Context(), s) tokens, err := r.Tokenize(c.Request.Context(), s)
if err != nil { if err != nil {
@ -368,25 +372,36 @@ func (s *Server) EmbedHandler(c *gin.Context) {
} }
} }
count += len(tokens)
input[i] = s input[i] = s
} }
embeddings, err := r.Embed(c.Request.Context(), input)
if err != nil { var g errgroup.Group
slog.Error("embedding generation failed", "error", err) embeddings := make([][]float32, len(input))
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"}) for i, text := range input {
return g.Go(func() error {
embedding, err := r.Embedding(c.Request.Context(), text)
if err != nil {
return err
}
embeddings[i] = normalize(embedding)
return nil
})
} }
for i, e := range embeddings.Embedding { if err := g.Wait(); err != nil {
embeddings.Embedding[i] = normalize(e) slog.Error("embedding generation failed", "error", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Errorf("failed to generate embeddings: %v", err)})
return
} }
resp := api.EmbedResponse{ resp := api.EmbedResponse{
Model: req.Model, Model: req.Model,
Embeddings: embeddings.Embedding, Embeddings: embeddings,
TotalDuration: time.Since(checkpointStart), TotalDuration: time.Since(checkpointStart),
LoadDuration: checkpointLoaded.Sub(checkpointStart), LoadDuration: checkpointLoaded.Sub(checkpointStart),
PromptEvalCount: embeddings.PromptEvalCount, PromptEvalCount: count,
} }
c.JSON(http.StatusOK, resp) c.JSON(http.StatusOK, resp)
} }
@ -430,21 +445,20 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
return return
} }
embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt}) embedding, err := r.Embedding(c.Request.Context(), req.Prompt)
if err != nil { if err != nil {
slog.Info(fmt.Sprintf("embedding generation failed: %v", err)) slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"}) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
return return
} }
embedding := make([]float64, len(embeddings.Embedding[0])) var e []float64
for _, v := range embedding {
for i, v := range embeddings.Embedding[0] { e = append(e, float64(v))
embedding[i] = float64(v)
} }
resp := api.EmbeddingResponse{ resp := api.EmbeddingResponse{
Embedding: embedding, Embedding: e,
} }
c.JSON(http.StatusOK, resp) c.JSON(http.StatusOK, resp)
} }
@ -824,17 +838,20 @@ func (s *Server) ListModelsHandler(c *gin.Context) {
models := []api.ListModelResponse{} models := []api.ListModelResponse{}
for n, m := range ms { for n, m := range ms {
f, err := m.Config.Open()
if err != nil {
slog.Warn("bad manifest filepath", "name", n, "error", err)
continue
}
defer f.Close()
var cf ConfigV2 var cf ConfigV2
if err := json.NewDecoder(f).Decode(&cf); err != nil {
slog.Warn("bad manifest config", "name", n, "error", err) if m.Config.Digest != "" {
continue f, err := m.Config.Open()
if err != nil {
slog.Warn("bad manifest filepath", "name", n, "error", err)
continue
}
defer f.Close()
if err := json.NewDecoder(f).Decode(&cf); err != nil {
slog.Warn("bad manifest config", "name", n, "error", err)
continue
}
} }
// tag should never be masked // tag should never be masked

View File

@ -2,6 +2,7 @@ package server
import ( import (
"bytes" "bytes"
"cmp"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
@ -53,6 +54,8 @@ func (t *responseRecorder) CloseNotify() <-chan bool {
func createRequest(t *testing.T, fn func(*gin.Context), body any) *httptest.ResponseRecorder { func createRequest(t *testing.T, fn func(*gin.Context), body any) *httptest.ResponseRecorder {
t.Helper() t.Helper()
// if OLLAMA_MODELS is not set, set it to the temp directory
t.Setenv("OLLAMA_MODELS", cmp.Or(os.Getenv("OLLAMA_MODELS"), t.TempDir()))
w := NewRecorder() w := NewRecorder()
c, _ := gin.CreateTestContext(w) c, _ := gin.CreateTestContext(w)

View File

@ -98,7 +98,7 @@ func TestDeleteDuplicateLayers(t *testing.T) {
} }
// create a manifest with duplicate layers // create a manifest with duplicate layers
if err := WriteManifest(n, config, []*Layer{config}); err != nil { if err := WriteManifest(n, config, []Layer{config}); err != nil {
t.Fatal(err) t.Fatal(err)
} }

View File

@ -272,76 +272,6 @@ func Test_Routes(t *testing.T) {
assert.Equal(t, "library", retrieveResp.OwnedBy) assert.Equal(t, "library", retrieveResp.OwnedBy)
}, },
}, },
{
Name: "Embed Handler Empty Input",
Method: http.MethodPost,
Path: "/api/embed",
Setup: func(t *testing.T, req *http.Request) {
embedReq := api.EmbedRequest{
Model: "t-bone",
Input: "",
}
jsonData, err := json.Marshal(embedReq)
require.NoError(t, err)
req.Body = io.NopCloser(bytes.NewReader(jsonData))
},
Expected: func(t *testing.T, resp *http.Response) {
contentType := resp.Header.Get("Content-Type")
if contentType != "application/json; charset=utf-8" {
t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatal(err)
}
var embedResp api.EmbedResponse
err = json.Unmarshal(body, &embedResp)
if err != nil {
t.Fatal(err)
}
if embedResp.Model != "t-bone" {
t.Fatalf("expected model t-bone, got %s", embedResp.Model)
}
if embedResp.Embeddings == nil {
t.Fatalf("expected embeddings to not be nil, got %v", embedResp.Embeddings)
}
if len(embedResp.Embeddings) != 0 {
t.Fatalf("expected embeddings to be empty, got %v", embedResp.Embeddings)
}
},
},
{
Name: "Embed Handler Invalid Input",
Method: http.MethodPost,
Path: "/api/embed",
Setup: func(t *testing.T, req *http.Request) {
embedReq := api.EmbedRequest{
Model: "t-bone",
Input: 2,
}
jsonData, err := json.Marshal(embedReq)
require.NoError(t, err)
req.Body = io.NopCloser(bytes.NewReader(jsonData))
},
Expected: func(t *testing.T, resp *http.Response) {
contentType := resp.Header.Get("Content-Type")
if contentType != "application/json; charset=utf-8" {
t.Fatalf("expected content type application/json; charset=utf-8, got %s", contentType)
}
_, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatal(err)
}
if resp.StatusCode != http.StatusBadRequest {
t.Fatalf("expected status code 400, got %d", resp.StatusCode)
}
},
},
} }
t.Setenv("OLLAMA_MODELS", t.TempDir()) t.Setenv("OLLAMA_MODELS", t.TempDir())

View File

@ -193,6 +193,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
break break
} }
// Embedding models should always be loaded with parallel=1
if pending.model.CheckCapabilities(CapabilityCompletion) != nil {
numParallel = 1
}
// Evaluate if the model will fit in the available system memory, or if we should unload a model first // Evaluate if the model will fit in the available system memory, or if we should unload a model first
if len(gpus) == 1 && gpus[0].Library == "cpu" { if len(gpus) == 1 && gpus[0].Library == "cpu" {
// simplifying assumption of defaultParallel when in CPU mode // simplifying assumption of defaultParallel when in CPU mode
@ -418,7 +423,7 @@ func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList,
// some older models are not compatible with newer versions of llama.cpp // some older models are not compatible with newer versions of llama.cpp
// show a generalized compatibility error until there is a better way to // show a generalized compatibility error until there is a better way to
// check for model compatibility // check for model compatibility
if errors.Is(llm.ErrUnsupportedFormat, err) || strings.Contains(err.Error(), "failed to load model") { if errors.Is(err, llm.ErrUnsupportedFormat) || strings.Contains(err.Error(), "failed to load model") {
err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, req.model.ShortName) err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, req.model.ShortName)
} }
slog.Info("NewLlamaServer failed", "model", req.model.ModelPath, "error", err) slog.Info("NewLlamaServer failed", "model", req.model.ModelPath, "error", err)
@ -734,7 +739,10 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL
// If multiple Libraries are detected, pick the Library which loads the most layers for the model // If multiple Libraries are detected, pick the Library which loads the most layers for the model
func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList { func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList {
*numParallel = 1 if *numParallel <= 0 {
*numParallel = 1
req.opts.NumCtx = req.origNumCtx
}
byLibrary := gpus.ByLibrary() byLibrary := gpus.ByLibrary()
if len(byLibrary) <= 1 { if len(byLibrary) <= 1 {
return gpus return gpus

View File

@ -117,7 +117,6 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
require.NoError(t, llm.WriteGGUF(f, llm.KV{ require.NoError(t, llm.WriteGGUF(f, llm.KV{
"general.architecture": "llama", "general.architecture": "llama",
"general.name": "name",
"llama.context_length": uint32(32), "llama.context_length": uint32(32),
"llama.embedding_length": uint32(4096), "llama.embedding_length": uint32(4096),
"llama.block_count": uint32(1), "llama.block_count": uint32(1),
@ -708,8 +707,8 @@ type mockLlm struct {
pingResp error pingResp error
waitResp error waitResp error
completionResp error completionResp error
embedResp *llm.EmbedResponse embeddingResp []float32
embedRespErr error embeddingRespErr error
tokenizeResp []int tokenizeResp []int
tokenizeRespErr error tokenizeRespErr error
detokenizeResp string detokenizeResp string
@ -727,8 +726,8 @@ func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn
return s.completionResp return s.completionResp
} }
func (s *mockLlm) Embed(ctx context.Context, input []string) (*llm.EmbedResponse, error) { func (s *mockLlm) Embedding(ctx context.Context, input string) ([]float32, error) {
return s.embedResp, s.embedRespErr return s.embeddingResp, s.embeddingRespErr
} }
func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) { func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) {

8
server/sparse_common.go Normal file
View File

@ -0,0 +1,8 @@
//go:build !windows
package server
import "os"
func setSparse(*os.File) {
}

17
server/sparse_windows.go Normal file
View File

@ -0,0 +1,17 @@
package server
import (
"os"
"golang.org/x/sys/windows"
)
func setSparse(file *os.File) {
// exFat (and other FS types) don't support sparse files, so ignore errors
windows.DeviceIoControl( //nolint:errcheck
windows.Handle(file.Fd()), windows.FSCTL_SET_SPARSE,
nil, 0,
nil, 0,
nil, nil,
)
}

View File

@ -26,7 +26,7 @@ import (
var blobUploadManager sync.Map var blobUploadManager sync.Map
type blobUpload struct { type blobUpload struct {
*Layer Layer
Total int64 Total int64
Completed atomic.Int64 Completed atomic.Int64
@ -45,7 +45,7 @@ type blobUpload struct {
} }
const ( const (
numUploadParts = 64 numUploadParts = 16
minUploadPartSize int64 = 100 * format.MegaByte minUploadPartSize int64 = 100 * format.MegaByte
maxUploadPartSize int64 = 1000 * format.MegaByte maxUploadPartSize int64 = 1000 * format.MegaByte
) )
@ -362,7 +362,7 @@ func (p *progressWriter) Rollback() {
p.written = 0 p.written = 0
} }
func uploadBlob(ctx context.Context, mp ModelPath, layer *Layer, opts *registryOptions, fn func(api.ProgressResponse)) error { func uploadBlob(ctx context.Context, mp ModelPath, layer Layer, opts *registryOptions, fn func(api.ProgressResponse)) error {
requestURL := mp.BaseURL() requestURL := mp.BaseURL()
requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "blobs", layer.Digest) requestURL = requestURL.JoinPath("v2", mp.GetNamespaceRepository(), "blobs", layer.Digest)

View File

@ -219,7 +219,7 @@ func (n Name) String() string {
return b.String() return b.String()
} }
// DisplayShort returns a short string version of the name. // DisplayShortest returns a short string version of the name.
func (n Name) DisplayShortest() string { func (n Name) DisplayShortest() string {
var sb strings.Builder var sb strings.Builder