Added test case

humanNumbers now formats to 3 digits; added a trillion case for future use
2024-06-03 17:28:05 -07:00 · 2024-06-03 17:27:38 -07:00 · 2024-06-03 17:26:02 -07:00
665 changed files with 22374 additions and 173856 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -3,7 +3,7 @@ ollama
 app
 macapp
 dist
+llm/llama.cpp
 .env
 .cache
 test_data
-llama/build
--- a/.gitattributes
+++ b/.gitattributes
@ -1,11 +1 @@
-llama/**/*.cpp linguist-vendored
-llama/**/*.hpp linguist-vendored
-llama/**/*.h linguist-vendored
-llama/**/*.c linguist-vendored
-llama/**/*.cu linguist-vendored
-llama/**/*.cuh linguist-vendored
-llama/**/*.m linguist-vendored
-llama/**/*.metal linguist-vendored
-
-* text=auto
-*.go text eol=lf
+llm/ext_server/* linguist-vendored
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@ -1,9 +1,5 @@
 name: release

-env:
-  ROCM_WINDOWS_URL: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
-  MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe
-
 on:
  push:
    tags:
@ -12,7 +8,7 @@ on:
 jobs:
  # Full build of the Mac assets
  build-darwin:
-    runs-on: macos-13
+    runs-on: macos-12
    environment: release
    steps:
      - uses: actions/checkout@v4
@ -43,8 +39,8 @@ jobs:
          APPLE_PASSWORD: ${{ secrets.APPLE_PASSWORD }}
          APPLE_TEAM_ID: ${{ vars.APPLE_TEAM_ID }}
          APPLE_ID: ${{ vars.APPLE_ID }}
-          SDKROOT: /Applications/Xcode_14.1.0.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
-          DEVELOPER_DIR: /Applications/Xcode_14.1.0.app/Contents/Developer
+          SDKROOT: /Applications/Xcode_13.4.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
+          DEVELOPER_DIR: /Applications/Xcode_13.4.1.app/Contents/Developer
        run: |
          ./scripts/build_darwin.sh

@ -52,8 +48,8 @@ jobs:
        with:
          name: dist-darwin
          path: |
-            dist/Ollama-darwin.zip
-            dist/ollama-darwin
+            dist/*arwin*
+            !dist/*-cov

  # Windows builds take a long time to both install the dependencies and build, so parallelize
  # CPU generation step
@ -64,286 +60,14 @@ jobs:
      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
    steps:
      - uses: actions/checkout@v4
-      - name: Set make jobs default
-        run: |
-          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
      - name: Set Version
        shell: bash
        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: Add msys paths
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make
-        name: make
-      - uses: actions/upload-artifact@v4
-        with:
-          name: generate-windows-cpu
-          path: |
-            build/**/*
-            dist/windows-amd64/**
-
-  # ROCm generation step
-  generate-windows-rocm:
-    environment: release
-    runs-on: windows
-    env:
-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set make jobs default
-        run: |
-          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: Add msys paths
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      # ROCM installation steps
-      - name: 'Cache ROCm installer'
-        id: cache-rocm
-        uses: actions/cache@v4
-        with:
-          path: rocm-install.exe
-          key: ${{ env.ROCM_WINDOWS_URL }}
-      - name: 'Conditionally Download ROCm'
-        if: steps.cache-rocm.outputs.cache-hit != 'true'
-        run: |
-          $ErrorActionPreference = "Stop"
-          Invoke-WebRequest -Uri "${env:ROCM_WINDOWS_URL}" -OutFile "rocm-install.exe"
-      - name: 'Install ROCm'
-        run: |
-          Start-Process "rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
-      - name: 'Verify ROCm'
-        run: |
-          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
-          echo "HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path | select -first 1)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-      - name: make rocm runner
-        run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make -C llama print-HIP_PATH print-HIP_LIB_DIR
-          make rocm
-      - uses: actions/upload-artifact@v4
-        with:
-          name: generate-windows-rocm
-          path: |
-            build/**/*
-            dist/windows-amd64/**
-
-  # CUDA generation step
-  generate-windows-cuda:
-    environment: release
-    runs-on: windows
-    strategy:
-      matrix:
-        cuda:
-          - version: "11.3"
-            url: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
-          - version: "12.4"
-            url: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
-    env:
-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set make jobs default
-        run: |
-          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: Install msys2
-        run: |
-          $msys2_url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe"
-          write-host "Downloading msys2"
-          Invoke-WebRequest -Uri "${msys2_url}" -OutFile "${env:RUNNER_TEMP}\msys2.exe"
-          write-host "Installing msys2"
-          Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @("in", "--confirm-command", "--accept-messages", "--root", "C:/msys64") -NoNewWindow -Wait
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang", "make") -NoNewWindow -Wait
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: verify tools
-        run: |
-          get-command gcc
-          gcc --version
-          get-command make
-          make --version
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      # CUDA installation steps
-      - name: 'Cache CUDA installer'
-        id: cache-cuda
-        uses: actions/cache@v4
-        with:
-          path: cuda-install.exe
-          key: ${{ matrix.cuda.url }}
-      - name: 'Conditionally Download CUDA'
-        if: steps.cache-cuda.outputs.cache-hit != 'true'
-        run: |
-          $ErrorActionPreference = "Stop"
-          Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "cuda-install.exe"
-      - name: 'Install CUDA'
-        run: |
-          $subpackages = @("cudart", "nvcc", "cublas", "cublas_dev") | foreach-object {"${_}_${{ matrix.cuda.version }}"}
-          Start-Process "cuda-install.exe" -ArgumentList (@("-s") + $subpackages) -NoNewWindow -Wait
-      - name: 'Verify CUDA'
-        run: |
-          & (resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0] --version
-          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
-          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' 
-          echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "CUDA_PATH=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-          echo "CUDA_PATH_V${cudaVer}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-
-      - name: make cuda runner
-        run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
-      - uses: actions/upload-artifact@v4
-        with:
-          name: generate-windows-cuda-${{ matrix.cuda.version }}
-          path: |
-            build/**/*
-            dist/windows-amd64/**
-
-  # windows arm64 generate, go build, and zip file (no installer)
-  # Output of this build is aggregated into the final x86 build
-  # for a unified windows installer
-  windows-arm64:
-    runs-on: windows-arm64
-    environment: release
-    env:
-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-    steps:
-      # The current Windows arm64 beta image has effectively zero dev tools installed...
-      - name: Install git and gzip
-        run: |
-          Set-ExecutionPolicy Bypass -Scope Process -Force
-          [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072
-          iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
-          choco install -y --no-progress git gzip
-          echo "C:\Program Files\Git\cmd" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\ProgramData\chocolatey\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      # pacman is buggy on win arm64, so we avoid using it, but rely on the binary artifacts
-      # we download the sfx (7zip bundle) which isn't fully set up, but the binaries we need to build work
-      - name: Install msys2 x64
-        run: |
-          $url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-base-x86_64-20240727.sfx.exe"
-          write-host "Downloading MSYS2"
-          Invoke-WebRequest -Uri "$url" -outfile "${env:RUNNER_TEMP}\msys2.exe"
-          write-host "Installing msys2"
-          Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @(
-              '-y', '-oC:\'
-              ) -NoNewWindow -Wait
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      # since pacman isn't reliable, we just download the tar file and extract directly
-      - name: Downloading and extracting msys2 make tar file
-        run: |
-          $url="https://mirror.msys2.org/msys/x86_64/make-4.4.1-2-x86_64.pkg.tar.zst"
-          write-host "Downloading make"
-          Invoke-WebRequest -Uri "$url" -outfile c:\msys64\make.tar.zst
-          cd c:\msys64; tar -xf make.tar.zst
-          rm c:\msys64\make.tar.zst
-      - name: Verify Make works properly
-        run: |
-          echo $env:PATH
-          make --version
-      - name: Install Visual Studio 2022
-        run: |
-          $components = @(
-            "Microsoft.VisualStudio.Component.CoreEditor",
-            "Microsoft.VisualStudio.Workload.CoreEditor",
-            "Microsoft.VisualStudio.Component.Roslyn.Compiler",
-            "Microsoft.Component.MSBuild",
-            "Microsoft.VisualStudio.Component.TextTemplating",
-            "Microsoft.VisualStudio.Component.Debugger.JustInTime",
-            "Microsoft.VisualStudio.Component.VC.CoreIde",
-            "Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
-            "Microsoft.VisualStudio.Component.Windows11SDK.22621",
-            "Microsoft.VisualStudio.Component.VC.Tools.ARM64EC",
-            "Microsoft.VisualStudio.Component.VC.Tools.ARM64",
-            "Microsoft.VisualStudio.Component.VC.ATL",
-            "Microsoft.VisualStudio.Component.VC.ATL.ARM64",
-            "Microsoft.VisualStudio.Component.Graphics",
-            "Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
-            "Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
-            "Microsoft.VisualStudio.Component.Windows11Sdk.WindowsPerformanceToolkit",
-            "Microsoft.VisualStudio.Component.CppBuildInsights",
-            "Microsoft.VisualStudio.Component.VC.DiagnosticTools",
-            "Microsoft.VisualStudio.ComponentGroup.WebToolsExtensions.CMake",
-            "Microsoft.VisualStudio.Component.VC.CMake.Project",
-            "Microsoft.VisualStudio.Component.VC.ASAN",
-            "Microsoft.VisualStudio.Component.Vcpkg",
-            "Microsoft.VisualStudio.Workload.NativeDesktop"
-          )
-          $config = @{
-                "version" = "1.0"
-                "components"  = $components
-                "extensions"  = @()
-            }
-          $configPath = "${env:RUNNER_TEMP}\vsconfig"
-          $config | ConvertTo-Json | Out-File -FilePath $configPath
-          $bootstrapperFilePath = "${env:RUNNER_TEMP}\vs_community.exe"
-          write-host "Downloading Visual Studio 2022"
-          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_community.exe" -outfile $bootstrapperFilePath
-          $bootstrapperArgumentList = ('/c', $bootstrapperFilePath, '--config', $configPath, '--quiet', '--wait' )
-          write-host "Installing Visual Studio 2022"
-          $process = Start-Process -FilePath cmd.exe -ArgumentList $bootstrapperArgumentList -Wait -PassThru
-          $exitCode = $process.ExitCode
-          write-host $exitCode
-      # pacman in mingw/msys2 is ~broken on windows arm right now - hangs consistently during attempts to install
-      # so we'll use this alternative GCC binary
-      - name: Install llvm-mingw GCC
-        run: |
-          $gcc_url="https://github.com/mstorsjo/llvm-mingw/releases/download/20240619/llvm-mingw-20240619-ucrt-aarch64.zip"
-          write-host "Downloading llvm-mingw"
-          Invoke-WebRequest -Uri "${gcc_url}" -OutFile "${env:RUNNER_TEMP}\gcc.zip"
-          write-host "Unpacking llvm-mingw"
-          expand-archive -path "${env:RUNNER_TEMP}\gcc.zip" -destinationpath "c:\"
-          mv c:\llvm-mingw-* c:\llvm-mingw
-          echo "c:\llvm-mingw\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Verify GCC
-        run: |
-          echo $env:PATH
-          gcc --version
-      - uses: actions/checkout@v4
-      - name: Set Version
-        run: |
-          $ver=${env:GITHUB_REF_NAME}.trim("v")
-          echo VERSION=$ver | Out-File -FilePath ${env:GITHUB_ENV} -Encoding utf8 -Append
      - uses: 'google-github-actions/auth@v2'
        with:
          project_id: 'ollama'
          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
-      - run: echo "${{ vars.OLLAMA_CERT }}" | Out-File -FilePath ollama_inc.crt -Encoding utf8
+      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
      - name: install Windows SDK 8.1 to get signtool
        run: |
          $ErrorActionPreference = "Stop"
@ -368,23 +92,180 @@ jobs:
      - run: go get ./...
      - run: |
          $gopath=(get-command go).source | split-path -parent
-          $gccpath=(get-command gcc).source | split-path -parent
-          import-module 'C:\Program Files\Microsoft Visual Studio\2022\Community\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -Arch arm64 -vsinstallpath 'C:\Program Files\Microsoft Visual Studio\2022\Community' -skipautomaticlocation
-          $env:PATH="$gopath;$gccpath;$env:PATH"
-          echo $env:PATH
-          $env:ARCH="arm64"
-          .\scripts\build_windows.ps1 buildOllama buildApp gatherDependencies sign distZip
-        name: 'Windows Build'
+          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+          cd $env:GITHUB_WORKSPACE
+          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+          $env:PATH="$gopath;$env:PATH"
+          go generate -x ./...
+        name: go generate
      - uses: actions/upload-artifact@v4
        with:
-          name: windows-arm64
+          name: generate-windows-cpu
          path: |
-            dist/windows-arm64/**
-            dist/windows-arm64-app.exe
-            dist/ollama-windows-arm64.zip
+            llm/build/**/bin/*
+            llm/build/**/*.a
+            dist/windows-amd64/**

-  # Import the prior generation steps plus the full arm64 build, and build the final windows assets
+  # ROCm generation step
+  generate-windows-rocm:
+    environment: release
+    runs-on: windows
+    env:
+      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set Version
+        shell: bash
+        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
+      - uses: 'google-github-actions/auth@v2'
+        with:
+          project_id: 'ollama'
+          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
+      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
+      - name: install Windows SDK 8.1 to get signtool
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading SDK"
+          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
+          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
+          write-host "Win SDK 8.1 installed"
+          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
+      - name: install signing plugin
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading plugin"
+          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
+          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
+          write-host "Installing plugin"
+          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
+          write-host "plugin installed"
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache: true
+      - name: 'Install ROCm'
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading AMD HIP Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP"
+      - name: 'Verify ROCm'
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+      - run: go get ./...
+      - run: |
+          $gopath=(get-command go).source | split-path -parent
+          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+          cd $env:GITHUB_WORKSPACE
+          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+          $env:PATH="$gopath;$env:PATH"
+          $env:OLLAMA_SKIP_CPU_GENERATE="1"
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          go generate -x ./...
+        name: go generate
+      - name: 'gather rocm dependencies'
+        run: |
+          $HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          md "dist\deps\bin\rocblas\library"
+          cp "${HIP_PATH}\bin\hipblas.dll" "dist\deps\bin\"
+          cp "${HIP_PATH}\bin\rocblas.dll" "dist\deps\bin\"
+          cp "${HIP_PATH}\bin\rocblas\library\*" "dist\deps\bin\rocblas\library\"
+      - uses: actions/upload-artifact@v4
+        with:
+          name: generate-windows-rocm
+          path: |
+            llm/build/**/bin/*
+            dist/windows-amd64/**
+      - uses: actions/upload-artifact@v4
+        with:
+          name: windows-rocm-deps
+          path: dist/deps/*
+
+  # CUDA generation step
+  generate-windows-cuda:
+    environment: release
+    runs-on: windows
+    env:
+      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set Version
+        shell: bash
+        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
+      - uses: 'google-github-actions/auth@v2'
+        with:
+          project_id: 'ollama'
+          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
+      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
+      - name: install Windows SDK 8.1 to get signtool
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading SDK"
+          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
+          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
+          write-host "Win SDK 8.1 installed"
+          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
+      - name: install signing plugin
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading plugin"
+          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
+          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
+          write-host "Installing plugin"
+          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
+          write-host "plugin installed"
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache: true
+      - name: 'Install CUDA'
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading CUDA Installer"
+          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
+          write-host "Installing CUDA"
+          Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
+          write-host "Completed CUDA"
+          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
+          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' 
+          echo "$cudaPath\bin" >> $env:GITHUB_PATH
+          echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV
+          echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV
+          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV
+      - name: 'Verify CUDA'
+        run: nvcc -V
+      - run: go get ./...
+      - name: go generate
+        run: |
+          $gopath=(get-command go).source | split-path -parent
+          $cudabin=(get-command nvcc).source | split-path
+          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+          cd $env:GITHUB_WORKSPACE
+          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+          $env:PATH="$gopath;$cudabin;$env:PATH"
+          $env:OLLAMA_SKIP_CPU_GENERATE="1"
+          go generate -x ./...
+      - name: 'gather cuda dependencies'
+        run: |
+          $NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
+          md "dist\deps"
+          cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
+          cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
+          cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
+      - uses: actions/upload-artifact@v4
+        with:
+          name: generate-windows-cuda
+          path: |
+            llm/build/**/bin/*
+            dist/windows-amd64/**
+      - uses: actions/upload-artifact@v4
+        with:
+          name: windows-cuda-deps
+          path: dist/deps/*
+
+  # Import the prior generation steps and build the final windows assets
  build-windows:
    environment: release
    runs-on: windows
@ -392,7 +273,6 @@ jobs:
      - generate-windows-cuda
      - generate-windows-rocm
      - generate-windows-cpu
-      - windows-arm64
    env:
      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
    steps:
@ -424,24 +304,6 @@ jobs:
          write-host "Installing plugin"
          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
          write-host "plugin installed"
-      - name: Install msys2
-        run: |
-          $msys2_url="https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe"
-          write-host "Downloading msys2"
-          Invoke-WebRequest -Uri "${msys2_url}" -OutFile "${env:RUNNER_TEMP}\msys2.exe"
-          write-host "Installing msys2"
-          Start-Process "${env:RUNNER_TEMP}\msys2.exe" -ArgumentList @("in", "--confirm-command", "--accept-messages", "--root", "C:/msys64") -NoNewWindow -Wait
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang", "make") -NoNewWindow -Wait
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: verify tools
-        run: |
-          get-command gcc
-          gcc --version
-          get-command make
-          make --version
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
@ -452,24 +314,24 @@ jobs:
          name: generate-windows-cpu
      - uses: actions/download-artifact@v4
        with:
-          name: generate-windows-cuda-11.3
+          name: generate-windows-cuda
      - uses: actions/download-artifact@v4
        with:
-          name: generate-windows-cuda-12.4
+          name: windows-cuda-deps
+      - uses: actions/download-artifact@v4
+        with:
+          name: windows-rocm-deps
      - uses: actions/download-artifact@v4
        with:
          name: generate-windows-rocm
-      - uses: actions/download-artifact@v4
-        with:
-          name: windows-arm64
-          path: dist
-      - run: dir build
+      - run: dir llm/build
      - run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
+          $gopath=(get-command go).source | split-path -parent
+          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+          cd $env:GITHUB_WORKSPACE
+          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+          $env:PATH="$gopath;$env:PATH"
          $env:OLLAMA_SKIP_GENERATE="1"
-          $env:ARCH="amd64"
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
          & .\scripts\build_windows.ps1
      - uses: actions/upload-artifact@v4
        with:
@ -483,7 +345,9 @@ jobs:
    environment: release
    runs-on: linux
    env:
-      PLATFORM: linux/amd64
+      OLLAMA_SKIP_MANIFEST_CREATE: '1'
+      BUILD_ARCH: amd64
+      PUSH: '1'
    steps:
      - uses: actions/checkout@v4
        with:
@ -491,8 +355,15 @@ jobs:
      - name: Set Version
        shell: bash
        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
      - run: |
          ./scripts/build_linux.sh
+          ./scripts/build_docker.sh
+          mv dist/deps/* dist/
      - uses: actions/upload-artifact@v4
        with:
          name: dist-linux-amd64
@ -506,7 +377,9 @@ jobs:
    environment: release
    runs-on: linux-arm64
    env:
-      PLATFORM: linux/arm64
+      OLLAMA_SKIP_MANIFEST_CREATE: '1'
+      BUILD_ARCH: arm64
+      PUSH: '1'
    steps:
      - uses: actions/checkout@v4
        with:
@ -535,8 +408,14 @@ jobs:
          sudo usermod -aG docker $USER
          sudo apt-get install acl
          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
      - run: |
          ./scripts/build_linux.sh
+          ./scripts/build_docker.sh
      - uses: actions/upload-artifact@v4
        with:
          name: dist-linux-arm64
@ -544,178 +423,6 @@ jobs:
            dist/*linux*
            !dist/*-cov

-  # Container image build
-  build-container-image:
-    environment: release
-    strategy:
-      matrix:
-        runner:
-          - linux
-          - linux-arm64
-    runs-on: ${{ matrix.runner }}
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: 'Install Docker'
-        if: ${{ startsWith(matrix.runner, 'linux-arm64') }}
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y ca-certificates curl
-          sudo install -m 0755 -d /etc/apt/keyrings
-          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
-          sudo chmod a+r /etc/apt/keyrings/docker.asc
-          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
-            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
-            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-          sudo apt-get update
-          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-          sudo usermod -aG docker $USER
-          sudo apt-get install acl
-          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          machine=$(uname -m)
-          case ${machine} in
-            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-          esac >>$GITHUB_ENV
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: "."
-          platforms: linux/${{ env.ARCH }}
-          build-args: |
-            GOFLAGS
-          outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true
-      - name: Export digest
-        run: |
-          mkdir -p /tmp/digests
-          digest="${{ steps.build.outputs.digest }}"
-          touch "/tmp/digests/${digest#sha256:}"
-      - name: Upload digest
-        uses: actions/upload-artifact@v4
-        with:
-          name: digests-${{ env.PLATFORM_PAIR }}
-          path: /tmp/digests/*
-          if-no-files-found: error
-          retention-days: 1
-  merge:
-    environment: release
-    runs-on: linux
-    needs:
-      - build-container-image
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Download digests
-        uses: actions/download-artifact@v4
-        with:
-          path: /tmp/digests
-          pattern: digests-*
-          merge-multiple: true
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          machine=$(uname -m)
-          case ${machine} in
-            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-          esac >>$GITHUB_ENV
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Create manifest list and push
-        working-directory: /tmp/digests
-        run: |
-          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *)
-      - name: Inspect image
-        run: |
-          docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}          
-  build-container-image-rocm:
-    environment: release
-    runs-on: linux
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
-      ARCH: amd64
-      PLATFORM_PAIR: linux-amd64
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: "."
-          target: runtime-rocm
-          build-args: |
-            GOFLAGS
-          tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm
-          push: true
-
  # Aggregate all the assets and ship a release
  release:
    needs:
@ -728,7 +435,8 @@ jobs:
    permissions:
      contents: write
    env:
-      GH_TOKEN: ${{ github.token }}
+      OLLAMA_SKIP_IMAGE_BUILD: '1'
+      PUSH: '1'
    steps:
      - uses: actions/checkout@v4
      - name: Set Version
@ -736,6 +444,12 @@ jobs:
        run: |
          echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
          echo "RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)" >> $GITHUB_ENV
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+      - run: ./scripts/build_docker.sh
      - name: Retrieve built artifact
        uses: actions/download-artifact@v4
        with:
@ -744,23 +458,17 @@ jobs:
          merge-multiple: true
      - run: |
          ls -lh dist/
-          (cd dist; find . -type f | xargs sha256sum > ../sha256sum.txt)
-          mv sha256sum.txt dist/
+          (cd dist; sha256sum * > sha256sum.txt)
          cat dist/sha256sum.txt
-      - name: Create or update Release
-        run: |
-          echo "Looking for existing release for ${{ env.RELEASE_VERSION }}"
-          OLD_TAG=$(gh release ls --json name,tagName | jq -r ".[] | select(.name == \"${{ env.RELEASE_VERSION }}\") | .tagName")
-          if [ -n "$OLD_TAG" ]; then
-            echo "Updating release ${{ env.RELEASE_VERSION }} to point to new tag ${GITHUB_REF_NAME}"
-            gh release edit ${OLD_TAG} --tag ${GITHUB_REF_NAME}
-          else
-            echo "Creating new release ${{ env.RELEASE_VERSION }} pointing to tag ${GITHUB_REF_NAME}"
-            gh release create ${GITHUB_REF_NAME} \
-              --title ${{ env.RELEASE_VERSION }} \
-              --draft \
-              --generate-notes \
-              --prerelease
-          fi
-          echo "Uploading artifacts for tag ${GITHUB_REF_NAME}"
-          gh release upload ${GITHUB_REF_NAME} dist/* --clobber
+      - uses: ncipollo/release-action@v1
+        with:
+          name: ${{ env.RELEASE_VERSION }}
+          allowUpdates: true
+          artifacts: 'dist/*'
+          draft: true
+          prerelease: true
+          omitBodyDuringUpdate: true
+          generateReleaseNotes: true
+          omitDraftDuringUpdate: true
+          omitPrereleaseDuringUpdate: true
+          replacesArtifacts: true
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@ -1,11 +1,5 @@
 name: test

-env:
-  ROCM_WINDOWS_URL: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
-  MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe
-  CUDA_12_WINDOWS_URL: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
-  CUDA_12_WINDOWS_VER: 12.4
-
 concurrency:
  # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
  # cancels running CI jobs and starts all new ones.
@ -27,7 +21,9 @@ jobs:
  changes:
    runs-on: ubuntu-latest
    outputs:
-      RUNNERS: ${{ steps.changes.outputs.RUNNERS }}
+      GENERATE: ${{ steps.changes.outputs.GENERATE }}
+      GENERATE_CUDA: ${{ steps.changes.outputs.GENERATE_CUDA }}
+      GENERATE_ROCM: ${{ steps.changes.outputs.GENERATE_ROCM }}
    steps:
      - uses: actions/checkout@v4
        with:
@ -42,167 +38,14 @@ jobs:
          }

          {
-            echo RUNNERS=$(changed 'llama/**')
+            echo GENERATE=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
+            echo GENERATE_CUDA=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
+            echo GENERATE_ROCM=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
          } >>$GITHUB_OUTPUT

-  runners-linux-cuda:
+  generate:
    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    strategy:
-      matrix:
-        cuda-version:
-          - '11.8.0'
-    runs-on: linux
-    container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
-    steps:
-      - run: |
-          apt-get update && apt-get install -y git build-essential curl
-        env:
-          DEBIAN_FRONTEND: noninteractive
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v4
-        with:
-          go-version-file: go.mod
-          cache: true
-      - run: go get ./...
-      - run: |
-          git config --global --add safe.directory /__w/ollama/ollama
-          cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
-          make -j $cores cuda_v11
-  runners-linux-rocm:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    strategy:
-      matrix:
-        rocm-version:
-          - '6.1.2'
-    runs-on: linux
-    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
-    steps:
-      - run: |
-          apt-get update && apt-get install -y git build-essential curl rocm-libs
-        env:
-          DEBIAN_FRONTEND: noninteractive
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v4
-        with:
-          go-version-file: go.mod
-          cache: true
-      - run: go get ./...
-      - run: |
-          git config --global --add safe.directory /__w/ollama/ollama
-          cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
-          make -j $cores rocm
-
-  # ROCm generation step
-  runners-windows-rocm:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    runs-on: windows
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - name: Set make jobs default
-        run: |
-          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-
-      # ROCM installation steps
-      - name: 'Cache ROCm installer'
-        id: cache-rocm
-        uses: actions/cache@v4
-        with:
-          path: rocm-install.exe
-          key: ${{ env.ROCM_WINDOWS_URL }}
-      - name: 'Conditionally Download ROCm'
-        if: steps.cache-rocm.outputs.cache-hit != 'true'
-        run: |
-          $ErrorActionPreference = "Stop"
-          Invoke-WebRequest -Uri "${env:ROCM_WINDOWS_URL}" -OutFile "rocm-install.exe"
-      - name: 'Install ROCm'
-        run: |
-          Start-Process "rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
-      - name: 'Verify ROCm'
-        run: |
-          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
-          echo "HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path | select -first 1)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-
-      - name: Add msys paths
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-
-      - name: make rocm runner
-        run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make -C llama print-HIP_PATH print-HIP_LIB_DIR
-          make rocm
-
-  # CUDA generation step
-  runners-windows-cuda:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    runs-on: windows
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - name: Set make jobs default
-        run: |
-          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-
-      # CUDA installation steps
-      - name: 'Cache CUDA installer'
-        id: cache-cuda
-        uses: actions/cache@v4
-        with:
-          path: cuda-install.exe
-          key: ${{ env.CUDA_12_WINDOWS_URL }}
-      - name: 'Conditionally Download CUDA'
-        if: steps.cache-cuda.outputs.cache-hit != 'true'
-        run: |
-          $ErrorActionPreference = "Stop"
-          Invoke-WebRequest -Uri "${env:CUDA_12_WINDOWS_URL}" -OutFile "cuda-install.exe"
-      - name: 'Install CUDA'
-        run: |
-          $subpackages = @("cudart", "nvcc", "cublas", "cublas_dev") | foreach-object {"${_}_${{ env.CUDA_12_WINDOWS_VER }}"}
-          Start-Process "cuda-install.exe" -ArgumentList (@("-s") + $subpackages) -NoNewWindow -Wait
-      - name: 'Verify CUDA'
-        run: |
-          & (resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0] --version
-          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
-          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' 
-          echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "CUDA_PATH=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-          echo "CUDA_PATH_V${cudaVer}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-
-      - name: Add msys paths
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - name: make cuda runner
-        run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
-
-  runners-cpu:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
+    if: ${{ needs.changes.outputs.GENERATE == 'True' }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2019]
@ -215,39 +58,178 @@ jobs:
    runs-on: ${{ matrix.os }}
    env:
      GOARCH: ${{ matrix.arch }}
-      ARCH: ${{ matrix.arch }}
-      CGO_ENABLED: '1'
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache: true
-      - name: Add msys paths
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - name: 'Build Windows Go Runners'
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
+      - run: go get ./...
+      - run: |
          $gopath=(get-command go).source | split-path -parent
          $gccpath=(get-command gcc).source | split-path -parent
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
+          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+          cd $env:GITHUB_WORKSPACE
          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
          $env:PATH="$gopath;$gccpath;$env:PATH"
          echo $env:PATH
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make -j 4      
-      - name: 'Build Unix Go Runners'
+          go generate -x ./...
+        if: ${{ startsWith(matrix.os, 'windows-') }}
+        name: 'Windows Go Generate'
+      - run: go generate -x ./...
        if: ${{ ! startsWith(matrix.os, 'windows-') }}
-        run: make -j 4
-      - run: go build .
+        name: 'Unix Go Generate'
+      - uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
+          path: |
+            llm/build/**/bin/*
+            llm/build/**/*.a
+  generate-cuda:
+    needs: [changes]
+    if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
+    strategy:
+      matrix:
+        cuda-version:
+          - '11.8.0'
+    runs-on: linux
+    container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
+    steps:
+      - run: |
+          apt-get update && apt-get install -y git build-essential curl
+          curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
+            | tar -zx -C /usr --strip-components 1
+        env:
+          DEBIAN_FRONTEND: noninteractive
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v4
+        with:
+          go-version-file: go.mod
+          cache: true
+      - run: go get ./...
+      - run: |
+          git config --global --add safe.directory /__w/ollama/ollama
+          go generate -x ./...
+        env:
+          OLLAMA_SKIP_CPU_GENERATE: '1'
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cuda-${{ matrix.cuda-version }}-libraries
+          path: |
+            llm/build/**/bin/*
+            dist/windows-amd64/**
+  generate-rocm:
+    needs: [changes]
+    if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
+    strategy:
+      matrix:
+        rocm-version:
+          - '6.0.2'
+    runs-on: linux
+    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
+    steps:
+      - run: |
+          apt-get update && apt-get install -y git build-essential curl rocm-libs
+          curl -fsSL https://github.com/Kitware/CMake/releases/download/v3.28.1/cmake-3.28.1-linux-x86_64.tar.gz \
+            | tar -zx -C /usr --strip-components 1
+        env:
+          DEBIAN_FRONTEND: noninteractive
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v4
+        with:
+          go-version-file: go.mod
+          cache: true
+      - run: go get ./...
+      - run: |
+          git config --global --add safe.directory /__w/ollama/ollama
+          go generate -x ./...
+        env:
+          OLLAMA_SKIP_CPU_GENERATE: '1'
+      - uses: actions/upload-artifact@v4
+        with:
+          name: rocm-${{ matrix.rocm-version }}-libraries
+          path: |
+            llm/build/**/bin/*
+            dist/windows-amd64/**
+
+  # ROCm generation step
+  generate-windows-rocm:
+    needs: [changes]
+    if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
+    runs-on: windows
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache: true
+      - name: 'Install ROCm'
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading AMD HIP Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP"
+      - name: 'Verify ROCm'
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+      - run: go get ./...
+      - run: |
+          $gopath=(get-command go).source | split-path -parent
+          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+          cd $env:GITHUB_WORKSPACE
+          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+          $env:PATH="$gopath;$env:PATH"
+          $env:OLLAMA_SKIP_CPU_GENERATE="1"
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          go generate -x ./...
+        name: go generate
+        env:
+          OLLAMA_SKIP_CPU_GENERATE: '1'
+      # TODO - do we need any artifacts?
+
+  # CUDA generation step
+  generate-windows-cuda:
+    needs: [changes]
+    if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
+    runs-on: windows
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache: true
+      - name: 'Install CUDA'
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "downloading CUDA Installer"
+          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
+          write-host "Installing CUDA"
+          Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
+          write-host "Completed CUDA"
+          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
+          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' 
+          echo "$cudaPath\bin" >> $env:GITHUB_PATH
+          echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV
+          echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV
+          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV
+      - name: 'Verify CUDA'
+        run: nvcc -V
+      - run: go get ./...
+      - name: go generate
+        run: |
+          $gopath=(get-command go).source | split-path -parent
+          $cudabin=(get-command nvcc).source | split-path
+          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+          cd $env:GITHUB_WORKSPACE
+          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+          $env:PATH="$gopath;$cudabin;$env:PATH"
+          $env:OLLAMA_SKIP_CPU_GENERATE="1"
+          go generate -x ./...
+        env:
+          OLLAMA_SKIP_CPU_GENERATE: '1'
+      # TODO - do we need any artifacts?

  lint:
    strategy:
@ -279,9 +261,17 @@ jobs:
            arm64) echo ARCH=arm64 ;;
          esac >>$GITHUB_ENV
        shell: bash
-      - uses: golangci/golangci-lint-action@v6
+      - run: |
+          mkdir -p llm/build/linux/$ARCH/stub/bin
+          touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
+        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
+      - run: |
+          mkdir -p llm/build/darwin/$ARCH/stub/bin
+          touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
+        if: ${{ startsWith(matrix.os, 'macos-') }}
+      - uses: golangci/golangci-lint-action@v4
        with:
-          args: --timeout 10m0s -v
+          args: --timeout 8m0s -v
  test:
    strategy:
      matrix:
@ -296,6 +286,9 @@ jobs:
    env:
      GOARCH: ${{ matrix.arch }}
      CGO_ENABLED: '1'
+      OLLAMA_CPU_TARGET: 'static'
+      OLLAMA_SKIP_CPU_GENERATE: '1'
+      OLLAMA_SKIP_METAL_GENERATE: '1'
    steps:
      - uses: actions/checkout@v4
        with:
@ -306,21 +299,23 @@ jobs:
          cache: true
      - run: |
          case ${{ matrix.arch }} in
-            amd64) echo ARCH=amd64 ;;
+            amd64) echo ARCH=x86_64 ;;
            arm64) echo ARCH=arm64 ;;
          esac >>$GITHUB_ENV
        shell: bash
+      - run: |
+          mkdir -p llm/build/linux/$ARCH/stub/bin
+          touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
+        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
+      - run: |
+          mkdir -p llm/build/darwin/$ARCH/stub/bin
+          touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
+        if: ${{ startsWith(matrix.os, 'macos-') }}
+        shell: bash
+      - run: go generate ./...
      - run: go build
      - run: go test -v ./...
-
-  patches:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/upload-artifact@v4
        with:
-          submodules: recursive
-      - name: Verify patches carry all the changes
-        run: |
-          make apply-patches sync && git diff --compact-summary --exit-code llama
+          name: ${{ matrix.os }}-binaries
+          path: ollama
--- a/.gitignore
+++ b/.gitignore
@ -5,14 +5,11 @@
 .swp
 dist
 ollama
+ggml-metal.metal
 .cache
 *.exe
 .idea
 test_data
 *.crt
 llm/build
-build/*/*/*
-!build/**/placeholder
-llama/build
-__debug_bin*
-llama/vendor
+__debug_bin*
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,4 @@
+[submodule "llama.cpp"]
+	path = llm/llama.cpp
+	url = https://github.com/ggerganov/llama.cpp.git
+	shallow = true
--- a/.golangci.yaml
+++ b/.golangci.yaml
@ -7,41 +7,11 @@ linters:
    - bodyclose
    - containedctx
    - contextcheck
-    - errcheck
    - exportloopref
-    - gci
    - gocheckcompilerdirectives
-    - gofmt
-    - gofumpt
-    - gosimple
-    - govet
-    - ineffassign
-    - intrange
-    - makezero
+    # FIXME: for some reason this errors on windows
+    # - gofmt
+    # - goimports
    - misspell
    - nilerr
-    - nolintlint
-    - nosprintfhostport
-    - staticcheck
-    - tenv
-    - unconvert
    - unused
-    - usestdlibvars
-    - wastedassign
-    - whitespace
-linters-settings:
-  gci:
-    sections: [standard, default, localmodule]
-  staticcheck:
-    checks:
-      - all
-      - -SA1019 # omit Deprecated check
-severity:
-  default-severity: error
-  rules:
-    - linters:
-        - gofmt
-        - goimports
-        - intrange
-        - usestdlibvars
-      severity: info
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -1,37 +0,0 @@
-# Contributing to Ollama
-
-Thank you for your interest in contributing to Ollama! Here are a few guidelines to help get you started.
-
-## Set up
-
-See the [development documentation](./docs/development.md) for instructions on how to build and run Ollama locally.
-
-## Pull requests
-
-### Ideal issues
-
-* [Bugs](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Abug): issues where Ollama stops working or where it results in an unexpected error.
-* [Performance](https://github.com/ollama/ollama/issues?q=is%3Aissue+is%3Aopen+label%3Aperformance): issues to make Ollama faster at model inference, downloading or uploading.
-* [Security](https://github.com/ollama/ollama/blob/main/SECURITY.md): issues that could lead to a security vulnerability. As mentioned in [SECURITY.md](https://github.com/ollama/ollama/blob/main/SECURITY.md), please do not disclose security vulnerabilities publicly.
-
-### Issues that are harder to review
-
-* New features: new features (e.g. API fields, environment variables) add surface area to Ollama and make it harder to maintain in the long run as they cannot be removed without potentially breaking users in the future.
-* Refactoring: large code improvements are important, but can be harder or take longer to review and merge.
-* Documentation: small updates to fill in or correct missing documentation is helpful, however large documentation additions can be hard to maintain over time.
-
-### Issues that may not be accepted
-
-* Changes that break backwards compatibility in Ollama's API (including the OpenAI-compatible API)
-* Changes that add significant friction to the user experience
-* Changes that create a large future maintenance burden for maintainers and contributors
-
-### Best practices
-
-* Commit messages: please leave both a title and a description in your commit messages. The title should be a short summary of the changes, with a leading word that explains the section of the code being changed (e.g. `api: fix parsing of prompt field`) . In the description, leave a short 2-3 sentences that explain more about the change and its impact.
-* Tests: please add test coverage to changes where possible.
-* Minimize dependencies: avoid adding new dependencies unless absolutely necessary.
-
-## Need help?
-
-If you need help with anything, feel free to reach out to us on our [Discord server](https://discord.gg/ollama).
--- a/318
+++ b/318
@ -1,263 +1,131 @@
-ARG GOLANG_VERSION=1.22.8
+ARG GOLANG_VERSION=1.22.1
 ARG CMAKE_VERSION=3.22.1
-ARG CUDA_VERSION_11=11.3.1
-ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
-ARG CUDA_VERSION_12=12.4.0
-ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
-ARG ROCM_VERSION=6.1.2
-ARG JETPACK_6=r36.2.0
-ARG JETPACK_5=r35.4.1
+# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
+ARG CUDA_VERSION=11.3.1
+ARG ROCM_VERSION=6.0.2

-### To create a local image for building linux binaries on mac or windows with efficient incremental builds
-#
-# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
-# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
-#
-### Then incremental builds will be much faster in this container
-#
-# make -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
-#
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
+# Copy the minimal context we need to run the generate scripts
+FROM scratch AS llm-code
+COPY .git .git
+COPY .gitmodules .gitmodules
+COPY llm llm
+
+FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
 ARG CMAKE_VERSION
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
 COPY ./scripts/rh_linux_deps.sh /
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-# TODO intel oneapi goes here...
-ENV GOARCH amd64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
+RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
+ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate
+ARG CGO_CFLAGS
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh

-### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
-# Note: this does not contain jetson variants
-#
-# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
-# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
-#
-FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
 ARG CMAKE_VERSION
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
 COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
-    dnf config-manager --set-enabled appstream && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-ENV GOARCH amd64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
-
-FROM --platform=linux/amd64 unified-builder-amd64 AS runners-amd64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_CUDA_11_GENERATE
-ARG OLLAMA_SKIP_CUDA_12_GENERATE
-ARG OLLAMA_SKIP_ROCM_GENERATE
-ARG CUDA_V11_ARCHITECTURES
-ARG CUDA_V12_ARCHITECTURES
-ARG OLLAMA_FAST_BUILD
-RUN --mount=type=cache,target=/root/.ccache \
-    if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
-        make -j $(expr $(nproc) / 2 ) ; \
-    else \
-        make -j 5 ; \
-    fi
-
-FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_CUDA_11_GENERATE
-ARG OLLAMA_SKIP_CUDA_12_GENERATE
-ARG CUDA_V11_ARCHITECTURES
-ARG CUDA_V12_ARCHITECTURES
-ARG OLLAMA_FAST_BUILD
-RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5
-
-# Jetsons need to be built in discrete stages
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
+RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
+ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
-ENV GOARCH arm64
-RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 cuda_v11 \
-        CUDA_ARCHITECTURES="72;87" \
-        GPU_RUNNER_VARIANT=_jetpack5 \
-        CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
-        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
-        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh

-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
+ARG CMAKE_VERSION
+COPY ./scripts/rh_linux_deps.sh /
+RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
+ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
+ENV LIBRARY_PATH /opt/amdgpu/lib64
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate
 ARG CGO_CFLAGS
-ENV GOARCH arm64
-RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 cuda_v12 \
-        CUDA_ARCHITECTURES="87" \
-        GPU_RUNNER_VARIANT=_jetpack6 \
-        CGO_EXTRA_LDFLAGS_LINUX=-L/usr/local/cuda/lib64/stubs \
-        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
-        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6
+ARG AMDGPU_TARGETS
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
+RUN mkdir /tmp/scratch && \
+    for dep in $(zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm*/bin/deps.txt.gz) ; do \
+        cp ${dep} /tmp/scratch/ || exit 1 ; \
+    done && \
+    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
+    mkdir -p /go/src/github.com/ollama/ollama/dist/deps/ && \
+    (cd /tmp/scratch/ && tar czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz . )


-# Intermediate stages used for ./scripts/build_linux.sh
-FROM --platform=linux/amd64 centos:7 AS builder-amd64
+FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-ENV CGO_ENABLED 1
-ENV GOARCH amd64
-WORKDIR /go/src/github.com/ollama/ollama
-
-FROM --platform=linux/amd64 builder-amd64 AS build-amd64
-COPY . .
-COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/build/ build/
-ARG GOFLAGS
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG CGO_CFLAGS
-ARG OLLAMA_SKIP_ROCM_GENERATE
-RUN --mount=type=cache,target=/root/.ccache \
-    go build -trimpath -o dist/linux-amd64/bin/ollama .
-RUN cd dist/linux-$GOARCH && \
-    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
-    cd dist/linux-$GOARCH-rocm && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
-    fi
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate

-FROM --platform=linux/arm64 rockylinux:8 AS builder-arm64
+FROM --platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
+RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
+FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
+FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx" sh gen_linux.sh
+FROM --platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu_avx2" sh gen_linux.sh
+
+FROM --platform=linux/arm64 centos:7 AS cpu-builder-arm64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
+COPY --from=llm-code / /go/src/github.com/ollama/ollama/
+ARG OLLAMA_CUSTOM_CPU_DEFS
+ARG CGO_CFLAGS
+WORKDIR /go/src/github.com/ollama/ollama/llm/generate
+
+FROM --platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
+RUN OLLAMA_CPU_TARGET="static" sh gen_linux.sh
+FROM --platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
+RUN OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" sh gen_linux.sh
+
+
+# Intermediate stage used for ./scripts/build_linux.sh
+FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
 ENV CGO_ENABLED 1
-ENV GOARCH arm64
-WORKDIR /go/src/github.com/ollama/ollama
-
-FROM --platform=linux/arm64 builder-arm64 AS build-arm64
-COPY . .
-COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/build/ build/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/build/ build/
-ARG GOFLAGS
-ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
-    go build -trimpath -o dist/linux-arm64/bin/ollama .
-RUN cd dist/linux-$GOARCH && \
-    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN cd dist/linux-$GOARCH-jetpack5 && \
-    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
-RUN cd dist/linux-$GOARCH-jetpack6 && \
-    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz
-
-FROM --platform=linux/amd64 scratch AS dist-amd64
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
-FROM --platform=linux/arm64 scratch AS dist-arm64
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
-FROM dist-$TARGETARCH AS dist
-
-
-# Optimized container images do not carry nested payloads
-FROM --platform=linux/amd64 builder-amd64 AS container-build-amd64
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
+COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
-    go build -trimpath -o dist/linux-amd64/bin/ollama .
+RUN go build -trimpath .

-FROM --platform=linux/arm64 builder-arm64 AS container-build-arm64
+# Intermediate stage used for ./scripts/build_linux.sh
+FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
+ENV CGO_ENABLED 1
+ARG GOLANG_VERSION
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
+COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
 ARG GOFLAGS
 ARG CGO_CFLAGS
-RUN --mount=type=cache,target=/root/.ccache \
-    go build -trimpath -o dist/linux-arm64/bin/ollama .
+RUN go build -trimpath .

-# For amd64 container images, filter out cuda/rocm to minimize size
-FROM runners-amd64 AS runners-cuda-amd64
-RUN rm -rf \
-    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
-    ./dist/linux-amd64/lib/ollama/runners/rocm*
-
-FROM runners-amd64 AS runners-rocm-amd64
-RUN rm -rf \
-    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
-    ./dist/linux-amd64/lib/ollama/libcu*.so* \
-    ./dist/linux-amd64/lib/ollama/runners/cuda*
-
-FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
-
-FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
-COPY --from=cpu-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=cuda-build-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=cuda-build-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-
-
-# ROCm libraries are larger so we keep them distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
-# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
-# across releases
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+# Runtime stages
+FROM --platform=linux/amd64 ubuntu:22.04 as runtime-amd64
+RUN apt-get update && apt-get install -y ca-certificates
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
+FROM --platform=linux/arm64 ubuntu:22.04 as runtime-arm64
+RUN apt-get update && apt-get install -y ca-certificates
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/ollama /bin/ollama

+# Radeon images are much larger so we keep them distinct from the CPU/CUDA image
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
+RUN update-pciids
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
 EXPOSE 11434
 ENV OLLAMA_HOST 0.0.0.0

--- a/4
+++ b/4
@ -1,4 +0,0 @@
-GOALS := $(or $(MAKECMDGOALS),all)
-.PHONY: $(GOALS)
-$(GOALS):
-	$(MAKE) -C llama $@
--- a/README.md
+++ b/README.md
@ -6,13 +6,13 @@

 [![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama)

-Get up and running with large language models.
+Get up and running with large language models locally.

 ### macOS

 [Download](https://ollama.com/download/Ollama-darwin.zip)

-### Windows
+### Windows preview

 [Download](https://ollama.com/download/OllamaSetup.exe)

@ -35,10 +35,10 @@ The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `olla

 ## Quickstart

-To run and chat with [Llama 3.2](https://ollama.com/library/llama3.2):
+To run and chat with [Llama 3](https://ollama.com/library/llama3):

 ```
-ollama run llama3.2
+ollama run llama3
 ```

 ## Model library
@ -47,31 +47,24 @@ Ollama supports a list of models available on [ollama.com/library](https://ollam

 Here are some example models that can be downloaded:

-| Model              | Parameters | Size  | Download                         |
-| ------------------ | ---------- | ----- | -------------------------------- |
-| Llama 3.2          | 3B         | 2.0GB | `ollama run llama3.2`            |
-| Llama 3.2          | 1B         | 1.3GB | `ollama run llama3.2:1b`         |
-| Llama 3.2 Vision   | 11B        | 7.9GB | `ollama run llama3.2-vision`     |
-| Llama 3.2 Vision   | 90B        | 55GB  | `ollama run llama3.2-vision:90b` |
-| Llama 3.1          | 8B         | 4.7GB | `ollama run llama3.1`            |
-| Llama 3.1          | 70B        | 40GB  | `ollama run llama3.1:70b`        |
-| Llama 3.1          | 405B       | 231GB | `ollama run llama3.1:405b`       |
-| Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`                |
-| Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`         |
-| Gemma 2            | 2B         | 1.6GB | `ollama run gemma2:2b`           |
-| Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`              |
-| Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`          |
-| Mistral            | 7B         | 4.1GB | `ollama run mistral`             |
-| Moondream 2        | 1.4B       | 829MB | `ollama run moondream`           |
-| Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`         |
-| Starling           | 7B         | 4.1GB | `ollama run starling-lm`         |
-| Code Llama         | 7B         | 3.8GB | `ollama run codellama`           |
-| Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored`   |
-| LLaVA              | 7B         | 4.5GB | `ollama run llava`               |
-| Solar              | 10.7B      | 6.1GB | `ollama run solar`               |
+| Model              | Parameters | Size  | Download                       |
+| ------------------ | ---------- | ----- | ------------------------------ |
+| Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
+| Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
+| Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
+| Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
+| Gemma              | 2B         | 1.4GB | `ollama run gemma:2b`          |
+| Gemma              | 7B         | 4.8GB | `ollama run gemma:7b`          |
+| Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
+| Moondream 2        | 1.4B       | 829MB | `ollama run moondream`         |
+| Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`       |
+| Starling           | 7B         | 4.1GB | `ollama run starling-lm`       |
+| Code Llama         | 7B         | 3.8GB | `ollama run codellama`         |
+| Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored` |
+| LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
+| Solar              | 10.7B      | 6.1GB | `ollama run solar`             |

-> [!NOTE]
-> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
+> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.

 ## Customize a model

@ -103,16 +96,16 @@ See the [guide](docs/import.md) on importing models for more information.

 ### Customize a prompt

-Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.2` model:
+Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3` model:

 ```
-ollama pull llama3.2
+ollama pull llama3
 ```

 Create a `Modelfile`:

 ```
-FROM llama3.2
+FROM llama3

 # set the temperature to 1 [higher is more creative, lower is more coherent]
 PARAMETER temperature 1
@ -147,7 +140,7 @@ ollama create mymodel -f ./Modelfile
 ### Pull a model

 ```
-ollama pull llama3.2
+ollama pull llama3
 ```

 > This command can also be used to update a local model. Only the diff will be pulled.
@ -155,13 +148,13 @@ ollama pull llama3.2
 ### Remove a model

 ```
-ollama rm llama3.2
+ollama rm llama3
 ```

 ### Copy a model

 ```
-ollama cp llama3.2 my-model
+ollama cp llama3 my-model
 ```

 ### Multiline input
@ -178,41 +171,23 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
 ### Multimodal models

 ```
-ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png"
+>>> What's in this image? /Users/jmorgan/Desktop/smile.png
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```

 ### Pass the prompt as an argument

 ```
-$ ollama run llama3.2 "Summarize this file: $(cat README.md)"
+$ ollama run llama3 "Summarize this file: $(cat README.md)"
 Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```

-### Show model information
-
-```
-ollama show llama3.2
-```
-
 ### List models on your computer

 ```
 ollama list
 ```

-### List which models are currently loaded
-
-```
-ollama ps
-```
-
-### Stop a model which is currently running
-
-```
-ollama stop llama3.2
-```
-
 ### Start Ollama

 `ollama serve` is used when you want to start ollama without running the desktop application.
@ -232,7 +207,7 @@ Next, start the server:
 Finally, in a separate shell, run a model:

 ```
-./ollama run llama3.2
+./ollama run llama3
 ```

 ## REST API
@ -243,7 +218,7 @@ Ollama has a REST API for running and managing models.

 ```
 curl http://localhost:11434/api/generate -d '{
-  "model": "llama3.2",
+  "model": "llama3",
  "prompt":"Why is the sky blue?"
 }'
 ```
@ -252,7 +227,7 @@ curl http://localhost:11434/api/generate -d '{

 ```
 curl http://localhost:11434/api/chat -d '{
-  "model": "llama3.2",
+  "model": "llama3",
  "messages": [
    { "role": "user", "content": "why is the sky blue?" }
  ]
@ -310,31 +285,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
 - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
 - [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
- [AiLama](https://github.com/zeyoyt/ailama) (A Discord User App that allows you to interact with Ollama anywhere in Discord)
- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
- [Painting Droid](https://github.com/mateuszmigas/painting-droid) (Painting app with AI integrations)
- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
- [Go-CREW](https://www.jonathanhecl.com/go-crew/) (Powerful Offline RAG in Golang)
- [PartCAD](https://github.com/openvmp/partcad/) (CAD model generation with OpenSCAD and CadQuery)
- [Ollama4j Web UI](https://github.com/ollama4j/ollama4j-web-ui) - Java-based Web UI for Ollama built with Vaadin, Spring Boot and Ollama4j
- [PyOllaMx](https://github.com/kspviswa/pyOllaMx) - macOS application capable of chatting with both Ollama and Apple MLX models.
- [Claude Dev](https://github.com/saoudrizwan/claude-dev) - VSCode extension for multi-file/whole-repo coding
- [Cherry Studio](https://github.com/kangfenmao/cherry-studio) (Desktop client with Ollama support)
- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
- [Archyve](https://github.com/nickthecook/archyve) (RAG-enabling document library)
- [crewAI with Mesop](https://github.com/rapidarchitect/ollama-crew-mesop) (Mesop Web Interface to run crewAI with Ollama)
- [LLMChat](https://github.com/trendy-design/llmchat) (Privacy focused, 100% local, intuitive all-in-one chat interface)
- [ARGO](https://github.com/xark-argo/argo) (Locally download and run Ollama and Huggingface models with RAG on Mac/Windows/Linux)
- [G1](https://github.com/bklieger-groq/g1) (Prototype of using prompting strategies to improve the LLM's reasoning through o1-like reasoning chains.)
- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
- [Hexabot](https://github.com/hexastack/hexabot) (A conversational AI builder)
- [Reddit Rate](https://github.com/rapidarchitect/reddit_analyzer) (Search and Rate Reddit topics with a weighted summation)

 ### Terminal

@ -357,14 +307,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ShellOracle](https://github.com/djcopley/ShellOracle)
 - [tlm](https://github.com/yusufcanb/tlm)
 - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
- [gollama](https://github.com/sammcj/gollama)
- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
- [Ollama Mixture of Experts (MOE) in 50 lines of code](https://github.com/rapidarchitect/ollama_moe)
- [vim-intelligence-bridge](https://github.com/pepo-ec/vim-intelligence-bridge) Simple interaction of "Ollama" with the Vim editor
- [aichat](https://github.com/sigoden/aichat) All-in-one LLM CLI tool featuring Shell Assistant, Chat-REPL, RAG, AI tools & agents, with access to OpenAI, Claude, Gemini, Ollama, Groq, and more.
-
-### Apple Vision Pro
- [Enchanted](https://github.com/AugustDev/enchanted)

 ### Database

@ -374,28 +316,20 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Package managers

 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
 - [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
- [Nix package](https://search.nixos.org/packages?channel=24.05&show=ollama&from=0&size=50&sort=relevance&type=packages&query=ollama)
- [Flox](https://flox.dev/blog/ollama-part-one)

 ### Libraries

- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/integrations/chat/ollama/) with [example](https://js.langchain.com/docs/tutorials/local_rag/)
- [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
- [crewAI](https://github.com/crewAIInc/crewAI)
+- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
- [LlamaIndex](https://docs.llamaindex.ai/en/stable/examples/llm/ollama/) and [LlamaIndexTS](https://ts.llamaindex.ai/modules/llms/available_llms/ollama)
+- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaFarm for Go](https://github.com/presbrey/ollamafarm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
 - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
- [Ollama4j for Java](https://github.com/ollama4j/ollama4j)
+- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
 - [Ollama for Dart](https://github.com/breitburg/dart-ollama)
@ -412,20 +346,10 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
 - [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
 - [LlamaScript](https://github.com/Project-Llama/llamascript)
- [Gollm](https://docs.gollm.co/examples/ollama-example)
- [Ollamaclient for Golang](https://github.com/xyproto/ollamaclient)
- [High-level function abstraction in Go](https://gitlab.com/tozd/go/fun)
- [Ollama PHP](https://github.com/ArdaGnsrn/ollama-php)
- [Agents-Flex for Java](https://github.com/agents-flex/agents-flex) with [example](https://github.com/agents-flex/agents-flex/tree/main/agents-flex-llm/agents-flex-llm-ollama/src/test/java/com/agentsflex/llm/ollama)
- [Ollama for Swift](https://github.com/mattt/ollama-swift)
- [GoLamify](https://github.com/prasad89/golamify)
-
 ### Mobile

 - [Enchanted](https://github.com/AugustDev/enchanted)
 - [Maid](https://github.com/Mobile-Artificial-Intelligence/maid)
- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
- [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)

 ### Extensions & Plugins

@ -448,22 +372,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use Ollama as a copilot like GitHub Copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
-- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
+- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
-- [Plasmoid Ollama Control](https://github.com/imoize/plasmoid-ollamacontrol) (KDE Plasma extension that allows you to quickly manage/control Ollama model)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
 - [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in Python. Uses Ollama to create personalities.
-- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
-- [Local AI Helper](https://github.com/ivostoykov/localAI) (Chrome and Firefox extensions that enable interactions with the active tab and customisable API endpoints. Includes secure storage for user prompts.)
-- [vnc-lm](https://github.com/jk011ru/vnc-lm) (A containerized Discord bot with support for attachments and web links)
-- [LSP-AI](https://github.com/SilasMarvin/lsp-ai) (Open-source language server for AI-powered functionality)
-- [QodeAssist](https://github.com/Palm1r/QodeAssist) (AI-powered coding assistant plugin for Qt Creator)
-- [Obsidian Quiz Generator plugin](https://github.com/ECuiDev/obsidian-quiz-generator)
-- [TextCraft](https://github.com/suncloudsmoon/TextCraft) (Copilot in Word alternative using Ollama)
-
-### Supported backends

+### Supported backends 
 - [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.

--- a/SECURITY.md
+++ b/SECURITY.md
@ -1,25 +0,0 @@
-# Security
-
-The Ollama maintainer team takes security seriously and will actively work to resolve security issues.
-
-## Reporting a vulnerability
-
-If you discover a security vulnerability, please do not open a public issue. Instead, please report it by emailing hello@ollama.com. We ask that you give us sufficient time to investigate and address the vulnerability before disclosing it publicly.
-
-Please include the following details in your report:
-- A description of the vulnerability
-- Steps to reproduce the issue
-- Your assessment of the potential impact
-- Any possible mitigations
-
-## Security best practices
-
-While the maintainer team does their best to secure Ollama, users are encouraged to implement their own security best practices, such as:
-
-- Regularly updating to the latest version of Ollama
-- Securing access to hosted instances of Ollama
-- Monitoring systems for unusual activity
-
-## Contact
-
-For any other questions or concerns related to security, please contact us at hello@ollama.com
--- a/api/client.go
+++ b/api/client.go
@ -18,14 +18,16 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
+	"net"
 	"net/http"
 	"net/url"
+	"os"
 	"runtime"
+	"strconv"
+	"strings"

-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/version"
 )
@ -55,7 +57,7 @@ func checkError(resp *http.Response, body []byte) error {

 // ClientFromEnvironment creates a new [Client] using configuration from the
 // environment variable OLLAMA_HOST, which points to the network host and
-// port on which the ollama service is listening. The format of this variable
+// port on which the ollama service is listening. The format of this variable
 // is:
 //
 //	<scheme>://<host>:<port>
@ -63,12 +65,66 @@ func checkError(resp *http.Response, body []byte) error {
 // If the variable is not specified, a default ollama host and port will be
 // used.
 func ClientFromEnvironment() (*Client, error) {
+	ollamaHost, err := GetOllamaHost()
+	if err != nil {
+		return nil, err
+	}
+
 	return &Client{
-		base: envconfig.Host(),
+		base: &url.URL{
+			Scheme: ollamaHost.Scheme,
+			Host:   net.JoinHostPort(ollamaHost.Host, ollamaHost.Port),
+		},
 		http: http.DefaultClient,
 	}, nil
 }

+type OllamaHost struct {
+	Scheme string
+	Host   string
+	Port   string
+}
+
+func GetOllamaHost() (OllamaHost, error) {
+	defaultPort := "11434"
+
+	hostVar := os.Getenv("OLLAMA_HOST")
+	hostVar = strings.TrimSpace(strings.Trim(strings.TrimSpace(hostVar), "\"'"))
+
+	scheme, hostport, ok := strings.Cut(hostVar, "://")
+	switch {
+	case !ok:
+		scheme, hostport = "http", hostVar
+	case scheme == "http":
+		defaultPort = "80"
+	case scheme == "https":
+		defaultPort = "443"
+	}
+
+	// trim trailing slashes
+	hostport = strings.TrimRight(hostport, "/")
+
+	host, port, err := net.SplitHostPort(hostport)
+	if err != nil {
+		host, port = "127.0.0.1", defaultPort
+		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
+			host = ip.String()
+		} else if hostport != "" {
+			host = hostport
+		}
+	}
+
+	if portNum, err := strconv.ParseInt(port, 10, 32); err != nil || portNum > 65535 || portNum < 0 {
+		return OllamaHost{}, ErrInvalidHostPort
+	}
+
+	return OllamaHost{
+		Scheme: scheme,
+		Host:   host,
+		Port:   port,
+	}, nil
+}
+
 func NewClient(base *url.URL, http *http.Client) *Client {
 	return &Client{
 		base: base,
@ -173,7 +229,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 		}

 		if errorResponse.Error != "" {
-			return errors.New(errorResponse.Error)
+			return fmt.Errorf(errorResponse.Error)
 		}

 		if response.StatusCode >= http.StatusBadRequest {
@ -298,9 +354,9 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
 	return &lr, nil
 }

-// ListRunning lists running models.
-func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
-	var lr ProcessResponse
+// ListRunning lists the running models.
+func (c *Client) ListRunning(ctx context.Context) (*ListResponse, error) {
+	var lr ListResponse
 	if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
 		return nil, err
 	}
@ -333,7 +389,7 @@ func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, err
 	return &resp, nil
 }

-// Heartbeat checks if the server has started and is responsive; if yes, it
+// Heartbeat checks if the server has started and is responsive; if yes, it
 // returns nil, otherwise an error.
 func (c *Client) Heartbeat(ctx context.Context) error {
 	if err := c.do(ctx, http.MethodHead, "/", nil, nil); err != nil {
@ -342,16 +398,7 @@ func (c *Client) Heartbeat(ctx context.Context) error {
 	return nil
 }

-// Embed generates embeddings from a model.
-func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error) {
-	var resp EmbedResponse
-	if err := c.do(ctx, http.MethodPost, "/api/embed", req, &resp); err != nil {
-		return nil, err
-	}
-	return &resp, nil
-}
-
-// Embeddings generates an embedding from a model.
+// Embeddings generates embeddings from a model.
 func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
 	var resp EmbeddingResponse
 	if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
--- a/api/client_test.go
+++ b/api/client_test.go
@ -1,7 +1,11 @@
 package api

 import (
+	"fmt"
+	"net"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )

 func TestClientFromEnvironment(t *testing.T) {
@ -42,4 +46,40 @@ func TestClientFromEnvironment(t *testing.T) {
 			}
 		})
 	}
+
+	hostTestCases := map[string]*testCase{
+		"empty":               {value: "", expect: "127.0.0.1:11434"},
+		"only address":        {value: "1.2.3.4", expect: "1.2.3.4:11434"},
+		"only port":           {value: ":1234", expect: ":1234"},
+		"address and port":    {value: "1.2.3.4:1234", expect: "1.2.3.4:1234"},
+		"hostname":            {value: "example.com", expect: "example.com:11434"},
+		"hostname and port":   {value: "example.com:1234", expect: "example.com:1234"},
+		"zero port":           {value: ":0", expect: ":0"},
+		"too large port":      {value: ":66000", err: ErrInvalidHostPort},
+		"too small port":      {value: ":-1", err: ErrInvalidHostPort},
+		"ipv6 localhost":      {value: "[::1]", expect: "[::1]:11434"},
+		"ipv6 world open":     {value: "[::]", expect: "[::]:11434"},
+		"ipv6 no brackets":    {value: "::1", expect: "[::1]:11434"},
+		"ipv6 + port":         {value: "[::1]:1337", expect: "[::1]:1337"},
+		"extra space":         {value: " 1.2.3.4 ", expect: "1.2.3.4:11434"},
+		"extra quotes":        {value: "\"1.2.3.4\"", expect: "1.2.3.4:11434"},
+		"extra space+quotes":  {value: " \" 1.2.3.4 \" ", expect: "1.2.3.4:11434"},
+		"extra single quotes": {value: "'1.2.3.4'", expect: "1.2.3.4:11434"},
+	}
+
+	for k, v := range hostTestCases {
+		t.Run(k, func(t *testing.T) {
+			t.Setenv("OLLAMA_HOST", v.value)
+
+			oh, err := GetOllamaHost()
+			if err != v.err {
+				t.Fatalf("expected %s, got %s", v.err, err)
+			}
+
+			if err == nil {
+				host := net.JoinHostPort(oh.Host, oh.Port)
+				assert.Equal(t, v.expect, host, fmt.Sprintf("%s: expected %s, got %s", k, v.expect, host))
+			}
+		})
+	}
 }
--- a/api/types.go
+++ b/api/types.go
@ -2,6 +2,7 @@ package api

 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"log/slog"
 	"math"
@ -12,7 +13,7 @@ import (
 	"time"
 )

-// StatusError is an error with an HTTP status code and message.
+// StatusError is an error with an HTTP status code.
 type StatusError struct {
 	StatusCode   int
 	Status       string
@ -47,9 +48,6 @@ type GenerateRequest struct {
 	// Prompt is the textual prompt to send to the model.
 	Prompt string `json:"prompt"`

-	// Suffix is the text that comes after the inserted text.
-	Suffix string `json:"suffix"`
-
 	// System overrides the model's default system message/prompt.
 	System string `json:"system"`

@ -57,7 +55,7 @@ type GenerateRequest struct {
 	Template string `json:"template"`

 	// Context is the context parameter returned from a previous call to
-	// [Client.Generate]. It can be used to keep a short conversational memory.
+	// Generate. It can be used to keep a short conversational memory.
 	Context []int `json:"context,omitempty"`

 	// Stream specifies whether the response is streaming; it is true by default.
@ -90,95 +88,27 @@ type ChatRequest struct {
 	// Messages is the messages of the chat - can be used to keep a chat memory.
 	Messages []Message `json:"messages"`

-	// Stream enables streaming of returned responses; true by default.
+	// Stream enables streaming of the returned response; true by default.
 	Stream *bool `json:"stream,omitempty"`

 	// Format is the format to return the response in (e.g. "json").
 	Format string `json:"format"`

 	// KeepAlive controls how long the model will stay loaded into memory
-	// following the request.
+	// following the request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`

-	// Tools is an optional list of tools the model has access to.
-	Tools `json:"tools,omitempty"`
-
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }

-type Tools []Tool
-
-func (t Tools) String() string {
-	bts, _ := json.Marshal(t)
-	return string(bts)
-}
-
-func (t Tool) String() string {
-	bts, _ := json.Marshal(t)
-	return string(bts)
-}
-
 // Message is a single message in a chat sequence. The message contains the
 // role ("system", "user", or "assistant"), the content and an optional list
 // of images.
 type Message struct {
-	Role      string      `json:"role"`
-	Content   string      `json:"content"`
-	Images    []ImageData `json:"images,omitempty"`
-	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
-}
-
-func (m *Message) UnmarshalJSON(b []byte) error {
-	type Alias Message
-	var a Alias
-	if err := json.Unmarshal(b, &a); err != nil {
-		return err
-	}
-
-	*m = Message(a)
-	m.Role = strings.ToLower(m.Role)
-	return nil
-}
-
-type ToolCall struct {
-	Function ToolCallFunction `json:"function"`
-}
-
-type ToolCallFunction struct {
-	Name      string                    `json:"name"`
-	Arguments ToolCallFunctionArguments `json:"arguments"`
-}
-
-type ToolCallFunctionArguments map[string]any
-
-func (t *ToolCallFunctionArguments) String() string {
-	bts, _ := json.Marshal(t)
-	return string(bts)
-}
-
-type Tool struct {
-	Type     string       `json:"type"`
-	Function ToolFunction `json:"function"`
-}
-
-type ToolFunction struct {
-	Name        string `json:"name"`
-	Description string `json:"description"`
-	Parameters  struct {
-		Type       string   `json:"type"`
-		Required   []string `json:"required"`
-		Properties map[string]struct {
-			Type        string   `json:"type"`
-			Description string   `json:"description"`
-			Enum        []string `json:"enum,omitempty"`
-		} `json:"properties"`
-	} `json:"parameters"`
-}
-
-func (t *ToolFunction) String() string {
-	bts, _ := json.Marshal(t)
-	return string(bts)
+	Role    string      `json:"role"`
+	Content string      `json:"content"`
+	Images  []ImageData `json:"images,omitempty"`
 }

 // ChatResponse is the response returned by [Client.Chat]. Its fields are
@ -203,8 +133,8 @@ type Metrics struct {
 	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
 }

-// Options specified in [GenerateRequest].  If you add a new option here, also
-// add it to the API docs.
+// Options are the options specified in [GenerateRequest]. If you add a new
+// option here, add it to the API docs as well.
 type Options struct {
 	Runner

@ -214,7 +144,6 @@ type Options struct {
 	NumPredict       int      `json:"num_predict,omitempty"`
 	TopK             int      `json:"top_k,omitempty"`
 	TopP             float32  `json:"top_p,omitempty"`
-	MinP             float32  `json:"min_p,omitempty"`
 	TFSZ             float32  `json:"tfs_z,omitempty"`
 	TypicalP         float32  `json:"typical_p,omitempty"`
 	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
@ -231,45 +160,18 @@ type Options struct {

 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
-	NumCtx    int   `json:"num_ctx,omitempty"`
-	NumBatch  int   `json:"num_batch,omitempty"`
-	NumGPU    int   `json:"num_gpu,omitempty"`
-	MainGPU   int   `json:"main_gpu,omitempty"`
-	LowVRAM   bool  `json:"low_vram,omitempty"`
-	F16KV     bool  `json:"f16_kv,omitempty"` // Deprecated: This option is ignored
-	LogitsAll bool  `json:"logits_all,omitempty"`
-	VocabOnly bool  `json:"vocab_only,omitempty"`
-	UseMMap   *bool `json:"use_mmap,omitempty"`
-	UseMLock  bool  `json:"use_mlock,omitempty"`
-	NumThread int   `json:"num_thread,omitempty"`
-}
-
-// EmbedRequest is the request passed to [Client.Embed].
-type EmbedRequest struct {
-	// Model is the model name.
-	Model string `json:"model"`
-
-	// Input is the input to embed.
-	Input any `json:"input"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
-
-	Truncate *bool `json:"truncate,omitempty"`
-
-	// Options lists model-specific options.
-	Options map[string]interface{} `json:"options"`
-}
-
-// EmbedResponse is the response from [Client.Embed].
-type EmbedResponse struct {
-	Model      string      `json:"model"`
-	Embeddings [][]float32 `json:"embeddings"`
-
-	TotalDuration   time.Duration `json:"total_duration,omitempty"`
-	LoadDuration    time.Duration `json:"load_duration,omitempty"`
-	PromptEvalCount int           `json:"prompt_eval_count,omitempty"`
+	UseNUMA   bool `json:"numa,omitempty"`
+	NumCtx    int  `json:"num_ctx,omitempty"`
+	NumBatch  int  `json:"num_batch,omitempty"`
+	NumGPU    int  `json:"num_gpu,omitempty"`
+	MainGPU   int  `json:"main_gpu,omitempty"`
+	LowVRAM   bool `json:"low_vram,omitempty"`
+	F16KV     bool `json:"f16_kv,omitempty"`
+	LogitsAll bool `json:"logits_all,omitempty"`
+	VocabOnly bool `json:"vocab_only,omitempty"`
+	UseMMap   bool `json:"use_mmap,omitempty"`
+	UseMLock  bool `json:"use_mlock,omitempty"`
+	NumThread int  `json:"num_thread,omitempty"`
 }

 // EmbeddingRequest is the request passed to [Client.Embeddings].
@ -296,17 +198,15 @@ type EmbeddingResponse struct {
 // CreateRequest is the request passed to [Client.Create].
 type CreateRequest struct {
 	Model     string `json:"model"`
+	Path      string `json:"path"`
 	Modelfile string `json:"modelfile"`
 	Stream    *bool  `json:"stream,omitempty"`
 	Quantize  string `json:"quantize,omitempty"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`

-	// Deprecated: set the file content with Modelfile instead
-	Path string `json:"path"`
-
-	// Deprecated: use Quantize instead
+	// Quantization is deprecated, see Quantize
 	Quantization string `json:"quantization,omitempty"`
 }

@ -314,37 +214,31 @@ type CreateRequest struct {
 type DeleteRequest struct {
 	Model string `json:"model"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`
 }

 // ShowRequest is the request passed to [Client.Show].
 type ShowRequest struct {
-	Model  string `json:"model"`
-	System string `json:"system"`
-
-	// Template is deprecated
+	Model    string `json:"model"`
+	System   string `json:"system"`
 	Template string `json:"template"`
-	Verbose  bool   `json:"verbose"`

 	Options map[string]interface{} `json:"options"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`
 }

 // ShowResponse is the response returned from [Client.Show].
 type ShowResponse struct {
-	License       string         `json:"license,omitempty"`
-	Modelfile     string         `json:"modelfile,omitempty"`
-	Parameters    string         `json:"parameters,omitempty"`
-	Template      string         `json:"template,omitempty"`
-	System        string         `json:"system,omitempty"`
-	Details       ModelDetails   `json:"details,omitempty"`
-	Messages      []Message      `json:"messages,omitempty"`
-	ModelInfo     map[string]any `json:"model_info,omitempty"`
-	ProjectorInfo map[string]any `json:"projector_info,omitempty"`
-	ModifiedAt    time.Time      `json:"modified_at,omitempty"`
+	License    string       `json:"license,omitempty"`
+	Modelfile  string       `json:"modelfile,omitempty"`
+	Parameters string       `json:"parameters,omitempty"`
+	Template   string       `json:"template,omitempty"`
+	System     string       `json:"system,omitempty"`
+	Details    ModelDetails `json:"details,omitempty"`
+	Messages   []Message    `json:"messages,omitempty"`
 }

 // CopyRequest is the request passed to [Client.Copy].
@ -361,7 +255,7 @@ type PullRequest struct {
 	Password string `json:"password"`
 	Stream   *bool  `json:"stream,omitempty"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`
 }

@ -382,46 +276,25 @@ type PushRequest struct {
 	Password string `json:"password"`
 	Stream   *bool  `json:"stream,omitempty"`

-	// Deprecated: set the model name with Model instead
+	// Name is deprecated, see Model
 	Name string `json:"name"`
 }

 // ListResponse is the response from [Client.List].
 type ListResponse struct {
-	Models []ListModelResponse `json:"models"`
+	Models []ModelResponse `json:"models"`
 }

-// ProcessResponse is the response from [Client.Process].
-type ProcessResponse struct {
-	Models []ProcessModelResponse `json:"models"`
-}
-
-// ListModelResponse is a single model description in [ListResponse].
-type ListModelResponse struct {
+// ModelResponse is a single model description in [ListResponse].
+type ModelResponse struct {
 	Name       string       `json:"name"`
 	Model      string       `json:"model"`
-	ModifiedAt time.Time    `json:"modified_at"`
+	ModifiedAt time.Time    `json:"modified_at,omitempty"`
 	Size       int64        `json:"size"`
 	Digest     string       `json:"digest"`
 	Details    ModelDetails `json:"details,omitempty"`
-}
-
-// ProcessModelResponse is a single model description in [ProcessResponse].
-type ProcessModelResponse struct {
-	Name      string       `json:"name"`
-	Model     string       `json:"model"`
-	Size      int64        `json:"size"`
-	Digest    string       `json:"digest"`
-	Details   ModelDetails `json:"details,omitempty"`
-	ExpiresAt time.Time    `json:"expires_at"`
-	SizeVRAM  int64        `json:"size_vram"`
-}
-
-type RetrieveModelResponse struct {
-	Id      string `json:"id"`
-	Object  string `json:"object"`
-	Created int64  `json:"created"`
-	OwnedBy string `json:"owned_by"`
+	ExpiresAt  time.Time    `json:"expires_at,omitempty"`
+	SizeVRAM   int64        `json:"size_vram,omitempty"`
 }

 type TokenResponse struct {
@ -433,7 +306,7 @@ type GenerateResponse struct {
 	// Model is the model name that generated the response.
 	Model string `json:"model"`

-	// CreatedAt is the timestamp of the response.
+	// CreatedAt is the timestamp of the response.
 	CreatedAt time.Time `json:"created_at"`

 	// Response is the textual response itself.
@ -490,6 +363,8 @@ func (m *Metrics) Summary() {
 	}
 }

+var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
+
 func (opts *Options) FromMap(m map[string]interface{}) error {
 	valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
 	typeOpts := reflect.TypeOf(opts).Elem()   // types of the fields in the options struct
@ -506,7 +381,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 	for key, val := range m {
 		opt, ok := jsonOpts[key]
 		if !ok {
-			slog.Warn("invalid option provided", "option", key)
+			slog.Warn("invalid option provided", "option", opt.Name)
 			continue
 		}

@ -562,17 +437,6 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
 					slice[i] = str
 				}
 				field.Set(reflect.ValueOf(slice))
-			case reflect.Pointer:
-				var b bool
-				if field.Type() == reflect.TypeOf(&b) {
-					val, ok := val.(bool)
-					if !ok {
-						return fmt.Errorf("option %q must be of type boolean", key)
-					}
-					field.Set(reflect.ValueOf(&val))
-				} else {
-					return fmt.Errorf("unknown type loading config params: %v %v", field.Kind(), field.Type())
-				}
 			default:
 				return fmt.Errorf("unknown type loading config params: %v", field.Kind())
 			}
@ -613,8 +477,10 @@ func DefaultOptions() Options {
 			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
 			NumThread: 0,  // let the runtime decide
 			LowVRAM:   false,
+			F16KV:     true,
 			UseMLock:  false,
-			UseMMap:   nil,
+			UseMMap:   true,
+			UseNUMA:   false,
 		},
 	}
 }
@ -710,17 +576,6 @@ func FormatParams(params map[string][]string) (map[string]interface{}, error) {
 				case reflect.Slice:
 					// TODO: only string slices are supported right now
 					out[key] = vals
-				case reflect.Pointer:
-					var b bool
-					if field.Type() == reflect.TypeOf(&b) {
-						boolVal, err := strconv.ParseBool(vals[0])
-						if err != nil {
-							return nil, fmt.Errorf("invalid bool value %s", vals)
-						}
-						out[key] = &boolVal
-					} else {
-						return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
-					}
 				default:
 					return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
 				}
--- a/api/types_test.go
+++ b/api/types_test.go
@ -2,7 +2,6 @@ package api

 import (
 	"encoding/json"
-	"errors"
 	"math"
 	"testing"
 	"time"
@ -73,13 +72,13 @@ func TestDurationMarshalUnmarshal(t *testing.T) {
 		},
 		{
 			"positive duration",
-			42 * time.Second,
-			42 * time.Second,
+			time.Duration(42 * time.Second),
+			time.Duration(42 * time.Second),
 		},
 		{
 			"another positive duration",
-			42 * time.Minute,
-			42 * time.Minute,
+			time.Duration(42 * time.Minute),
+			time.Duration(42 * time.Minute),
 		},
 		{
 			"zero duration",
@ -106,128 +105,3 @@ func TestDurationMarshalUnmarshal(t *testing.T) {
 		})
 	}
 }
-
-func TestUseMmapParsingFromJSON(t *testing.T) {
-	tr := true
-	fa := false
-	tests := []struct {
-		name string
-		req  string
-		exp  *bool
-	}{
-		{
-			name: "Undefined",
-			req:  `{ }`,
-			exp:  nil,
-		},
-		{
-			name: "True",
-			req:  `{ "use_mmap": true }`,
-			exp:  &tr,
-		},
-		{
-			name: "False",
-			req:  `{ "use_mmap": false }`,
-			exp:  &fa,
-		},
-	}
-
-	for _, test := range tests {
-		t.Run(test.name, func(t *testing.T) {
-			var oMap map[string]interface{}
-			err := json.Unmarshal([]byte(test.req), &oMap)
-			require.NoError(t, err)
-			opts := DefaultOptions()
-			err = opts.FromMap(oMap)
-			require.NoError(t, err)
-			assert.Equal(t, test.exp, opts.UseMMap)
-		})
-	}
-}
-
-func TestUseMmapFormatParams(t *testing.T) {
-	tr := true
-	fa := false
-	tests := []struct {
-		name string
-		req  map[string][]string
-		exp  *bool
-		err  error
-	}{
-		{
-			name: "True",
-			req: map[string][]string{
-				"use_mmap": {"true"},
-			},
-			exp: &tr,
-			err: nil,
-		},
-		{
-			name: "False",
-			req: map[string][]string{
-				"use_mmap": {"false"},
-			},
-			exp: &fa,
-			err: nil,
-		},
-		{
-			name: "Numeric True",
-			req: map[string][]string{
-				"use_mmap": {"1"},
-			},
-			exp: &tr,
-			err: nil,
-		},
-		{
-			name: "Numeric False",
-			req: map[string][]string{
-				"use_mmap": {"0"},
-			},
-			exp: &fa,
-			err: nil,
-		},
-		{
-			name: "invalid string",
-			req: map[string][]string{
-				"use_mmap": {"foo"},
-			},
-			exp: nil,
-			err: errors.New("invalid bool value [foo]"),
-		},
-	}
-
-	for _, test := range tests {
-		t.Run(test.name, func(t *testing.T) {
-			resp, err := FormatParams(test.req)
-			require.Equal(t, test.err, err)
-			respVal, ok := resp["use_mmap"]
-			if test.exp != nil {
-				assert.True(t, ok, "resp: %v", resp)
-				assert.Equal(t, *test.exp, *respVal.(*bool))
-			}
-		})
-	}
-}
-
-func TestMessage_UnmarshalJSON(t *testing.T) {
-	tests := []struct {
-		input    string
-		expected string
-	}{
-		{`{"role": "USER", "content": "Hello!"}`, "user"},
-		{`{"role": "System", "content": "Initialization complete."}`, "system"},
-		{`{"role": "assistant", "content": "How can I help you?"}`, "assistant"},
-		{`{"role": "TOOl", "content": "Access granted."}`, "tool"},
-	}
-
-	for _, test := range tests {
-		var msg Message
-		if err := json.Unmarshal([]byte(test.input), &msg); err != nil {
-			t.Errorf("Unexpected error: %v", err)
-		}
-
-		if msg.Role != test.expected {
-			t.Errorf("role not lowercased: got %v, expected %v", msg.Role, test.expected)
-		}
-	}
-}
--- a/app/lifecycle/getstarted_nonwindows.go
+++ b/app/lifecycle/getstarted_nonwindows.go
@ -2,8 +2,8 @@

 package lifecycle

-import "errors"
+import "fmt"

 func GetStarted() error {
-	return errors.New("not implemented")
+	return fmt.Errorf("GetStarted not implemented")
 }
--- a/app/lifecycle/getstarted_windows.go
+++ b/app/lifecycle/getstarted_windows.go
@ -34,6 +34,7 @@ func GetStarted() error {
 		Sys:   &syscall.SysProcAttr{CreationFlags: CREATE_NEW_CONSOLE, HideWindow: false},
 	}
 	proc, err := os.StartProcess(args[0], args, attrs)
+
 	if err != nil {
 		return fmt.Errorf("unable to start getting started shell %w", err)
 	}
--- a/app/lifecycle/lifecycle.go
+++ b/app/lifecycle/lifecycle.go
@ -11,12 +11,10 @@ import (

 	"github.com/ollama/ollama/app/store"
 	"github.com/ollama/ollama/app/tray"
-	"github.com/ollama/ollama/envconfig"
 )

 func Run() {
 	InitLogging()
-	slog.Info("app config", "env", envconfig.Values())

 	ctx, cancel := context.WithCancel(context.Background())
 	var done chan int
--- a/app/lifecycle/logging.go
+++ b/app/lifecycle/logging.go
@ -5,8 +5,6 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
-	"strconv"
-	"strings"

 	"github.com/ollama/ollama/envconfig"
 )
@ -14,7 +12,7 @@ import (
 func InitLogging() {
 	level := slog.LevelInfo

-	if envconfig.Debug() {
+	if envconfig.Debug {
 		level = slog.LevelDebug
 	}

@ -26,8 +24,7 @@ func InitLogging() {
 		logFile = os.Stderr
 		// TODO - write one-line to the app.log file saying we're running in console mode to help avoid confusion
 	} else {
-		rotateLogs(AppLogFile)
-		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+		logFile, err = os.OpenFile(AppLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 		if err != nil {
 			slog.Error(fmt.Sprintf("failed to create server log %v", err))
 			return
@ -49,32 +46,3 @@ func InitLogging() {

 	slog.Info("ollama app started")
 }
-
-func rotateLogs(logFile string) {
-	if _, err := os.Stat(logFile); os.IsNotExist(err) {
-		return
-	}
-	index := strings.LastIndex(logFile, ".")
-	pre := logFile[:index]
-	post := "." + logFile[index+1:]
-	for i := LogRotationCount; i > 0; i-- {
-		older := pre + "-" + strconv.Itoa(i) + post
-		newer := pre + "-" + strconv.Itoa(i-1) + post
-		if i == 1 {
-			newer = pre + post
-		}
-		if _, err := os.Stat(newer); err == nil {
-			if _, err := os.Stat(older); err == nil {
-				err := os.Remove(older)
-				if err != nil {
-					slog.Warn("Failed to remove older log", "older", older, "error", err)
-					continue
-				}
-			}
-			err := os.Rename(newer, older)
-			if err != nil {
-				slog.Warn("Failed to rotate log", "older", older, "newer", newer, "error", err)
-			}
-		}
-	}
-}
--- a/app/lifecycle/logging_nonwindows.go
+++ b/app/lifecycle/logging_nonwindows.go
@ -5,5 +5,5 @@ package lifecycle
 import "log/slog"

 func ShowLogs() {
-	slog.Warn("not implemented")
+	slog.Warn("ShowLogs not yet implemented")
 }
--- a/app/lifecycle/logging_test.go
+++ b/app/lifecycle/logging_test.go
@ -1,44 +0,0 @@
-package lifecycle
-
-import (
-	"os"
-	"path/filepath"
-	"strconv"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-func TestRotateLogs(t *testing.T) {
-	logDir := t.TempDir()
-	logFile := filepath.Join(logDir, "testlog.log")
-
-	// No log exists
-	rotateLogs(logFile)
-
-	require.NoError(t, os.WriteFile(logFile, []byte("1"), 0o644))
-	assert.FileExists(t, logFile)
-	// First rotation
-	rotateLogs(logFile)
-	assert.FileExists(t, filepath.Join(logDir, "testlog-1.log"))
-	assert.NoFileExists(t, filepath.Join(logDir, "testlog-2.log"))
-	assert.NoFileExists(t, logFile)
-
-	// Should be a no-op without a new log
-	rotateLogs(logFile)
-	assert.FileExists(t, filepath.Join(logDir, "testlog-1.log"))
-	assert.NoFileExists(t, filepath.Join(logDir, "testlog-2.log"))
-	assert.NoFileExists(t, logFile)
-
-	for i := 2; i <= LogRotationCount+1; i++ {
-		require.NoError(t, os.WriteFile(logFile, []byte(strconv.Itoa(i)), 0o644))
-		assert.FileExists(t, logFile)
-		rotateLogs(logFile)
-		assert.NoFileExists(t, logFile)
-		for j := 1; j < i; j++ {
-			assert.FileExists(t, filepath.Join(logDir, "testlog-"+strconv.Itoa(j)+".log"))
-		}
-		assert.NoFileExists(t, filepath.Join(logDir, "testlog-"+strconv.Itoa(i+1)+".log"))
-	}
-}
--- a/app/lifecycle/paths.go
+++ b/app/lifecycle/paths.go
@ -16,12 +16,11 @@ var (
 	AppDir     = "/opt/Ollama"
 	AppDataDir = "/opt/Ollama"
 	// TODO - should there be a distinct log dir?
-	UpdateStageDir   = "/tmp"
-	AppLogFile       = "/tmp/ollama_app.log"
-	ServerLogFile    = "/tmp/ollama.log"
-	UpgradeLogFile   = "/tmp/ollama_update.log"
-	Installer        = "OllamaSetup.exe"
-	LogRotationCount = 5
+	UpdateStageDir = "/tmp"
+	AppLogFile     = "/tmp/ollama_app.log"
+	ServerLogFile  = "/tmp/ollama.log"
+	UpgradeLogFile = "/tmp/ollama_update.log"
+	Installer      = "OllamaSetup.exe"
 )

 func init() {
@ -36,13 +35,8 @@ func init() {
 		ServerLogFile = filepath.Join(AppDataDir, "server.log")
 		UpgradeLogFile = filepath.Join(AppDataDir, "upgrade.log")

-		exe, err := os.Executable()
-		if err != nil {
-			slog.Warn("error discovering executable directory", "error", err)
-			AppDir = filepath.Join(localAppData, "Programs", "Ollama")
-		} else {
-			AppDir = filepath.Dir(exe)
-		}
+		// Executables are stored under the local app data directory (Programs\Ollama)
+		AppDir = filepath.Join(localAppData, "Programs", "Ollama")

 		// Make sure we have PATH set correctly for any spawned children
 		paths := strings.Split(os.Getenv("PATH"), ";")
@ -69,12 +63,13 @@ func init() {
 		}

 		// Make sure our logging dir exists
-		_, err = os.Stat(AppDataDir)
+		_, err := os.Stat(AppDataDir)
 		if errors.Is(err, os.ErrNotExist) {
 			if err := os.MkdirAll(AppDataDir, 0o755); err != nil {
 				slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err))
 			}
 		}
+
 	} else if runtime.GOOS == "darwin" {
 		// TODO
 		AppName += ".app"
--- a/app/lifecycle/server.go
+++ b/app/lifecycle/server.go
@ -15,20 +15,14 @@ import (
 )

 func getCLIFullPath(command string) string {
-	var cmdPath string
+	cmdPath := ""
 	appExe, err := os.Executable()
 	if err == nil {
-		// Check both the same location as the tray app, as well as ./bin
 		cmdPath = filepath.Join(filepath.Dir(appExe), command)
 		_, err := os.Stat(cmdPath)
 		if err == nil {
 			return cmdPath
 		}
-		cmdPath = filepath.Join(filepath.Dir(appExe), "bin", command)
-		_, err = os.Stat(cmdPath)
-		if err == nil {
-			return cmdPath
-		}
 	}
 	cmdPath, err = exec.LookPath(command)
 	if err == nil {
@ -60,8 +54,8 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 		return nil, fmt.Errorf("failed to spawn server stderr pipe: %w", err)
 	}

-	rotateLogs(ServerLogFile)
-	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o755)
+	// TODO - rotation
+	logFile, err := os.OpenFile(ServerLogFile, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0755)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create server log: %w", err)
 	}
@ -71,6 +65,7 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
 	if err != nil {
 		if !errors.Is(err, os.ErrNotExist) {
 			return nil, fmt.Errorf("stat ollama server log dir %s: %v", logDir, err)
+
 		}

 		if err := os.MkdirAll(logDir, 0o755); err != nil {
--- a/app/lifecycle/server_windows.go
+++ b/app/lifecycle/server_windows.go
@ -24,8 +24,7 @@ func terminate(cmd *exec.Cmd) error {
 	if err != nil {
 		return err
 	}
-	//nolint:errcheck
-	defer dll.Release()
+	defer dll.Release() // nolint: errcheck

 	pid := cmd.Process.Pid

@ -74,8 +73,7 @@ func isProcessExited(pid int) (bool, error) {
 	if err != nil {
 		return false, fmt.Errorf("failed to open process: %v", err)
 	}
-	//nolint:errcheck
-	defer windows.CloseHandle(hProcess)
+	defer windows.CloseHandle(hProcess) // nolint: errcheck

 	var exitCode uint32
 	err = windows.GetExitCodeProcess(hProcess, &exitCode)
--- a/app/lifecycle/updater.go
+++ b/app/lifecycle/updater.go
@ -15,7 +15,6 @@ import (
 	"path"
 	"path/filepath"
 	"runtime"
-	"strconv"
 	"strings"
 	"time"

@ -47,7 +46,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	query.Add("os", runtime.GOOS)
 	query.Add("arch", runtime.GOARCH)
 	query.Add("version", version.Version)
-	query.Add("ts", strconv.FormatInt(time.Now().Unix(), 10))
+	query.Add("ts", fmt.Sprintf("%d", time.Now().Unix()))

 	nonce, err := auth.NewNonce(rand.Reader, 16)
 	if err != nil {
@ -79,7 +78,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 	}
 	defer resp.Body.Close()

-	if resp.StatusCode == http.StatusNoContent {
+	if resp.StatusCode == 204 {
 		slog.Debug("check update response 204 (current version is up to date)")
 		return false, updateResp
 	}
@ -88,7 +87,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
 		slog.Warn(fmt.Sprintf("failed to read body response: %s", err))
 	}

-	if resp.StatusCode != http.StatusOK {
+	if resp.StatusCode != 200 {
 		slog.Info(fmt.Sprintf("check update error %d - %.96s", resp.StatusCode, string(body)))
 		return false, updateResp
 	}
@ -115,7 +114,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
 	if err != nil {
 		return fmt.Errorf("error checking update: %w", err)
 	}
-	if resp.StatusCode != http.StatusOK {
+	if resp.StatusCode != 200 {
 		return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode)
 	}
 	resp.Body.Close()
--- a/app/lifecycle/updater_nonwindows.go
+++ b/app/lifecycle/updater_nonwindows.go
@ -4,9 +4,9 @@ package lifecycle

 import (
 	"context"
-	"errors"
+	"fmt"
 )

 func DoUpgrade(cancel context.CancelFunc, done chan int) error {
-	return errors.New("not implemented")
+	return fmt.Errorf("DoUpgrade not yet implemented")
 }
--- a/app/lifecycle/updater_windows.go
+++ b/app/lifecycle/updater_windows.go
@ -2,7 +2,6 @@ package lifecycle

 import (
 	"context"
-	"errors"
 	"fmt"
 	"log/slog"
 	"os"
@ -16,7 +15,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		return fmt.Errorf("failed to lookup downloads: %s", err)
 	}
 	if len(files) == 0 {
-		return errors.New("no update downloads found")
+		return fmt.Errorf("no update downloads found")
 	} else if len(files) > 1 {
 		// Shouldn't happen
 		slog.Warn(fmt.Sprintf("multiple downloads found, using first one %v", files))
@ -26,15 +25,19 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 	slog.Info("starting upgrade with " + installerExe)
 	slog.Info("upgrade log file " + UpgradeLogFile)

-	// make the upgrade show progress, but non interactive
+	// When running in debug mode, we'll be "verbose" and let the installer pop up and prompt
 	installArgs := []string{
 		"/CLOSEAPPLICATIONS",                    // Quit the tray app if it's still running
 		"/LOG=" + filepath.Base(UpgradeLogFile), // Only relative seems reliable, so set pwd
 		"/FORCECLOSEAPPLICATIONS",               // Force close the tray app - might be needed
-		"/SP",                                   // Skip the "This will install... Do you wish to continue" prompt
-		"/NOCANCEL",                             // Disable the ability to cancel upgrade mid-flight to avoid partially installed upgrades
-		"/SILENT",
 	}
+	// make the upgrade as quiet as possible (no GUI, no prompts)
+	installArgs = append(installArgs,
+		"/SP", // Skip the "This will install... Do you wish to continue" prompt
+		"/SUPPRESSMSGBOXES",
+		"/SILENT",
+		"/VERYSILENT",
+	)

 	// Safeguard in case we have requests in flight that need to drain...
 	slog.Info("Waiting for server to shutdown")
@ -61,7 +64,7 @@ func DoUpgrade(cancel context.CancelFunc, done chan int) error {
 		}
 	} else {
 		// TODO - some details about why it didn't start, or is this a pedantic error case?
-		return errors.New("installer process did not start")
+		return fmt.Errorf("installer process did not start")
 	}

 	// TODO should we linger for a moment and check to make sure it's actually running by checking the pid?
--- a/app/ollama.iss
+++ b/app/ollama.iss
@ -28,8 +28,8 @@ AppPublisher={#MyAppPublisher}
 AppPublisherURL={#MyAppURL}
 AppSupportURL={#MyAppURL}
 AppUpdatesURL={#MyAppURL}
-ArchitecturesAllowed=x64compatible arm64
-ArchitecturesInstallIn64BitMode=x64compatible arm64
+ArchitecturesAllowed=x64 arm64
+ArchitecturesInstallIn64BitMode=x64 arm64
 DefaultDirName={localappdata}\Programs\{#MyAppName}
 DefaultGroupName={#MyAppName}
 DisableProgramGroupPage=yes
@ -48,13 +48,12 @@ OutputDir=..\dist\
 SetupLogging=yes
 CloseApplications=yes
 RestartApplications=no
-RestartIfNeededByRun=no

 ; https://jrsoftware.org/ishelp/index.php?topic=setup_wizardimagefile
 WizardSmallImageFile=.\assets\setup.bmp

-; Ollama requires Windows 10 22H2 or newer for proper unicode rendering
-; TODO: consider setting this to 10.0.19045
+; TODO verify the actual minimum Windows version...
+; Original Windows 10 release (10.0.10240)
 MinVersion=10.0.10240

 ; First release that supports WinRT UI Composition for win32 apps
@ -87,21 +86,16 @@ Name: "english"; MessagesFile: "compiler:Default.isl"
 DialogFontSize=12

 [Files]
-#if DirExists("..\dist\windows-amd64")
-Source: "..\dist\windows-amd64-app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ;Check: not IsArm64();  Flags: ignoreversion 64bit
-Source: "..\dist\windows-amd64\ollama.exe"; DestDir: "{app}"; Check: not IsArm64(); Flags: ignoreversion 64bit
-Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Check: not IsArm64(); Flags: ignoreversion 64bit recursesubdirs
-#endif
-
-#if DirExists("..\dist\windows-arm64")
-Source: "..\dist\windows-arm64\vc_redist.arm64.exe"; DestDir: "{tmp}"; Check: IsArm64() and vc_redist_needed(); Flags: deleteafterinstall
-Source: "..\dist\windows-arm64-app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ;Check: IsArm64();  Flags: ignoreversion 64bit
-Source: "..\dist\windows-arm64\ollama.exe"; DestDir: "{app}"; Check: IsArm64(); Flags: ignoreversion 64bit
-Source: "..\dist\windows-arm64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Check: IsArm64(); Flags: ignoreversion 64bit recursesubdirs
-#endif
-
+Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
+Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
+Source: "..\dist\windows-{#ARCH}\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
+Source: "..\dist\windows-{#ARCH}\ollama_runners\*"; DestDir: "{app}\ollama_runners"; Flags: ignoreversion 64bit recursesubdirs
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
+#if DirExists("..\dist\windows-amd64\rocm")
+  Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
+#endif
+

 [Icons]
 Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
@ -109,9 +103,6 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
 Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"

 [Run]
-#if DirExists("..\dist\windows-arm64")
-Filename: "{tmp}\vc_redist.arm64.exe"; Parameters: "/install /passive /norestart"; Check: IsArm64() and vc_redist_needed(); StatusMsg: "Installing VC++ Redistributables..."; Flags: waituntilterminated
-#endif
 Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden

 [UninstallRun]
@ -131,18 +122,14 @@ Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\models"
 Type: filesandordirs; Name: "{%USERPROFILE}\.ollama\history"
 ; NOTE: if the user has a custom OLLAMA_MODELS it will be preserved

-[InstallDelete]
-Type: filesandordirs; Name: "{%TEMP}\ollama*"
-Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
-
 [Messages]
-WizardReady=Ollama
+WizardReady=Ollama Windows Preview
 ReadyLabel1=%nLet's get you up and running with your own large language models.
 SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or finish the other installer, then click OK to continue with this install, or Cancel to exit.


 ;FinishedHeadingLabel=Run your first model
-;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3.2
+;FinishedLabel=%nRun this command in a PowerShell or cmd terminal.%n%n%n    ollama run llama3
 ;ClickFinish=%n

 [Registry]
@ -167,39 +154,3 @@ begin
  { Pos() returns 0 if not found }
  Result := Pos(';' + ExpandConstant(Param) + ';', ';' + OrigPath + ';') = 0;
 end;
-
-{ --- VC Runtime libraries discovery code - Only install vc_redist if it isn't already installed ----- }
-const VCRTL_MIN_V1 = 14;
-const VCRTL_MIN_V2 = 40;
-const VCRTL_MIN_V3 = 33807;
-const VCRTL_MIN_V4 = 0;
-
- // check if the minimum required vc redist is installed (by looking the registry)
-function vc_redist_needed (): Boolean;
-var
-  sRegKey: string;
-  v1: Cardinal;
-  v2: Cardinal;
-  v3: Cardinal;
-  v4: Cardinal;
-begin
-  sRegKey := 'SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\arm64';
-  if (RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Major', v1)  and
-      RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Minor', v2) and
-      RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'Bld', v3) and
-      RegQueryDWordValue (HKEY_LOCAL_MACHINE, sRegKey, 'RBld', v4)) then
-  begin
-    Log ('VC Redist version: ' + IntToStr (v1) +
-        '.' + IntToStr (v2) + '.' + IntToStr (v3) +
-        '.' + IntToStr (v4));
-    { Version info was found. Return true if later or equal to our
-       minimal required version RTL_MIN_Vx }
-    Result := not (
-        (v1 > VCRTL_MIN_V1) or ((v1 = VCRTL_MIN_V1) and
-         ((v2 > VCRTL_MIN_V2) or ((v2 = VCRTL_MIN_V2) and
-          ((v3 > VCRTL_MIN_V3) or ((v3 = VCRTL_MIN_V3) and
-           (v4 >= VCRTL_MIN_V4)))))));
-  end
-  else
-    Result := TRUE;
-end;
--- a/app/ollama_welcome.ps1
+++ b/app/ollama_welcome.ps1
@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
 write-host ""
 write-host "Run your first model:"
 write-host ""
-write-host "`tollama run llama3.2"
+write-host "`tollama run llama3"
 write-host ""
--- a/app/store/store.go
+++ b/app/store/store.go
@ -29,6 +29,7 @@ func GetID() string {
 		initStore()
 	}
 	return store.ID
+
 }

 func GetFirstTimeRun() bool {
--- a/app/tray/tray_nonwindows.go
+++ b/app/tray/tray_nonwindows.go
@ -3,11 +3,11 @@
 package tray

 import (
-	"errors"
+	"fmt"

 	"github.com/ollama/ollama/app/tray/commontray"
 )

 func InitPlatformTray(icon, updateIcon []byte) (commontray.OllamaTray, error) {
-	return nil, errors.New("not implemented")
+	return nil, fmt.Errorf("NOT IMPLEMENTED YET")
 }
--- a/app/tray/wintray/eventloop.go
+++ b/app/tray/wintray/eventloop.go
@ -11,7 +11,9 @@ import (
 	"golang.org/x/sys/windows"
 )

-var quitOnce sync.Once
+var (
+	quitOnce sync.Once
+)

 func (t *winTray) Run() {
 	nativeLoop()
@ -45,6 +47,7 @@ func nativeLoop() {
 		default:
 			pTranslateMessage.Call(uintptr(unsafe.Pointer(m))) //nolint:errcheck
 			pDispatchMessage.Call(uintptr(unsafe.Pointer(m)))  //nolint:errcheck
+
 		}
 	}
 }
@ -157,8 +160,8 @@ func (t *winTray) wndProc(hWnd windows.Handle, message uint32, wParam, lParam ui
 		lResult, _, _ = pDefWindowProc.Call(
 			uintptr(hWnd),
 			uintptr(message),
-			wParam,
-			lParam,
+			uintptr(wParam),
+			uintptr(lParam),
 		)
 	}
 	return
--- a/app/tray/wintray/menus.go
+++ b/app/tray/wintray/menus.go
@ -11,13 +11,12 @@ import (
 )

 const (
-	_ = iota
-	updateAvailableMenuID
-	updateMenuID
-	separatorMenuID
-	diagLogsMenuID
-	diagSeparatorMenuID
-	quitMenuID
+	updatAvailableMenuID = 1
+	updateMenuID         = updatAvailableMenuID + 1
+	separatorMenuID      = updateMenuID + 1
+	diagLogsMenuID       = separatorMenuID + 1
+	diagSeparatorMenuID  = diagLogsMenuID + 1
+	quitMenuID           = diagSeparatorMenuID + 1
 )

 func (t *winTray) initMenus() error {
@ -36,7 +35,7 @@ func (t *winTray) initMenus() error {
 func (t *winTray) UpdateAvailable(ver string) error {
 	if !t.updateNotified {
 		slog.Debug("updating menu and sending notification for new update")
-		if err := t.addOrUpdateMenuItem(updateAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
+		if err := t.addOrUpdateMenuItem(updatAvailableMenuID, 0, updateAvailableMenuTitle, true); err != nil {
 			return fmt.Errorf("unable to create menu entries %w", err)
 		}
 		if err := t.addOrUpdateMenuItem(updateMenuID, 0, updateMenutTitle, false); err != nil {
--- a/app/tray/wintray/tray.go
+++ b/app/tray/wintray/tray.go
@ -11,12 +11,10 @@ import (
 	"path/filepath"
 	"sort"
 	"sync"
-	"syscall"
 	"unsafe"

-	"golang.org/x/sys/windows"
-
 	"github.com/ollama/ollama/app/tray/commontray"
+	"golang.org/x/sys/windows"
 )

 // Helpful sources: https://github.com/golang/exp/blob/master/shiny/driver/internal/win32
@ -188,7 +186,7 @@ func (t *winTray) initInstance() error {
 	t.muNID.Lock()
 	defer t.muNID.Unlock()
 	t.nid = &notifyIconData{
-		Wnd:             t.window,
+		Wnd:             windows.Handle(t.window),
 		ID:              100,
 		Flags:           NIF_MESSAGE,
 		CallbackMessage: t.wmSystrayMessage,
@ -199,6 +197,7 @@ func (t *winTray) initInstance() error {
 }

 func (t *winTray) createMenu() error {
+
 	menuHandle, _, err := pCreatePopupMenu.Call()
 	if menuHandle == 0 {
 		return err
@ -247,7 +246,7 @@ func (t *winTray) addOrUpdateMenuItem(menuItemId uint32, parentId uint32, title
 	mi := menuItemInfo{
 		Mask:     MIIM_FTYPE | MIIM_STRING | MIIM_ID | MIIM_STATE,
 		Type:     MFT_STRING,
-		ID:       menuItemId,
+		ID:       uint32(menuItemId),
 		TypeData: titlePtr,
 		Cch:      uint32(len(title)),
 	}
@ -303,10 +302,11 @@ func (t *winTray) addOrUpdateMenuItem(menuItemId uint32, parentId uint32, title
 }

 func (t *winTray) addSeparatorMenuItem(menuItemId, parentId uint32) error {
+
 	mi := menuItemInfo{
 		Mask: MIIM_FTYPE | MIIM_ID | MIIM_STATE,
 		Type: MFT_SEPARATOR,
-		ID:   menuItemId,
+		ID:   uint32(menuItemId),
 	}

 	mi.Size = uint32(unsafe.Sizeof(mi))
@ -416,7 +416,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
 	iconFilePath := filepath.Join(os.TempDir(), "ollama_temp_icon_"+dataHash)

 	if _, err := os.Stat(iconFilePath); os.IsNotExist(err) {
-		if err := os.WriteFile(iconFilePath, iconBytes, 0o644); err != nil {
+		if err := os.WriteFile(iconFilePath, iconBytes, 0644); err != nil {
 			return "", err
 		}
 	}
@ -426,6 +426,7 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
 // Loads an image from file and shows it in tray.
 // Shell_NotifyIcon: https://msdn.microsoft.com/en-us/library/windows/desktop/bb762159(v=vs.85).aspx
 func (t *winTray) setIcon(src string) error {
+
 	h, err := t.loadIconFrom(src)
 	if err != nil {
 		return err
@ -434,12 +435,7 @@ func (t *winTray) setIcon(src string) error {
 	t.muNID.Lock()
 	defer t.muNID.Unlock()
 	t.nid.Icon = h
-	t.nid.Flags |= NIF_ICON | NIF_TIP
-	if toolTipUTF16, err := syscall.UTF16FromString(commontray.ToolTip); err == nil {
-		copy(t.nid.Tip[:], toolTipUTF16)
-	} else {
-		return err
-	}
+	t.nid.Flags |= NIF_ICON
 	t.nid.Size = uint32(unsafe.Sizeof(*t.nid))

 	return t.nid.modify()
@ -448,6 +444,7 @@ func (t *winTray) setIcon(src string) error {
 // Loads an image from file to be shown in tray or menu item.
 // LoadImage: https://msdn.microsoft.com/en-us/library/windows/desktop/ms648045(v=vs.85).aspx
 func (t *winTray) loadIconFrom(src string) (windows.Handle, error) {
+
 	// Save and reuse handles of loaded images
 	t.muLoadedImages.RLock()
 	h, ok := t.loadedImages[src]
--- a/app/tray/wintray/w32api.go
+++ b/app/tray/wintray/w32api.go
@ -61,7 +61,6 @@ const (
 	MIIM_SUBMENU        = 0x00000004
 	MIM_APPLYTOSUBMENUS = 0x80000000
 	NIF_ICON            = 0x00000002
-	NIF_TIP             = 0x00000004
 	NIF_INFO            = 0x00000010
 	NIF_MESSAGE         = 0x00000001
 	SW_HIDE             = 0
--- a/auth/auth.go
+++ b/auth/auth.go
@ -5,7 +5,6 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/base64"
-	"errors"
 	"fmt"
 	"io"
 	"log/slog"
@ -79,7 +78,7 @@ func Sign(ctx context.Context, bts []byte) (string, error) {
 	publicKey := ssh.MarshalAuthorizedKey(privateKey.PublicKey())
 	parts := bytes.Split(publicKey, []byte(" "))
 	if len(parts) < 2 {
-		return "", errors.New("malformed public key")
+		return "", fmt.Errorf("malformed public key")
 	}

 	signedData, err := privateKey.Sign(rand.Reader, bts)
--- a/build/darwin/amd64/placeholder
+++ b/build/darwin/amd64/placeholder
@ -1 +0,0 @@
-This is here to make sure the build/ directory exists for the go:embed command
--- a/build/darwin/arm64/placeholder
+++ b/build/darwin/arm64/placeholder
@ -1 +0,0 @@
-This is here to make sure the build/ directory exists for the go:embed command
--- a/build/embed_darwin_amd64.go
+++ b/build/embed_darwin_amd64.go
@ -1,8 +0,0 @@
-package build
-
-import "embed"
-
-// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling
-
-//go:embed darwin/amd64/*
-var EmbedFS embed.FS
--- a/build/embed_darwin_arm64.go
+++ b/build/embed_darwin_arm64.go
@ -1,8 +0,0 @@
-package build
-
-import "embed"
-
-// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling
-
-//go:embed darwin/arm64/*
-var EmbedFS embed.FS
--- a/build/embed_linux.go
+++ b/build/embed_linux.go
@ -1,6 +0,0 @@
-package build
-
-import "embed"
-
-//go:embed linux/*
-var EmbedFS embed.FS
--- a/build/embed_unused.go
+++ b/build/embed_unused.go
@ -1,8 +0,0 @@
-//go:build !linux && !darwin
-
-package build
-
-import "embed"
-
-// unused on windows
-var EmbedFS embed.FS
--- a/build/linux/amd64/placeholder
+++ b/build/linux/amd64/placeholder
@ -1 +0,0 @@
-This is here to make sure the build/ directory exists for the go:embed command
--- a/build/linux/arm64/placeholder
+++ b/build/linux/arm64/placeholder
@ -1 +0,0 @@
-This is here to make sure the build/ directory exists for the go:embed command
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@ -2,7 +2,6 @@ package cmd

 import (
 	"archive/zip"
-	"bufio"
 	"bytes"
 	"context"
 	"crypto/ed25519"
@ -21,9 +20,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"runtime"
-	"strconv"
 	"strings"
-	"sync/atomic"
 	"syscall"
 	"time"

@ -32,6 +29,7 @@ import (
 	"github.com/olekukonko/tablewriter"
 	"github.com/spf13/cobra"
 	"golang.org/x/crypto/ssh"
+	"golang.org/x/exp/slices"
 	"golang.org/x/term"

 	"github.com/ollama/ollama/api"
@ -46,58 +44,28 @@ import (
 	"github.com/ollama/ollama/version"
 )

-var (
-	errModelNotFound     = errors.New("no Modelfile or safetensors files found")
-	errModelfileNotFound = errors.New("specified Modelfile wasn't found")
-)
-
-func getModelfileName(cmd *cobra.Command) (string, error) {
-	fn, _ := cmd.Flags().GetString("file")
-
-	filename := fn
-	if filename == "" {
-		filename = "Modelfile"
-	}
-
-	absName, err := filepath.Abs(filename)
-	if err != nil {
-		return "", err
-	}
-
-	_, err = os.Stat(absName)
-	if err != nil {
-		return fn, err
-	}
-
-	return absName, nil
-}
-
 func CreateHandler(cmd *cobra.Command, args []string) error {
+	filename, _ := cmd.Flags().GetString("file")
+	filename, err := filepath.Abs(filename)
+	if err != nil {
+		return err
+	}
+
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return err
+	}
+
 	p := progress.NewProgress(os.Stderr)
 	defer p.Stop()

-	var reader io.Reader
-
-	filename, err := getModelfileName(cmd)
-	if os.IsNotExist(err) {
-		if filename == "" {
-			reader = strings.NewReader("FROM .\n")
-		} else {
-			return errModelfileNotFound
-		}
-	} else if err != nil {
+	f, err := os.Open(filename)
+	if err != nil {
 		return err
-	} else {
-		f, err := os.Open(filename)
-		if err != nil {
-			return err
-		}
-
-		reader = f
-		defer f.Close()
 	}
+	defer f.Close()

-	modelfile, err := parser.ParseFile(reader)
+	modelfile, err := parser.ParseFile(f)
 	if err != nil {
 		return err
 	}
@ -110,12 +78,6 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	status := "transferring model data"
 	spinner := progress.NewSpinner(status)
 	p.Add(status, spinner)
-	defer p.Stop()
-
-	client, err := api.ClientFromEnvironment()
-	if err != nil {
-		return err
-	}

 	for i := range modelfile.Commands {
 		switch modelfile.Commands[i].Name {
@ -150,7 +112,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 				path = tempfile
 			}

-			digest, err := createBlob(cmd, client, path, spinner)
+			digest, err := createBlob(cmd, client, path)
 			if err != nil {
 				return err
 			}
@ -200,6 +162,9 @@ func tempZipFiles(path string) (string, error) {
 	}
 	defer tempfile.Close()

+	zipfile := zip.NewWriter(tempfile)
+	defer zipfile.Close()
+
 	detectContentType := func(path string) (string, error) {
 		f, err := os.Open(path)
 		if err != nil {
@ -240,12 +205,6 @@ func tempZipFiles(path string) (string, error) {
 		// safetensors files might be unresolved git lfs references; skip if they are
 		// covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
 		files = append(files, st...)
-	} else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
-		// covers adapters.safetensors
-		files = append(files, st...)
-	} else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
-		// covers adapter_model.safetensors
-		files = append(files, st...)
 	} else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
 		// pytorch files might also be unresolved git lfs references; skip if they are
 		// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
@ -255,7 +214,7 @@ func tempZipFiles(path string) (string, error) {
 		// covers consolidated.x.pth, consolidated.pth
 		files = append(files, pt...)
 	} else {
-		return "", errModelNotFound
+		return "", errors.New("no safetensors or torch files found")
 	}

 	// add configuration files, json files are detected as text/plain
@ -265,14 +224,6 @@ func tempZipFiles(path string) (string, error) {
 	}
 	files = append(files, js...)

-	// bert models require a nested config.json
-	// TODO(mxyng): merge this with the glob above
-	js, err = glob(filepath.Join(path, "**/*.json"), "text/plain")
-	if err != nil {
-		return "", err
-	}
-	files = append(files, js...)
-
 	if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
 		// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
 		// tokenizer.model might be an unresolved git lfs reference; error if it is
@ -282,9 +233,6 @@ func tempZipFiles(path string) (string, error) {
 		files = append(files, tks...)
 	}

-	zipfile := zip.NewWriter(tempfile)
-	defer zipfile.Close()
-
 	for _, file := range files {
 		f, err := os.Open(file)
 		if err != nil {
@ -302,11 +250,6 @@ func tempZipFiles(path string) (string, error) {
 			return "", err
 		}

-		zfi.Name, err = filepath.Rel(path, file)
-		if err != nil {
-			return "", err
-		}
-
 		zf, err := zipfile.CreateHeader(zfi)
 		if err != nil {
 			return "", err
@ -320,20 +263,13 @@ func tempZipFiles(path string) (string, error) {
 	return tempfile.Name(), nil
 }

-func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *progress.Spinner) (string, error) {
+func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, error) {
 	bin, err := os.Open(path)
 	if err != nil {
 		return "", err
 	}
 	defer bin.Close()

-	// Get file info to retrieve the size
-	fileInfo, err := bin.Stat()
-	if err != nil {
-		return "", err
-	}
-	fileSize := fileInfo.Size()
-
 	hash := sha256.New()
 	if _, err := io.Copy(hash, bin); err != nil {
 		return "", err
@ -343,83 +279,46 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string, spinner *pr
 		return "", err
 	}

-	var pw progressWriter
-	status := "transferring model data 0%"
-	spinner.SetMessage(status)
-
-	done := make(chan struct{})
-	defer close(done)
-
-	go func() {
-		ticker := time.NewTicker(60 * time.Millisecond)
-		defer ticker.Stop()
-		for {
-			select {
-			case <-ticker.C:
-				spinner.SetMessage(fmt.Sprintf("transferring model data %d%%", int(100*pw.n.Load()/fileSize)))
-			case <-done:
-				spinner.SetMessage("transferring model data 100%")
-				return
-			}
-		}
-	}()
-
 	digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
-	if err = client.CreateBlob(cmd.Context(), digest, io.TeeReader(bin, &pw)); err != nil {
+	if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
 		return "", err
 	}
 	return digest, nil
 }

-type progressWriter struct {
-	n atomic.Int64
-}
-
-func (w *progressWriter) Write(p []byte) (n int, err error) {
-	w.n.Add(int64(len(p)))
-	return len(p), nil
-}
-
-func loadOrUnloadModel(cmd *cobra.Command, opts *runOptions) error {
-	p := progress.NewProgress(os.Stderr)
-	defer p.StopAndClear()
-
-	spinner := progress.NewSpinner("")
-	p.Add("", spinner)
-
+func RunHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}

-	req := &api.GenerateRequest{
-		Model:     opts.Model,
-		KeepAlive: opts.KeepAlive,
-	}
+	name := args[0]

-	return client.Generate(cmd.Context(), req, func(api.GenerateResponse) error { return nil })
-}
-
-func StopHandler(cmd *cobra.Command, args []string) error {
-	opts := &runOptions{
-		Model:     args[0],
-		KeepAlive: &api.Duration{Duration: 0},
-	}
-	if err := loadOrUnloadModel(cmd, opts); err != nil {
-		if strings.Contains(err.Error(), "not found") {
-			return fmt.Errorf("couldn't find model \"%s\" to stop", args[0])
+	// check if the model exists on the server
+	show, err := client.Show(cmd.Context(), &api.ShowRequest{Name: name})
+	var statusError api.StatusError
+	switch {
+	case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
+		if err := PullHandler(cmd, []string{name}); err != nil {
+			return err
 		}
-	}
-	return nil
-}

-func RunHandler(cmd *cobra.Command, args []string) error {
+		show, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name})
+		if err != nil {
+			return err
+		}
+	case err != nil:
+		return err
+	}
+
 	interactive := true

 	opts := runOptions{
-		Model:    args[0],
-		WordWrap: os.Getenv("TERM") == "xterm-256color",
-		Options:  map[string]interface{}{},
+		Model:       args[0],
+		WordWrap:    os.Getenv("TERM") == "xterm-256color",
+		Options:     map[string]interface{}{},
+		MultiModal:  slices.Contains(show.Details.Families, "clip"),
+		ParentModel: show.Details.ParentModel,
 	}

 	format, err := cmd.Flags().GetString("format")
@ -463,53 +362,11 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	}
 	opts.WordWrap = !nowrap

-	// Fill out the rest of the options based on information about the
-	// model.
-	client, err := api.ClientFromEnvironment()
-	if err != nil {
-		return err
+	if !interactive {
+		return generate(cmd, opts)
 	}

-	name := args[0]
-	info, err := func() (*api.ShowResponse, error) {
-		showReq := &api.ShowRequest{Name: name}
-		info, err := client.Show(cmd.Context(), showReq)
-		var se api.StatusError
-		if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
-			if err := PullHandler(cmd, []string{name}); err != nil {
-				return nil, err
-			}
-			return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
-		}
-		return info, err
-	}()
-	if err != nil {
-		return err
-	}
-
-	opts.MultiModal = len(info.ProjectorInfo) != 0
-	opts.ParentModel = info.Details.ParentModel
-
-	if interactive {
-		if err := loadOrUnloadModel(cmd, &opts); err != nil {
-			return err
-		}
-
-		for _, msg := range info.Messages {
-			switch msg.Role {
-			case "user":
-				fmt.Printf(">>> %s\n", msg.Content)
-			case "assistant":
-				state := &displayResponseState{}
-				displayResponse(msg.Content, opts.WordWrap, state)
-				fmt.Println()
-				fmt.Println()
-			}
-		}
-
-		return generateInteractive(cmd, opts)
-	}
-	return generate(cmd, opts)
+	return generateInteractive(cmd, opts)
 }

 func errFromUnknownKey(unknownKeyErr error) error {
@ -647,7 +504,7 @@ func ListHandler(cmd *cobra.Command, args []string) error {
 	table.SetHeaderLine(false)
 	table.SetBorder(false)
 	table.SetNoWhiteSpace(true)
-	table.SetTablePadding("    ")
+	table.SetTablePadding("\t")
 	table.AppendBulk(data)
 	table.Render()

@ -682,15 +539,7 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error {
 				cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
 				procStr = fmt.Sprintf("%d%%/%d%% CPU/GPU", int(cpuPercent), int(100-cpuPercent))
 			}
-
-			var until string
-			delta := time.Since(m.ExpiresAt)
-			if delta > 0 {
-				until = "Stopping..."
-			} else {
-				until = format.HumanTime(m.ExpiresAt, "Never")
-			}
-			data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, until})
+			data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, format.HumanTime(m.ExpiresAt, "Never")})
 		}
 	}

@ -701,7 +550,7 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error {
 	table.SetHeaderLine(false)
 	table.SetBorder(false)
 	table.SetNoWhiteSpace(true)
-	table.SetTablePadding("    ")
+	table.SetTablePadding("\t")
 	table.AppendBulk(data)
 	table.Render()

@ -714,17 +563,6 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

-	// Unload the model if it's running before deletion
-	opts := &runOptions{
-		Model:     args[0],
-		KeepAlive: &api.Duration{Duration: 0},
-	}
-	if err := loadOrUnloadModel(cmd, opts); err != nil {
-		if !strings.Contains(err.Error(), "not found") {
-			return fmt.Errorf("unable to stop existing running model \"%s\": %s", args[0], err)
-		}
-	}
-
 	for _, name := range args {
 		req := api.DeleteRequest{Name: name}
 		if err := client.Delete(cmd.Context(), &req); err != nil {
@ -741,6 +579,10 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

+	if len(args) != 1 {
+		return errors.New("missing model name")
+	}
+
 	license, errLicense := cmd.Flags().GetBool("license")
 	modelfile, errModelfile := cmd.Flags().GetBool("modelfile")
 	parameters, errParams := cmd.Flags().GetBool("parameters")
@ -783,6 +625,8 @@ func ShowHandler(cmd *cobra.Command, args []string) error {

 	if flagsSet > 1 {
 		return errors.New("only one of '--license', '--modelfile', '--parameters', '--system', or '--template' can be specified")
+	} else if flagsSet == 0 {
+		return errors.New("one of '--license', '--modelfile', '--parameters', '--system', or '--template' must be specified")
 	}

 	req := api.ShowRequest{Name: args[0]}
@ -791,103 +635,17 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

-	if flagsSet == 1 {
-		switch showType {
-		case "license":
-			fmt.Println(resp.License)
-		case "modelfile":
-			fmt.Println(resp.Modelfile)
-		case "parameters":
-			fmt.Println(resp.Parameters)
-		case "system":
-			fmt.Print(resp.System)
-		case "template":
-			fmt.Print(resp.Template)
-		}
-
-		return nil
-	}
-
-	return showInfo(resp, os.Stdout)
-}
-
-func showInfo(resp *api.ShowResponse, w io.Writer) error {
-	tableRender := func(header string, rows func() [][]string) {
-		fmt.Fprintln(w, " ", header)
-		table := tablewriter.NewWriter(w)
-		table.SetAlignment(tablewriter.ALIGN_LEFT)
-		table.SetBorder(false)
-		table.SetNoWhiteSpace(true)
-		table.SetTablePadding("    ")
-
-		switch header {
-		case "Template", "System", "License":
-			table.SetColWidth(100)
-		}
-
-		table.AppendBulk(rows())
-		table.Render()
-		fmt.Fprintln(w)
-	}
-
-	tableRender("Model", func() (rows [][]string) {
-		if resp.ModelInfo != nil {
-			arch := resp.ModelInfo["general.architecture"].(string)
-			rows = append(rows, []string{"", "architecture", arch})
-			rows = append(rows, []string{"", "parameters", format.HumanNumber(uint64(resp.ModelInfo["general.parameter_count"].(float64)))})
-			rows = append(rows, []string{"", "context length", strconv.FormatFloat(resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64), 'f', -1, 64)})
-			rows = append(rows, []string{"", "embedding length", strconv.FormatFloat(resp.ModelInfo[fmt.Sprintf("%s.embedding_length", arch)].(float64), 'f', -1, 64)})
-		} else {
-			rows = append(rows, []string{"", "architecture", resp.Details.Family})
-			rows = append(rows, []string{"", "parameters", resp.Details.ParameterSize})
-		}
-		rows = append(rows, []string{"", "quantization", resp.Details.QuantizationLevel})
-		return
-	})
-
-	if resp.ProjectorInfo != nil {
-		tableRender("Projector", func() (rows [][]string) {
-			arch := resp.ProjectorInfo["general.architecture"].(string)
-			rows = append(rows, []string{"", "architecture", arch})
-			rows = append(rows, []string{"", "parameters", format.HumanNumber(uint64(resp.ProjectorInfo["general.parameter_count"].(float64)))})
-			rows = append(rows, []string{"", "embedding length", strconv.FormatFloat(resp.ProjectorInfo[fmt.Sprintf("%s.vision.embedding_length", arch)].(float64), 'f', -1, 64)})
-			rows = append(rows, []string{"", "dimensions", strconv.FormatFloat(resp.ProjectorInfo[fmt.Sprintf("%s.vision.projection_dim", arch)].(float64), 'f', -1, 64)})
-			return
-		})
-	}
-
-	if resp.Parameters != "" {
-		tableRender("Parameters", func() (rows [][]string) {
-			scanner := bufio.NewScanner(strings.NewReader(resp.Parameters))
-			for scanner.Scan() {
-				if text := scanner.Text(); text != "" {
-					rows = append(rows, append([]string{""}, strings.Fields(text)...))
-				}
-			}
-			return
-		})
-	}
-
-	head := func(s string, n int) (rows [][]string) {
-		scanner := bufio.NewScanner(strings.NewReader(s))
-		for scanner.Scan() && (len(rows) < n || n < 0) {
-			if text := scanner.Text(); text != "" {
-				rows = append(rows, []string{"", strings.TrimSpace(text)})
-			}
-		}
-		return
-	}
-
-	if resp.System != "" {
-		tableRender("System", func() [][]string {
-			return head(resp.System, 2)
-		})
-	}
-
-	if resp.License != "" {
-		tableRender("License", func() [][]string {
-			return head(resp.License, 2)
-		})
+	switch showType {
+	case "license":
+		fmt.Println(resp.License)
+	case "modelfile":
+		fmt.Println(resp.Modelfile)
+	case "parameters":
+		fmt.Println(resp.Parameters)
+	case "system":
+		fmt.Println(resp.System)
+	case "template":
+		fmt.Println(resp.Template)
 	}

 	return nil
@ -971,6 +729,7 @@ type runOptions struct {
 	WordWrap    bool
 	Format      string
 	System      string
+	Template    string
 	Images      []api.ImageData
 	Options     map[string]interface{}
 	MultiModal  bool
@ -987,6 +746,7 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
 	if wordWrap && termWidth >= 10 {
 		for _, ch := range content {
 			if state.lineLength+1 > termWidth-5 {
+
 				if runewidth.StringWidth(state.wordBuffer) > termWidth-10 {
 					fmt.Printf("%s%c", state.wordBuffer, ch)
 					state.wordBuffer = ""
@ -1164,6 +924,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 		Images:    opts.Images,
 		Format:    opts.Format,
 		System:    opts.System,
+		Template:  opts.Template,
 		Options:   opts.Options,
 		KeepAlive: opts.KeepAlive,
 	}
@ -1199,12 +960,18 @@ func generate(cmd *cobra.Command, opts runOptions) error {
 	return nil
 }

-func RunServer(_ *cobra.Command, _ []string) error {
+func RunServer(cmd *cobra.Command, _ []string) error {
+	// retrieve the OLLAMA_HOST environment variable
+	ollamaHost, err := api.GetOllamaHost()
+	if err != nil {
+		return err
+	}
+
 	if err := initializeKeypair(); err != nil {
 		return err
 	}

-	ln, err := net.Listen("tcp", envconfig.Host().Host)
+	ln, err := net.Listen("tcp", net.JoinHostPort(ollamaHost.Host, ollamaHost.Port))
 	if err != nil {
 		return err
 	}
@ -1263,6 +1030,24 @@ func initializeKeypair() error {
 	return nil
 }

+// waitForServer polls the server heartbeat every 500ms until it succeeds.
+// It returns nil once a heartbeat succeeds, ctx.Err() if ctx is cancelled
+// first, or a timeout error if no heartbeat succeeds within 5 seconds.
+//
+//nolint:unused
+func waitForServer(ctx context.Context, client *api.Client) error {
+	// Use an explicit timer and ticker (rather than time.After / time.Tick)
+	// so both are released as soon as this function returns.
+	timeout := time.NewTimer(5 * time.Second)
+	defer timeout.Stop()
+	tick := time.NewTicker(500 * time.Millisecond)
+	defer tick.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			// The caller gave up; stop polling instead of running out the clock.
+			return ctx.Err()
+		case <-timeout.C:
+			return errors.New("timed out waiting for server to start")
+		case <-tick.C:
+			if err := client.Heartbeat(ctx); err == nil {
+				return nil // server has started
+			}
+		}
+	}
+}
+
 func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
@ -1273,7 +1058,7 @@ func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
 			return err
 		}
 		if err := startApp(cmd.Context(), client); err != nil {
-			return errors.New("could not connect to ollama app, is it running?")
+			return fmt.Errorf("could not connect to ollama app, is it running?")
 		}
 	}
 	return nil
@ -1318,7 +1103,7 @@ func NewCLI() *cobra.Command {
 	log.SetFlags(log.LstdFlags | log.Lshortfile)
 	cobra.EnableCommandSorting = false

-	if runtime.GOOS == "windows" && term.IsTerminal(int(os.Stdout.Fd())) {
+	if runtime.GOOS == "windows" {
 		console.ConsoleFromFile(os.Stdin) //nolint:errcheck
 	}

@ -1350,7 +1135,7 @@ func NewCLI() *cobra.Command {
 		RunE:    CreateHandler,
 	}

-	createCmd.Flags().StringP("file", "f", "", "Name of the Modelfile (default \"Modelfile\"")
+	createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile")
 	createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")

 	showCmd := &cobra.Command{
@ -1380,15 +1165,6 @@ func NewCLI() *cobra.Command {
 	runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
 	runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
 	runCmd.Flags().String("format", "", "Response format (e.g. json)")
-
-	stopCmd := &cobra.Command{
-		Use:     "stop MODEL",
-		Short:   "Stop a running model",
-		Args:    cobra.ExactArgs(1),
-		PreRunE: checkServerHeartbeat,
-		RunE:    StopHandler,
-	}
-
 	serveCmd := &cobra.Command{
 		Use:     "serve",
 		Aliases: []string{"start"},
@ -1456,7 +1232,6 @@ func NewCLI() *cobra.Command {
 		createCmd,
 		showCmd,
 		runCmd,
-		stopCmd,
 		pullCmd,
 		pushCmd,
 		listCmd,
@ -1479,12 +1254,10 @@ func NewCLI() *cobra.Command {
 				envVars["OLLAMA_NUM_PARALLEL"],
 				envVars["OLLAMA_NOPRUNE"],
 				envVars["OLLAMA_ORIGINS"],
-				envVars["OLLAMA_SCHED_SPREAD"],
 				envVars["OLLAMA_TMPDIR"],
 				envVars["OLLAMA_FLASH_ATTENTION"],
 				envVars["OLLAMA_LLM_LIBRARY"],
-				envVars["OLLAMA_GPU_OVERHEAD"],
-				envVars["OLLAMA_LOAD_TIMEOUT"],
+				envVars["OLLAMA_MAX_VRAM"],
 			})
 		default:
 			appendEnvDocs(cmd, envs)
@ -1496,7 +1269,6 @@ func NewCLI() *cobra.Command {
 		createCmd,
 		showCmd,
 		runCmd,
-		stopCmd,
 		pullCmd,
 		pushCmd,
 		listCmd,
--- a/cmd/cmd_test.go
+++ b/cmd/cmd_test.go
@ -1,371 +0,0 @@
-package cmd
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"net/http"
-	"net/http/httptest"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-	"github.com/spf13/cobra"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestShowInfo(t *testing.T) {
-	t.Run("bare details", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family:            "test",
-				ParameterSize:     "7B",
-				QuantizationLevel: "FP16",
-			},
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-    architecture    test    
-    parameters      7B      
-    quantization    FP16    
-
-`
-
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("bare model info", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			ModelInfo: map[string]any{
-				"general.architecture":    "test",
-				"general.parameter_count": float64(7_000_000_000),
-				"test.context_length":     float64(0),
-				"test.embedding_length":   float64(0),
-			},
-			Details: api.ModelDetails{
-				Family:            "test",
-				ParameterSize:     "7B",
-				QuantizationLevel: "FP16",
-			},
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-    architecture        test    
-    parameters          7B      
-    context length      0       
-    embedding length    0       
-    quantization        FP16    
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("parameters", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family:            "test",
-				ParameterSize:     "7B",
-				QuantizationLevel: "FP16",
-			},
-			Parameters: `
-			stop never
-			stop gonna
-			stop give
-			stop you
-			stop up
-			temperature 99`,
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-    architecture    test    
-    parameters      7B      
-    quantization    FP16    
-
-  Parameters
-    stop           never    
-    stop           gonna    
-    stop           give     
-    stop           you      
-    stop           up       
-    temperature    99       
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("project info", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family:            "test",
-				ParameterSize:     "7B",
-				QuantizationLevel: "FP16",
-			},
-			ProjectorInfo: map[string]any{
-				"general.architecture":         "clip",
-				"general.parameter_count":      float64(133_700_000),
-				"clip.vision.embedding_length": float64(0),
-				"clip.vision.projection_dim":   float64(0),
-			},
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-    architecture    test    
-    parameters      7B      
-    quantization    FP16    
-
-  Projector
-    architecture        clip       
-    parameters          133.70M    
-    embedding length    0          
-    dimensions          0          
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("system", func(t *testing.T) {
-		var b bytes.Buffer
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family:            "test",
-				ParameterSize:     "7B",
-				QuantizationLevel: "FP16",
-			},
-			System: `You are a pirate!
-Ahoy, matey!
-Weigh anchor!
-			`,
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-    architecture    test    
-    parameters      7B      
-    quantization    FP16    
-
-  System
-    You are a pirate!    
-    Ahoy, matey!         
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-
-	t.Run("license", func(t *testing.T) {
-		var b bytes.Buffer
-		license, err := os.ReadFile(filepath.Join("..", "LICENSE"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if err := showInfo(&api.ShowResponse{
-			Details: api.ModelDetails{
-				Family:            "test",
-				ParameterSize:     "7B",
-				QuantizationLevel: "FP16",
-			},
-			License: string(license),
-		}, &b); err != nil {
-			t.Fatal(err)
-		}
-
-		expect := `  Model
-    architecture    test    
-    parameters      7B      
-    quantization    FP16    
-
-  License
-    MIT License             
-    Copyright (c) Ollama    
-
-`
-		if diff := cmp.Diff(expect, b.String()); diff != "" {
-			t.Errorf("unexpected output (-want +got):\n%s", diff)
-		}
-	})
-}
-
-func TestDeleteHandler(t *testing.T) {
-	stopped := false
-	mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		if r.URL.Path == "/api/delete" && r.Method == http.MethodDelete {
-			var req api.DeleteRequest
-			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-				http.Error(w, err.Error(), http.StatusBadRequest)
-				return
-			}
-			if req.Name == "test-model" {
-				w.WriteHeader(http.StatusOK)
-			} else {
-				w.WriteHeader(http.StatusNotFound)
-			}
-			return
-		}
-		if r.URL.Path == "/api/generate" && r.Method == http.MethodPost {
-			var req api.GenerateRequest
-			if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-				http.Error(w, err.Error(), http.StatusBadRequest)
-				return
-			}
-			if req.Model == "test-model" {
-				w.WriteHeader(http.StatusOK)
-				if err := json.NewEncoder(w).Encode(api.GenerateResponse{
-					Done: true,
-				}); err != nil {
-					http.Error(w, err.Error(), http.StatusInternalServerError)
-				}
-				stopped = true
-				return
-			} else {
-				w.WriteHeader(http.StatusNotFound)
-				if err := json.NewEncoder(w).Encode(api.GenerateResponse{
-					Done: false,
-				}); err != nil {
-					http.Error(w, err.Error(), http.StatusInternalServerError)
-				}
-			}
-		}
-	}))
-
-	t.Setenv("OLLAMA_HOST", mockServer.URL)
-	t.Cleanup(mockServer.Close)
-
-	cmd := &cobra.Command{}
-	cmd.SetContext(context.TODO())
-	if err := DeleteHandler(cmd, []string{"test-model"}); err != nil {
-		t.Fatalf("DeleteHandler failed: %v", err)
-	}
-	if !stopped {
-		t.Fatal("Model was not stopped before deletion")
-	}
-
-	err := DeleteHandler(cmd, []string{"test-model-not-found"})
-	if err == nil || !strings.Contains(err.Error(), "unable to stop existing running model \"test-model-not-found\"") {
-		t.Fatalf("DeleteHandler failed: expected error about stopping non-existent model, got %v", err)
-	}
-}
-
-func TestGetModelfileName(t *testing.T) {
-	tests := []struct {
-		name          string
-		modelfileName string
-		fileExists    bool
-		expectedName  string
-		expectedErr   error
-	}{
-		{
-			name:          "no modelfile specified, no modelfile exists",
-			modelfileName: "",
-			fileExists:    false,
-			expectedName:  "",
-			expectedErr:   os.ErrNotExist,
-		},
-		{
-			name:          "no modelfile specified, modelfile exists",
-			modelfileName: "",
-			fileExists:    true,
-			expectedName:  "Modelfile",
-			expectedErr:   nil,
-		},
-		{
-			name:          "modelfile specified, no modelfile exists",
-			modelfileName: "crazyfile",
-			fileExists:    false,
-			expectedName:  "crazyfile",
-			expectedErr:   os.ErrNotExist,
-		},
-		{
-			name:          "modelfile specified, modelfile exists",
-			modelfileName: "anotherfile",
-			fileExists:    true,
-			expectedName:  "anotherfile",
-			expectedErr:   nil,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			cmd := &cobra.Command{
-				Use: "fakecmd",
-			}
-			cmd.Flags().String("file", "", "path to modelfile")
-
-			var expectedFilename string
-
-			if tt.fileExists {
-				tempDir, err := os.MkdirTemp("", "modelfiledir")
-				defer os.RemoveAll(tempDir)
-				if err != nil {
-					t.Fatalf("temp modelfile dir creation failed: %v", err)
-				}
-				var fn string
-				if tt.modelfileName != "" {
-					fn = tt.modelfileName
-				} else {
-					fn = "Modelfile"
-				}
-
-				tempFile, err := os.CreateTemp(tempDir, fn)
-				if err != nil {
-					t.Fatalf("temp modelfile creation failed: %v", err)
-				}
-
-				expectedFilename = tempFile.Name()
-				err = cmd.Flags().Set("file", expectedFilename)
-				if err != nil {
-					t.Fatalf("couldn't set file flag: %v", err)
-				}
-			} else {
-				if tt.modelfileName != "" {
-					expectedFilename = tt.modelfileName
-					err := cmd.Flags().Set("file", tt.modelfileName)
-					if err != nil {
-						t.Fatalf("couldn't set file flag: %v", err)
-					}
-				}
-			}
-
-			actualFilename, actualErr := getModelfileName(cmd)
-
-			if actualFilename != expectedFilename {
-				t.Errorf("expected filename: '%s' actual filename: '%s'", expectedFilename, actualFilename)
-			}
-
-			if tt.expectedErr != os.ErrNotExist {
-				if actualErr != tt.expectedErr {
-					t.Errorf("expected err: %v actual err: %v", tt.expectedErr, actualErr)
-				}
-			} else {
-				if !os.IsNotExist(actualErr) {
-					t.Errorf("expected err: %v actual err: %v", tt.expectedErr, actualErr)
-				}
-			}
-		})
-	}
-}
--- a/cmd/interactive.go
+++ b/cmd/interactive.go
@ -1,7 +1,6 @@
 package cmd

 import (
-	"cmp"
 	"errors"
 	"fmt"
 	"io"
@ -9,15 +8,15 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
-	"slices"
+	"sort"
 	"strings"

 	"github.com/spf13/cobra"
-	"golang.org/x/exp/maps"
+	"golang.org/x/exp/slices"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/progress"
 	"github.com/ollama/ollama/readline"
 	"github.com/ollama/ollama/types/errtypes"
 )
@ -28,9 +27,74 @@ const (
 	MultilineNone MultilineState = iota
 	MultilinePrompt
 	MultilineSystem
+	MultilineTemplate
 )

+// loadModel queries the server for opts.Model, copies the model-derived
+// settings (MultiModal, ParentModel, Messages) into opts, and then issues a
+// chat request with an empty message list. A spinner is shown on stderr while
+// waiting; when the server answers, the messages returned by the show request
+// are replayed to the terminal.
+func loadModel(cmd *cobra.Command, opts *runOptions) error {
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return err
+	}
+
+	p := progress.NewProgress(os.Stderr)
+	defer p.StopAndClear()
+	p.Add("", progress.NewSpinner(""))
+
+	ctx := cmd.Context()
+
+	info, err := client.Show(ctx, &api.ShowRequest{Name: opts.Model})
+	if err != nil {
+		return err
+	}
+	opts.MultiModal = slices.Contains(info.Details.Families, "clip")
+	opts.ParentModel = info.Details.ParentModel
+	// Appending an empty slice is a no-op, so no length guard is needed.
+	opts.Messages = append(opts.Messages, info.Messages...)
+
+	// NOTE(review): the empty message list presumably makes the server load
+	// the model without generating a reply; confirm against the server.
+	req := &api.ChatRequest{
+		Model:     opts.Model,
+		Messages:  []api.Message{},
+		KeepAlive: opts.KeepAlive,
+	}
+
+	return client.Chat(ctx, req, func(api.ChatResponse) error {
+		// Clear the spinner before printing the stored conversation.
+		p.StopAndClear()
+		for _, m := range opts.Messages {
+			switch m.Role {
+			case "user":
+				fmt.Printf(">>> %s\n", m.Content)
+			case "assistant":
+				displayResponse(m.Content, opts.WordWrap, &displayResponseState{})
+				fmt.Println()
+				fmt.Println()
+			}
+		}
+		return nil
+	})
+}
+
 func generateInteractive(cmd *cobra.Command, opts runOptions) error {
+	opts.Messages = make([]api.Message, 0)
+
+	err := loadModel(cmd, &opts)
+	if err != nil {
+		return err
+	}
+
 	usage := func() {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /set            Set session variables")
@ -55,6 +119,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "Available Commands:")
 		fmt.Fprintln(os.Stderr, "  /set parameter ...     Set a parameter")
 		fmt.Fprintln(os.Stderr, "  /set system <string>   Set system message")
+		fmt.Fprintln(os.Stderr, "  /set template <string> Set prompt template")
 		fmt.Fprintln(os.Stderr, "  /set history           Enable history")
 		fmt.Fprintln(os.Stderr, "  /set nohistory         Disable history")
 		fmt.Fprintln(os.Stderr, "  /set wordwrap          Enable wordwrap")
@ -100,7 +165,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		fmt.Fprintln(os.Stderr, "  /set parameter num_predict <int>      Max number of tokens to predict")
 		fmt.Fprintln(os.Stderr, "  /set parameter top_k <int>            Pick from top k num of tokens")
 		fmt.Fprintln(os.Stderr, "  /set parameter top_p <float>          Pick token based on sum of probabilities")
-		fmt.Fprintln(os.Stderr, "  /set parameter min_p <float>          Pick token based on top token probability * min_p")
 		fmt.Fprintln(os.Stderr, "  /set parameter num_ctx <int>          Set the context size")
 		fmt.Fprintln(os.Stderr, "  /set parameter temperature <float>    Set creativity level")
 		fmt.Fprintln(os.Stderr, "  /set parameter repeat_penalty <float> How strongly to penalize repetitions")
@ -120,7 +184,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 		return err
 	}

-	if envconfig.NoHistory() {
+	if envconfig.NoHistory {
 		scanner.HistoryDisable()
 	}

@ -165,6 +229,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 				opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System})
 				fmt.Println("Set system message.")
 				sb.Reset()
+			case MultilineTemplate:
+				opts.Template = sb.String()
+				fmt.Println("Set prompt template.")
+				sb.Reset()
 			}

 			multiline = MultilineNone
@ -196,7 +264,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 			opts.Model = args[1]
 			opts.Messages = []api.Message{}
 			fmt.Printf("Loading model '%s'\n", opts.Model)
-			if err := loadOrUnloadModel(cmd, &opts); err != nil {
+			if err := loadModel(cmd, &opts); err != nil {
 				return err
 			}
 			continue
@ -283,13 +351,17 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 					}
 					fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", "))
 					opts.Options[args[2]] = fp[args[2]]
-				case "system":
+				case "system", "template":
 					if len(args) < 3 {
 						usageSet()
 						continue
 					}

-					multiline = MultilineSystem
+					if args[1] == "system" {
+						multiline = MultilineSystem
+					} else if args[1] == "template" {
+						multiline = MultilineTemplate
+					}

 					line := strings.Join(args[2:], " ")
 					line, ok := strings.CutPrefix(line, `"""`)
@ -309,17 +381,23 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 						continue
 					}

-					opts.System = sb.String() // for display in modelfile
-					newMessage := api.Message{Role: "system", Content: sb.String()}
-					// Check if the slice is not empty and the last message is from 'system'
-					if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
-						// Replace the last message
-						opts.Messages[len(opts.Messages)-1] = newMessage
-					} else {
-						opts.Messages = append(opts.Messages, newMessage)
+					if args[1] == "system" {
+						opts.System = sb.String() // for display in modelfile
+						newMessage := api.Message{Role: "system", Content: sb.String()}
+						// Check if the slice is not empty and the last message is from 'system'
+						if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
+							// Replace the last message
+							opts.Messages[len(opts.Messages)-1] = newMessage
+						} else {
+							opts.Messages = append(opts.Messages, newMessage)
+						}
+						fmt.Println("Set system message.")
+						sb.Reset()
+					} else if args[1] == "template" {
+						opts.Template = sb.String()
+						fmt.Println("Set prompt template.")
+						sb.Reset()
 					}
-					fmt.Println("Set system message.")
-					sb.Reset()

 					sb.Reset()
 					continue
@ -338,9 +416,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 					return err
 				}
 				req := &api.ShowRequest{
-					Name:    opts.Model,
-					System:  opts.System,
-					Options: opts.Options,
+					Name:     opts.Model,
+					System:   opts.System,
+					Template: opts.Template,
+					Options:  opts.Options,
 				}
 				resp, err := client.Show(cmd.Context(), req)
 				if err != nil {
@ -350,7 +429,15 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {

 				switch args[1] {
 				case "info":
-					_ = showInfo(resp, os.Stderr)
+					fmt.Println("Model details:")
+					if len(resp.Details.Families) > 0 {
+						fmt.Printf("Family              %s\n", strings.Join(resp.Details.Families, ", "))
+					} else if resp.Details.Family != "" {
+						fmt.Printf("Family              %s\n", resp.Details.Family)
+					}
+					fmt.Printf("Parameter Size      %s\n", resp.Details.ParameterSize)
+					fmt.Printf("Quantization Level  %s\n", resp.Details.QuantizationLevel)
+					fmt.Println("")
 				case "license":
 					if resp.License == "" {
 						fmt.Println("No license was specified for this model.")
@ -383,9 +470,12 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 						fmt.Println("No system message was specified for this model.")
 					}
 				case "template":
-					if resp.Template != "" {
+					switch {
+					case opts.Template != "":
+						fmt.Println(opts.Template + "\n")
+					case resp.Template != "":
 						fmt.Println(resp.Template)
-					} else {
+					default:
 						fmt.Println("No prompt template was specified for this model.")
 					}
 				default:
@ -442,6 +532,13 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 					return err
 				}

+				// clear all previous images for better responses
+				if len(images) > 0 {
+					for i := range opts.Messages {
+						opts.Messages[i].Images = nil
+					}
+				}
+
 				newMessage.Content = msg
 				newMessage.Images = images
 			}
@ -462,54 +559,60 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
 }

+// buildModelfile renders the session described by opts as Modelfile text:
+// a FROM line, optional SYSTEM and TEMPLATE blocks, one PARAMETER line per
+// option (sorted by option name), a blank line, and one MESSAGE line per
+// entry in opts.Messages.
 func buildModelfile(opts runOptions) string {
-	var f parser.File
-	f.Commands = append(f.Commands, parser.Command{Name: "model", Args: cmp.Or(opts.ParentModel, opts.Model)})
-
+	var mf strings.Builder
+	// Use the parent model when one is recorded; otherwise fall back to opts.Model.
+	model := opts.ParentModel
+	if model == "" {
+		model = opts.Model
+	}
+	fmt.Fprintf(&mf, "FROM %s\n", model)
 	if opts.System != "" {
-		f.Commands = append(f.Commands, parser.Command{Name: "system", Args: opts.System})
+		fmt.Fprintf(&mf, "SYSTEM \"\"\"%s\"\"\"\n", opts.System)
 	}

-	keys := maps.Keys(opts.Options)
-	slices.Sort(keys)
+	if opts.Template != "" {
+		fmt.Fprintf(&mf, "TEMPLATE \"\"\"%s\"\"\"\n", opts.Template)
+	}
+
+	// Map iteration order is unspecified, so sort the keys for stable output.
+	keys := make([]string, 0)
+	for k := range opts.Options {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
 	for _, k := range keys {
-		v := opts.Options[k]
-		var cmds []parser.Command
-		switch t := v.(type) {
-		case []string:
-			for _, s := range t {
-				cmds = append(cmds, parser.Command{Name: k, Args: s})
-			}
-		default:
-			cmds = append(cmds, parser.Command{Name: k, Args: fmt.Sprintf("%v", t)})
-		}
-
-		f.Commands = append(f.Commands, cmds...)
+		// NOTE(review): %v prints a slice value as one bracketed list
+		// (e.g. "stop [hi there]") rather than one PARAMETER line per
+		// element, unlike the code being removed; confirm this is intended.
+		fmt.Fprintf(&mf, "PARAMETER %s %v\n", k, opts.Options[k])
 	}
+	fmt.Fprintln(&mf)

 	for _, msg := range opts.Messages {
-		f.Commands = append(f.Commands, parser.Command{Name: "message", Args: fmt.Sprintf("%s: %s", msg.Role, msg.Content)})
+		fmt.Fprintf(&mf, "MESSAGE %s \"\"\"%s\"\"\"\n", msg.Role, msg.Content)
 	}

-	return f.String()
+	return mf.String()
 }

 func normalizeFilePath(fp string) string {
-	return strings.NewReplacer(
-		"\\ ", " ", // Escaped space
-		"\\(", "(", // Escaped left parenthesis
-		"\\)", ")", // Escaped right parenthesis
-		"\\[", "[", // Escaped left square bracket
-		"\\]", "]", // Escaped right square bracket
-		"\\{", "{", // Escaped left curly brace
-		"\\}", "}", // Escaped right curly brace
-		"\\$", "$", // Escaped dollar sign
-		"\\&", "&", // Escaped ampersand
-		"\\;", ";", // Escaped semicolon
-		"\\'", "'", // Escaped single quote
-		"\\\\", "\\", // Escaped backslash
-		"\\*", "*", // Escaped asterisk
-		"\\?", "?", // Escaped question mark
-	).Replace(fp)
+	// Define a map of escaped characters and their replacements
+	replacements := map[string]string{
+		"\\ ":  " ",  // Escaped space
+		"\\(":  "(",  // Escaped left parenthesis
+		"\\)":  ")",  // Escaped right parenthesis
+		"\\[":  "[",  // Escaped left square bracket
+		"\\]":  "]",  // Escaped right square bracket
+		"\\{":  "{",  // Escaped left curly brace
+		"\\}":  "}",  // Escaped right curly brace
+		"\\$":  "$",  // Escaped dollar sign
+		"\\&":  "&",  // Escaped ampersand
+		"\\;":  ";",  // Escaped semicolon
+		"\\'":  "'",  // Escaped single quote
+		"\\\\": "\\", // Escaped backslash
+		"\\*":  "*",  // Escaped asterisk
+		"\\?":  "?",  // Escaped question mark
+	}
+
+	for escaped, actual := range replacements {
+		fp = strings.ReplaceAll(fp, escaped, actual)
+	}
+	return fp
 }

 func extractFileNames(input string) []string {
@ -529,9 +632,10 @@ func extractFileData(input string) (string, []api.ImageData, error) {
 	for _, fp := range filePaths {
 		nfp := normalizeFilePath(fp)
 		data, err := getImageData(nfp)
-		if errors.Is(err, os.ErrNotExist) {
-			continue
-		} else if err != nil {
+		if err != nil {
+			if os.IsNotExist(err) {
+				continue
+			}
 			fmt.Fprintf(os.Stderr, "Couldn't process image: %q\n", err)
 			return "", imgs, err
 		}
@ -539,7 +643,7 @@ func extractFileData(input string) (string, []api.ImageData, error) {
 		input = strings.ReplaceAll(input, fp, "")
 		imgs = append(imgs, data)
 	}
-	return strings.TrimSpace(input), imgs, nil
+	return input, imgs, nil
 }

 func getImageData(filePath string) ([]byte, error) {
@ -569,7 +673,7 @@ func getImageData(filePath string) ([]byte, error) {
 	// Check if the file size exceeds 100MB
 	var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
 	if info.Size() > maxSize {
-		return nil, errors.New("file size exceeds maximum limit (100MB)")
+		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
 	}

 	buf = make([]byte, info.Size())
--- a/cmd/interactive_test.go
+++ b/cmd/interactive_test.go
@ -1,9 +1,10 @@
 package cmd

 import (
+	"bytes"
 	"testing"
+	"text/template"

-	"github.com/google/go-cmp/cmp"
 	"github.com/stretchr/testify/assert"

 	"github.com/ollama/ollama/api"
@ -55,53 +56,61 @@ d:\path with\spaces\seven.svg inbetween7 c:\users\jdoe\eight.png inbetween8

 func TestModelfileBuilder(t *testing.T) {
 	opts := runOptions{
-		Model:  "hork",
-		System: "You are part horse and part shark, but all hork. Do horklike things",
+		Model:    "hork",
+		System:   "You are part horse and part shark, but all hork. Do horklike things",
+		Template: "This is a template.",
 		Messages: []api.Message{
 			{Role: "user", Content: "Hey there hork!"},
 			{Role: "assistant", Content: "Yes it is true, I am half horse, half shark."},
 		},
-		Options: map[string]any{
-			"temperature":      0.9,
-			"seed":             42,
-			"penalize_newline": false,
-			"stop":             []string{"hi", "there"},
-		},
+		Options: map[string]interface{}{},
 	}

-	t.Run("model", func(t *testing.T) {
-		expect := `FROM hork
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	opts.Options["temperature"] = 0.9
+	opts.Options["seed"] = 42
+	opts.Options["penalize_newline"] = false
+	opts.Options["stop"] = []string{"hi", "there"}
+
+	mf := buildModelfile(opts)
+	expectedModelfile := `FROM {{.Model}}
+SYSTEM """{{.System}}"""
+TEMPLATE """{{.Template}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
-PARAMETER stop there
+PARAMETER stop [hi there]
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+
+MESSAGE user """Hey there hork!"""
+MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `

-		actual := buildModelfile(opts)
-		if diff := cmp.Diff(expect, actual); diff != "" {
-			t.Errorf("mismatch (-want +got):\n%s", diff)
-		}
-	})
+	tmpl, err := template.New("").Parse(expectedModelfile)
+	assert.Nil(t, err)

-	t.Run("parent model", func(t *testing.T) {
-		opts.ParentModel = "horseshark"
-		expect := `FROM horseshark
-SYSTEM You are part horse and part shark, but all hork. Do horklike things
+	var buf bytes.Buffer
+	err = tmpl.Execute(&buf, opts)
+	assert.Nil(t, err)
+	assert.Equal(t, buf.String(), mf)
+
+	opts.ParentModel = "horseshark"
+	mf = buildModelfile(opts)
+	expectedModelfile = `FROM {{.ParentModel}}
+SYSTEM """{{.System}}"""
+TEMPLATE """{{.Template}}"""
 PARAMETER penalize_newline false
 PARAMETER seed 42
-PARAMETER stop hi
-PARAMETER stop there
+PARAMETER stop [hi there]
 PARAMETER temperature 0.9
-MESSAGE user Hey there hork!
-MESSAGE assistant Yes it is true, I am half horse, half shark.
+
+MESSAGE user """Hey there hork!"""
+MESSAGE assistant """Yes it is true, I am half horse, half shark."""
 `
-		actual := buildModelfile(opts)
-		if diff := cmp.Diff(expect, actual); diff != "" {
-			t.Errorf("mismatch (-want +got):\n%s", diff)
-		}
-	})
+
+	tmpl, err = template.New("").Parse(expectedModelfile)
+	assert.Nil(t, err)
+
+	var parentBuf bytes.Buffer
+	err = tmpl.Execute(&parentBuf, opts)
+	assert.Nil(t, err)
+	assert.Equal(t, parentBuf.String(), mf)
 }
--- a/cmd/start.go
+++ b/cmd/start.go
@ -1,27 +0,0 @@
-//go:build darwin || windows
-
-package cmd
-
-import (
-	"context"
-	"errors"
-	"time"
-
-	"github.com/ollama/ollama/api"
-)
-
-func waitForServer(ctx context.Context, client *api.Client) error {
-	// wait for the server to start
-	timeout := time.After(5 * time.Second)
-	tick := time.Tick(500 * time.Millisecond)
-	for {
-		select {
-		case <-timeout:
-			return errors.New("timed out waiting for server to start")
-		case <-tick:
-			if err := client.Heartbeat(ctx); err == nil {
-				return nil // server has started
-			}
-		}
-	}
-}
--- a/cmd/start_darwin.go
+++ b/cmd/start_darwin.go
@ -2,7 +2,7 @@ package cmd

 import (
 	"context"
-	"errors"
+	"fmt"
 	"os"
 	"os/exec"
 	"strings"
@ -20,7 +20,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 		return err
 	}
 	if !strings.Contains(link, "Ollama.app") {
-		return errors.New("could not find ollama app")
+		return fmt.Errorf("could not find ollama app")
 	}
 	path := strings.Split(link, "Ollama.app")
 	if err := exec.Command("/usr/bin/open", "-a", path[0]+"Ollama.app").Run(); err != nil {
--- a/cmd/start_default.go
+++ b/cmd/start_default.go
@ -4,11 +4,11 @@ package cmd

 import (
 	"context"
-	"errors"
+	"fmt"

 	"github.com/ollama/ollama/api"
 )

 func startApp(ctx context.Context, client *api.Client) error {
-	return errors.New("could not connect to ollama server, run 'ollama serve' to start it")
+	return fmt.Errorf("could not connect to ollama server, run 'ollama serve' to start it")
 }
--- a/cmd/start_windows.go
+++ b/cmd/start_windows.go
@ -31,7 +31,7 @@ func startApp(ctx context.Context, client *api.Client) error {
 			// Finally look in the path
 			appExe, err = exec.LookPath(AppName)
 			if err != nil {
-				return errors.New("could not locate ollama app")
+				return fmt.Errorf("could not locate ollama app")
 			}
 		}
 	}
--- a/convert/convert.go
+++ b/convert/convert.go
@ -1,232 +1,200 @@
 package convert

 import (
+	"cmp"
+	"encoding/binary"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"io"
-	"io/fs"
 	"log/slog"
+	"os"
+	"path/filepath"
+	"slices"
 	"strings"

+	"google.golang.org/protobuf/proto"
+
+	"github.com/ollama/ollama/convert/sentencepiece"
 	"github.com/ollama/ollama/llm"
 )

-type ModelParameters struct {
-	Architectures []string `json:"architectures"`
-	VocabSize     uint32   `json:"vocab_size"`
+// Token type identifiers recorded in Vocab.Types. The blank identifier
+// consumes iota value 0, so tokenTypeNormal is 1 and tokenTypeByte is 6.
+// NOTE(review): the numbering appears to line up with the sentencepiece piece
+// type enum, whose values are written into the same slice by
+// LoadSentencePieceTokens — confirm before reordering or inserting entries.
+const (
+	_ int32 = iota
+	tokenTypeNormal
+	tokenTypeUnknown
+	tokenTypeControl
+	tokenTypeUserDefined
+	tokenTypeUnused
+	tokenTypeByte
+)
+
+// Params holds the model hyperparameters used during conversion. Tagged
+// fields are decoded from the JSON keys named in their struct tags; the
+// trailing comments give the short names (n_embd, n_layer, n_head) that the
+// corresponding fields map to.
+// NOTE(review): presumably populated from the model's config.json by a
+// ModelFormat.GetParams implementation — confirm against the implementations.
+type Params struct {
+	Architectures     []string `json:"architectures"`
+	VocabSize         int      `json:"vocab_size"`
+	HiddenSize        int      `json:"hidden_size"`       // n_embd
+	HiddenLayers      int      `json:"num_hidden_layers"` // n_layer
+	ContextSize       int      `json:"max_position_embeddings"`
+	IntermediateSize  int      `json:"intermediate_size"`
+	AttentionHeads    int      `json:"num_attention_heads"` // n_head
+	KeyValHeads       int      `json:"num_key_value_heads"`
+	NormEPS           float64  `json:"rms_norm_eps"`
+	BoSTokenID        int      `json:"bos_token_id"`
+	EoSTokenID        int      `json:"eos_token_id"`
+	HeadDimension     int      `json:"head_dim"`
+	PaddingTokenID    int      `json:"pad_token_id"`
+	RopeFrequencyBase float64  `json:"rope_theta"`
+
+	// Expert counts, read from num_local_experts and num_experts_per_tok.
+	Experts     int `json:"num_local_experts"`
+	ExpertsUsed int `json:"num_experts_per_tok"`
+
+	// PreTokenizer carries no JSON tag.
+	// NOTE(review): looks like it is set by code rather than by the config
+	// file — confirm where it is assigned.
+	PreTokenizer string
+
+	// ByteOrder is embedded, so its encode/decode/append methods are
+	// promoted onto Params. It is an interface and is nil until assigned.
+	ByteOrder
 }

-type AdapterParameters struct {
-	Alpha          uint32 `json:"lora_alpha"`
-	LoraLayers     uint32 `json:"lora_layers"`
-	LoraParameters struct {
-		Rank  uint32  `json:"rank"`
-		Alpha float32 `json:"alpha"`
-		Scale float32 `json:"scale"`
-	} `json:"lora_parameters"`
+// ByteOrder combines binary.ByteOrder and binary.AppendByteOrder, so a single
+// value can both read/write fixed-width integers in an existing byte slice
+// and append their encoding to a byte slice.
+type ByteOrder interface {
+	binary.ByteOrder
+	binary.AppendByteOrder
 }

-func (ModelParameters) KV(t *Tokenizer) llm.KV {
-	kv := llm.KV{
-		"general.file_type":            uint32(1),
-		"general.quantization_version": uint32(2),
-		"tokenizer.ggml.pre":           t.Pre,
-		"tokenizer.ggml.model":         t.Vocabulary.Model,
-		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
-		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
-		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
-	}
-
-	if len(t.Merges) > 0 {
-		kv["tokenizer.ggml.merges"] = t.Merges
-	}
-
-	if t.Template != "" {
-		kv["tokenizer.chat_template"] = t.Template
-	}
-
-	for _, sv := range t.SpecialVocabulary {
-		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
-		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
-	}
-
-	return kv
+// ModelArch is the per-architecture conversion interface. Each method reports
+// failure through its error result; WriteGGUF writes to the supplied
+// io.WriteSeeker.
+// NOTE(review): presumably GetTensors and LoadVocab populate the
+// implementation's state before WriteGGUF serializes it — the required call
+// order is not visible here, confirm against the implementations.
+type ModelArch interface {
+	GetTensors() error
+	LoadVocab() error
+	WriteGGUF(io.WriteSeeker) error
 }

-func (p AdapterParameters) KV() llm.KV {
-	var alpha float32
-	if p.LoraParameters.Alpha == 0 {
-		alpha = float32(p.Alpha)
-	} else {
-		alpha = p.LoraParameters.Alpha
-	}
-
-	kv := llm.KV{
-		"adapter.lora.alpha": alpha,
-		"adapter.type":       "lora",
-		"general.file_type":  uint32(1),
-		"general.type":       "adapter",
-		"general.version":    "v0.2",
-	}
-
-	return kv
+// ModelFormat abstracts the on-disk format of the model being converted.
+// GetModelFormat returns a SafetensorFormat or a TorchFormat as a
+// ModelFormat.
+// NOTE(review): the string parameters are unnamed. They look like a
+// directory path (GetTensors, GetParams), a source tensor name
+// (GetLayerName), and a model name plus directory path (GetModelArch) —
+// confirm against the implementations.
+type ModelFormat interface {
+	GetLayerName(string) (string, error)
+	GetTensors(string, *Params) ([]llm.Tensor, error)
+	GetParams(string) (*Params, error)
+	GetModelArch(string, string, *Params) (ModelArch, error)
 }

-func (ModelParameters) specialTokenTypes() []string {
-	return []string{
-		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
-	}
+// ModelData bundles what is known about one model during conversion: its
+// path and name, its parameters, vocabulary and tensors, and the ModelFormat
+// it was detected as.
+// NOTE(review): Params and Vocab are pointers and may be nil until loaded —
+// confirm which step fills each field.
+type ModelData struct {
+	Path    string
+	Name    string
+	Params  *Params
+	Vocab   *Vocab
+	Tensors []llm.Tensor
+	Format  ModelFormat
 }

-func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
-	return llm.WriteGGUF(ws, kv, ts)
-}
-
-func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
-	return llm.WriteGGUF(ws, kv, ts)
-}
-
-type ModelConverter interface {
-	// KV maps parameters to LLM key-values
-	KV(*Tokenizer) llm.KV
-	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
-	Tensors([]Tensor) []llm.Tensor
-	// Replacements returns a list of string pairs to replace in tensor names.
-	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
-	Replacements() []string
-
-	// specialTokenTypes returns any special token types the model uses
-	specialTokenTypes() []string
-	// writeFile writes the model to the provided io.WriteSeeker
-	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
-}
-
-type moreParser interface {
-	parseMore(fs.FS) error
-}
-
-type AdapterConverter interface {
-	// KV maps parameters to LLM key-values
-	KV(llm.KV) llm.KV
-	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
-	Tensors([]Tensor) []llm.Tensor
-	// Replacements returns a list of string pairs to replace in tensor names.
-	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
-	Replacements() []string
-
-	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
-}
-
-func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
-	bts, err := fs.ReadFile(fsys, "adapter_config.json")
+func GetModelFormat(dirname string) (ModelFormat, error) {
+	files, err := filepath.Glob(filepath.Join(dirname, "*"))
 	if err != nil {
-		return err
+		return nil, err
 	}

-	var p AdapterParameters
-	if err := json.Unmarshal(bts, &p); err != nil {
-		return err
-	}
-
-	arch, ok := baseKV["general.architecture"]
-	if !ok {
-		return errors.New("architecture not set for the base model")
-	}
-
-	var conv AdapterConverter
-	switch arch {
-	case "llama":
-		conv = &llamaAdapter{}
-	case "gemma2":
-		conv = &gemma2Adapter{}
-	default:
-		return errors.New("unsupported architecture")
-	}
-
-	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
-	if err != nil {
-		return err
-	}
-
-	if err := json.Unmarshal(bts, conv); err != nil {
-		return err
-	}
-
-	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
-}
-
-// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
-// and files it finds in the input path.
-// Supported input model formats include safetensors.
-// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
-func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
-	bts, err := fs.ReadFile(fsys, "config.json")
-	if err != nil {
-		return err
-	}
-
-	var p ModelParameters
-	if err := json.Unmarshal(bts, &p); err != nil {
-		return err
-	}
-
-	if len(p.Architectures) < 1 {
-		return errors.New("unknown architecture")
-	}
-
-	var conv ModelConverter
-	switch p.Architectures[0] {
-	case "LlamaForCausalLM", "MistralForCausalLM":
-		conv = &llamaModel{}
-	case "MixtralForCausalLM":
-		conv = &mixtralModel{}
-	case "GemmaForCausalLM":
-		conv = &gemmaModel{}
-	case "Gemma2ForCausalLM":
-		conv = &gemma2Model{}
-	case "Phi3ForCausalLM":
-		conv = &phi3Model{}
-	case "BertModel":
-		conv = &bertModel{}
-	default:
-		return errors.New("unsupported architecture")
-	}
-
-	if err := json.Unmarshal(bts, conv); err != nil {
-		return err
-	}
-
-	if t, ok := conv.(moreParser); ok {
-		if err := t.parseMore(fsys); err != nil {
-			return err
+	for _, fn := range files {
+		if strings.HasSuffix(fn, ".safetensors") {
+			return &SafetensorFormat{}, nil
+		} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
+			slog.Debug("model is torch")
+			return &TorchFormat{}, nil
 		}
 	}

-	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
-	if err != nil {
-		return err
-	}
-
-	vocabSize := int(p.VocabSize)
-	switch {
-	case vocabSize > len(t.Vocabulary.Tokens):
-		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
-		for i := range vocabSize - len(t.Vocabulary.Tokens) {
-			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
-			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
-			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
-		}
-	case vocabSize < len(t.Vocabulary.Tokens):
-		return fmt.Errorf("vocabulary is larger than expected '%d' instead of '%d'", len(t.Vocabulary.Tokens), vocabSize)
-	default:
-		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
-	}
-
-	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
-	if err != nil {
-		return err
-	}
-
-	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
+	return nil, fmt.Errorf("couldn't determine model format")
+}
+
+// Vocab is the tokenizer vocabulary gathered for conversion. Tokens, Scores
+// and Types are parallel slices: LoadSentencePieceTokens appends one entry to
+// each for every token, so index i in all three describes the same token.
+// Types holds token type values such as tokenTypeUserDefined.
+// NOTE(review): Merges is not filled by LoadSentencePieceTokens; presumably
+// it holds BPE merge rules loaded elsewhere — confirm.
+//
+// Details on gguf's tokenizer can be found at:
+// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
+type Vocab struct {
+	Tokens []string
+	Scores []float32
+	Types  []int32
+	Merges []string
+}
+
+// LoadSentencePieceTokens reads the SentencePiece model stored in
+// dirpath/tokenizer.model and returns its vocabulary.
+//
+// Every piece contributes one token, score and type. Piece types other than
+// UNKNOWN, CONTROL, UNUSED and BYTE are recorded as NORMAL. If
+// dirpath/added_tokens.json exists, its tokens are appended as user defined
+// tokens in ascending ID order; their IDs must continue directly after the
+// last SentencePiece token, otherwise an error is returned. Finally, when
+// params.VocabSize exceeds the number of tokens collected, the vocabulary is
+// padded with "<dummyNNNNN>" user defined tokens.
+//
+// The padding step runs whether or not added_tokens.json is present. params
+// may be nil, in which case no padding is performed.
+func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
+	slog.Info(fmt.Sprintf("reading vocab from %s", filepath.Join(dirpath, "tokenizer.model")))
+	in, err := os.ReadFile(filepath.Join(dirpath, "tokenizer.model"))
+	if err != nil {
+		return nil, err
+	}
+
+	// To regenerate sentencepiece from the protobufs use:
+	// protoc -I=./ --go_out=./ sentencepiece_model.proto
+	modelProto := &sentencepiece.ModelProto{}
+	if err := proto.Unmarshal(in, modelProto); err != nil {
+		return nil, err
+	}
+
+	pieces := modelProto.GetPieces()
+	v := &Vocab{
+		Tokens: make([]string, 0, len(pieces)),
+		Scores: make([]float32, 0, len(pieces)),
+		Types:  make([]int32, 0, len(pieces)),
+	}
+
+	for _, p := range pieces {
+		v.Tokens = append(v.Tokens, p.GetPiece())
+		v.Scores = append(v.Scores, p.GetScore())
+		t := p.GetType()
+		switch t {
+		case sentencepiece.ModelProto_SentencePiece_UNKNOWN:
+		case sentencepiece.ModelProto_SentencePiece_CONTROL:
+		case sentencepiece.ModelProto_SentencePiece_UNUSED:
+		case sentencepiece.ModelProto_SentencePiece_BYTE:
+		default:
+			// every other piece type is stored as a normal token
+			t = sentencepiece.ModelProto_SentencePiece_NORMAL
+		}
+		v.Types = append(v.Types, int32(t))
+	}
+
+	slog.Info(fmt.Sprintf("vocab size: %d", len(v.Tokens)))
+
+	// add any additional tokens
+	addIn, err := os.ReadFile(filepath.Join(dirpath, "added_tokens.json"))
+	switch {
+	case os.IsNotExist(err):
+		// No user defined tokens. Do not return here: the vocabulary may
+		// still need padding to the configured vocab size below.
+	case err != nil:
+		return nil, err
+	default:
+		slog.Info("reading user defined tokens")
+
+		var extraTokenData map[string]int
+		if err := json.Unmarshal(addIn, &extraTokenData); err != nil {
+			return nil, err
+		}
+
+		type token struct {
+			key string
+			pos int
+		}
+
+		extraTokens := make([]token, 0, len(extraTokenData))
+		for k, id := range extraTokenData {
+			extraTokens = append(extraTokens, token{k, id})
+		}
+
+		// map iteration order is random; order by token ID so the tokens
+		// are appended at the index matching their ID
+		slices.SortFunc(extraTokens, func(a, b token) int {
+			return cmp.Compare(a.pos, b.pos)
+		})
+
+		numToks := len(v.Tokens)
+
+		for cnt, t := range extraTokens {
+			// the token id should match the specific index for the total number of tokens
+			if t.pos != cnt+numToks {
+				return nil, fmt.Errorf("token ID '%d' for '%s' doesn't match total token size", t.pos, t.key)
+			}
+			v.Tokens = append(v.Tokens, t.key)
+			v.Scores = append(v.Scores, -1000.0)
+			v.Types = append(v.Types, tokenTypeUserDefined)
+		}
+		slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
+	}
+
+	// pad with placeholder tokens so the vocabulary length matches the
+	// vocab size declared in the model parameters
+	if params != nil && params.VocabSize > len(v.Tokens) {
+		missingTokens := params.VocabSize - len(v.Tokens)
+		slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens))
+		for cnt := 0; cnt < missingTokens; cnt++ {
+			v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
+			v.Scores = append(v.Scores, -1)
+			v.Types = append(v.Types, tokenTypeUserDefined)
+		}
+	}
+
+	return v, nil
 }
--- a/convert/convert_bert.go
+++ b/convert/convert_bert.go
@ -1,174 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"encoding/json"
-	"io/fs"
-	"path/filepath"
-	"slices"
-	"strings"
-
-	"github.com/ollama/ollama/llm"
-)
-
-type bertModel struct {
-	ModelParameters
-	NLayers               uint32  `json:"n_layers"`
-	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
-	NLayer                uint32  `json:"n_layer"`
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	NCtx                  uint32  `json:"n_ctx"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	NEmbd                 uint32  `json:"n_embd"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NInner                uint32  `json:"n_inner"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NHead                 uint32  `json:"n_head"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	LayerNormEPS          float32 `json:"layer_norm_eps"`
-	LayerNormEpsilon      float32 `json:"layer_norm_epsilon"`
-	NormEpsilon           float32 `json:"norm_epsilon"`
-
-	PoolingType uint32
-}
-
-var (
-	_ ModelConverter = (*bertModel)(nil)
-	_ moreParser     = (*bertModel)(nil)
-)
-
-func (p *bertModel) parseMore(fsys fs.FS) error {
-	bts, err := fs.ReadFile(fsys, "modules.json")
-	if err != nil {
-		return err
-	}
-
-	var modules []struct {
-		Type string `json:"type"`
-		Path string `json:"path"`
-	}
-
-	if err := json.Unmarshal(bts, &modules); err != nil {
-		return err
-	}
-
-	var pooling string
-	for _, m := range modules {
-		if m.Type == "sentence_transformers.models.Pooling" {
-			pooling = m.Path
-			break
-		}
-	}
-
-	if pooling != "" {
-		bts, err := fs.ReadFile(fsys, filepath.Join(pooling, "config.json"))
-		if err != nil {
-			return err
-		}
-
-		var pc struct {
-			PoolingModeCLSToken   bool `json:"pooling_mode_cls_token"`
-			PoolingModeMeanTokens bool `json:"pooling_mode_mean_tokens"`
-		}
-
-		if err := json.Unmarshal(bts, &pc); err != nil {
-			return err
-		}
-
-		if pc.PoolingModeMeanTokens {
-			p.PoolingType = 1
-		} else if pc.PoolingModeCLSToken {
-			p.PoolingType = 2
-		}
-	}
-
-	return nil
-}
-
-func (p *bertModel) KV(t *Tokenizer) llm.KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "bert"
-	kv["bert.attention.causal"] = false
-	kv["bert.pooling_type"] = p.PoolingType
-
-	kv["bert.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
-
-	if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
-		kv["bert.context_length"] = contextLength
-	}
-
-	if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
-		kv["bert.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
-	}
-
-	if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
-		kv["bert.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
-	}
-
-	if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
-		kv["bert.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
-	}
-
-	if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
-		kv["bert.attention.layer_norm_epsilon"] = layerNormEpsilon
-	}
-
-	kv["tokenizer.ggml.model"] = "bert"
-	kv["tokenizer.ggml.token_type_count"] = uint32(2)
-
-	// convert to phantom space tokens
-	for i, e := range t.Tokens {
-		if strings.HasPrefix(e, "[") && strings.HasSuffix(e, "]") {
-			// noop
-		} else if strings.HasPrefix(e, "##") {
-			t.Tokens[i] = e[2:]
-		} else {
-			t.Tokens[i] = "\u2581" + e
-		}
-	}
-
-	kv["tokenizer.ggml.tokens"] = t.Tokens
-
-	return kv
-}
-
-func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
-	for _, t := range ts {
-		if slices.Contains([]string{
-			"embeddings.position_ids",
-			"pooler.dense.weight",
-			"pooler.dense.bias",
-		}, t.Name()) {
-			continue
-		}
-
-		out = append(out, llm.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (bertModel) Replacements() []string {
-	return []string{
-		"encoder.layer", "blk",
-		"encoder.layers", "blk",
-		"embeddings.word_embeddings", "token_embd",
-		"embeddings.token_type_embeddings", "token_types",
-		"embeddings.LayerNorm", "token_embd_norm",
-		"embeddings.position_embeddings", "position_embd",
-		"attention.self.query", "attn_q",
-		"attention.self.key", "attn_k",
-		"attention.self.value", "attn_v",
-		"attention.output.dense", "attn_output",
-		"attention.output.LayerNorm", "attn_output_norm",
-		"intermediate.dense", "ffn_up",
-		"output.dense", "ffn_down",
-		"output.LayerNorm", "layer_output_norm",
-	}
-}
--- a/convert/convert_gemma.go
+++ b/convert/convert_gemma.go
@ -1,100 +0,0 @@
-package convert
-
-import (
-	"strings"
-
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/llm"
-)
-
-type gemmaModel struct {
-	ModelParameters
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	HiddenLayers          uint32  `json:"num_hidden_layers"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	RMSNormEPS            float32 `json:"rms_norm_eps"`
-	HeadDim               uint32  `json:"head_dim"`
-}
-
-var _ ModelConverter = (*gemmaModel)(nil)
-
-func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "gemma"
-	kv["gemma.context_length"] = p.MaxPositionEmbeddings
-	kv["gemma.embedding_length"] = p.HiddenSize
-	kv["gemma.block_count"] = p.HiddenLayers
-	kv["gemma.feed_forward_length"] = p.IntermediateSize
-	kv["gemma.attention.head_count"] = p.NumAttentionHeads
-	kv["gemma.attention.head_count_kv"] = p.NumKeyValueHeads
-	kv["gemma.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
-	kv["gemma.attention.key_length"] = p.HeadDim
-	kv["gemma.attention.value_length"] = p.HeadDim
-	kv["tokenizer.ggml.eot_token_id"] = uint32(107)
-	kv["tokenizer.ggml.middle_token_id"] = uint32(68)
-	kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
-	kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
-	return kv
-}
-
-func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
-	for _, t := range ts {
-		if strings.HasSuffix(t.Name(), "_norm.weight") {
-			t.SetRepacker(p.addOne)
-		}
-
-		out = append(out, llm.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *gemmaModel) Replacements() []string {
-	return []string{
-		"model.embed_tokens", "token_embd",
-		"model.norm", "output_norm",
-		"model.layers", "blk",
-		"input_layernorm", "attn_norm",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"post_attention_layernorm", "ffn_norm",
-	}
-}
-
-func (*gemmaModel) addOne(_ string, data []float32, shape []uint64) ([]float32, error) {
-	n := tensor.New(tensor.WithShape(int(shape[0])), tensor.WithBacking(data))
-	ones := tensor.Ones(tensor.Float32, int(shape[0]))
-
-	n, err := n.Add(ones)
-	if err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 0)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
--- a/convert/convert_gemma2.go
+++ b/convert/convert_gemma2.go
@ -1,53 +0,0 @@
-package convert
-
-import (
-	"github.com/ollama/ollama/llm"
-)
-
-type gemma2Model struct {
-	gemmaModel
-	SlidingWindow         uint32  `json:"sliding_window"`
-	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
-	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
-}
-
-func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "gemma2"
-	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
-	kv["gemma2.embedding_length"] = p.HiddenSize
-	kv["gemma2.block_count"] = p.HiddenLayers
-	kv["gemma2.feed_forward_length"] = p.IntermediateSize
-	kv["gemma2.attention.head_count"] = p.NumAttentionHeads
-	kv["gemma2.attention.head_count_kv"] = p.NumKeyValueHeads
-	kv["gemma2.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
-	kv["gemma2.attention.key_length"] = p.HeadDim
-	kv["gemma2.attention.value_length"] = p.HeadDim
-	kv["gemma2.attention.sliding_window"] = p.SlidingWindow
-	kv["gemma2.attn_logit_softcapping"] = p.AttentionLogitSoftcap
-	kv["gemma2.final_logit_softcapping"] = p.FinalLogitSoftcap
-	kv["tokenizer.ggml.eot_token_id"] = uint32(107)
-	kv["tokenizer.ggml.middle_token_id"] = uint32(68)
-	kv["tokenizer.ggml.prefix_token_id"] = uint32(67)
-	kv["tokenizer.ggml.suffix_token_id"] = uint32(69)
-	return kv
-}
-
-func (p *gemma2Model) Replacements() []string {
-	return []string{
-		"model.embed_tokens", "token_embd",
-		"model.norm", "output_norm",
-		"model.layers", "blk",
-		"input_layernorm", "attn_norm",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"post_attention_layernorm", "post_attention_norm",
-		"pre_feedforward_layernorm", "ffn_norm",
-		"post_feedforward_layernorm", "post_ffw_norm",
-	}
-}
--- a/convert/convert_gemma2_adapter.go
+++ b/convert/convert_gemma2_adapter.go
@ -1,91 +0,0 @@
-package convert
-
-import (
-	"strings"
-
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/llm"
-)
-
-type gemma2Adapter struct {
-	AdapterParameters
-}
-
-var _ AdapterConverter = (*gemma2Adapter)(nil)
-
-func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
-	kv := p.AdapterParameters.KV()
-	kv["general.architecture"] = "gemma2"
-	return kv
-}
-
-func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
-	for _, t := range ts {
-		shape := t.Shape()
-		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
-			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
-			shape[0], shape[1] = shape[1], shape[0]
-			t.SetRepacker(p.repack)
-		}
-
-		out = append(out, llm.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *gemma2Adapter) Replacements() []string {
-	return []string{
-		"base_model.model.", "",
-		"model.layers", "blk",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"lora_A.weight", "weight.lora_a",
-		"lora_B.weight", "weight.lora_b",
-		"lora_a", "weight.lora_a",
-		"lora_b", "weight.lora_b",
-	}
-}
-
-func (p *gemma2Adapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
-	dims := []int{int(shape[1]), int(shape[0])}
-
-	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-
-	if err := n.T(1, 0); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(dims...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
--- a/convert/convert_llama.go
+++ b/convert/convert_llama.go
@ -1,213 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"fmt"
-	"math"
-	"strings"
-
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/llm"
-)
-
-type llamaModel struct {
-	ModelParameters
-	NLayers               uint32  `json:"n_layers"`
-	NumHiddenLayers       uint32  `json:"num_hidden_layers"`
-	NLayer                uint32  `json:"n_layer"`
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	NCtx                  uint32  `json:"n_ctx"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	NEmbd                 uint32  `json:"n_embd"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NInner                uint32  `json:"n_inner"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NHead                 uint32  `json:"n_head"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	RopeTheta             float32 `json:"rope_theta"`
-	RopeScaling           struct {
-		Type                            string  `json:"type"`
-		RopeType                        string  `json:"rope_type"`
-		Factor                          float32 `json:"factor"`
-		LowFrequencyFactor              float32 `json:"low_freq_factor"`
-		HighFrequencyFactor             float32 `json:"high_freq_factor"`
-		OriginalMaxPositionalEmbeddings uint32  `json:"original_max_positional_embeddings"`
-
-		factors ropeFactor
-	} `json:"rope_scaling"`
-	RMSNormEPS       float32 `json:"rms_norm_eps"`
-	LayerNormEPS     float32 `json:"layer_norm_eps"`
-	LayerNormEpsilon float32 `json:"layer_norm_epsilon"`
-	NormEpsilon      float32 `json:"norm_epsilon"`
-	HeadDim          uint32  `json:"head_dim"`
-}
-
-var _ ModelConverter = (*llamaModel)(nil)
-
-func (p *llamaModel) KV(t *Tokenizer) llm.KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "llama"
-	kv["llama.vocab_size"] = p.VocabSize
-
-	kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
-
-	if contextLength := cmp.Or(p.MaxPositionEmbeddings, p.NCtx); contextLength > 0 {
-		kv["llama.context_length"] = contextLength
-	}
-
-	if embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd); embeddingLength > 0 {
-		kv["llama.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
-	}
-
-	if feedForwardLength := cmp.Or(p.IntermediateSize, p.NInner); feedForwardLength > 0 {
-		kv["llama.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
-	}
-
-	if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
-		kv["llama.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
-		kv["llama.rope.dimension_count"] = p.HiddenSize / headCount
-	}
-
-	if p.RopeTheta > 0 {
-		kv["llama.rope.freq_base"] = p.RopeTheta
-	}
-
-	if p.RopeScaling.Type == "linear" {
-		kv["llama.rope.scaling.type"] = p.RopeScaling.Type
-		kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
-	} else if p.RopeScaling.RopeType == "llama3" {
-		dim := p.HiddenSize / p.NumAttentionHeads
-		for i := uint32(0); i < dim; i += 2 {
-			factor := cmp.Or(p.RopeScaling.Factor, 8.0)
-			factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
-			factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
-
-			original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
-			lambdaLow := float32(original) / factorLow
-			lambdaHigh := float32(original) / factorHigh
-
-			lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
-			if lambda < float64(lambdaHigh) {
-				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
-			} else if lambda > float64(lambdaLow) {
-				p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
-			} else {
-				smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
-				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
-			}
-		}
-	}
-
-	if p.NumKeyValueHeads > 0 {
-		kv["llama.attention.head_count_kv"] = p.NumKeyValueHeads
-	}
-
-	if p.RMSNormEPS > 0 {
-		kv["llama.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
-	}
-
-	if layerNormEpsilon := cmp.Or(p.LayerNormEPS, p.LayerNormEpsilon, p.NormEpsilon); layerNormEpsilon > 0 {
-		kv["llama.attention.layer_norm_epsilon"] = layerNormEpsilon
-	}
-
-	if p.HeadDim > 0 {
-		kv["llama.attention.key_length"] = p.HeadDim
-		kv["llama.attention.value_length"] = p.HeadDim
-	}
-
-	return kv
-}
-
-func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
-
-	if p.RopeScaling.factors != nil {
-		out = append(out, llm.Tensor{
-			Name:     "rope_freqs.weight",
-			Kind:     0,
-			Shape:    []uint64{uint64(len(p.RopeScaling.factors))},
-			WriterTo: p.RopeScaling.factors,
-		})
-	}
-
-	for _, t := range ts {
-		if strings.HasSuffix(t.Name(), "attn_q.weight") ||
-			strings.HasSuffix(t.Name(), "attn_k.weight") {
-			t.SetRepacker(p.repack)
-		}
-
-		out = append(out, llm.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *llamaModel) Replacements() []string {
-	return []string{
-		"lm_head", "output",
-		"model.embed_tokens", "token_embd",
-		"model.norm", "output_norm",
-		"model.layers", "blk",
-		"input_layernorm", "attn_norm",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"post_attention_layernorm", "ffn_norm",
-	}
-}
-
-func (p *llamaModel) repack(name string, data []float32, shape []uint64) ([]float32, error) {
-	var dims []int
-	for _, dim := range shape {
-		dims = append(dims, int(dim))
-	}
-
-	var heads uint32
-	if strings.HasSuffix(name, "attn_q.weight") {
-		heads = p.NumAttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight") {
-		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
-	} else {
-		return nil, fmt.Errorf("unknown tensor for repack: %s", name)
-	}
-
-	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
-		return nil, err
-	}
-
-	if err := n.T(0, 2, 1, 3); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(dims...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
--- a/convert/convert_llama_adapter.go
+++ b/convert/convert_llama_adapter.go
@ -1,169 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"strings"
-
-	"github.com/pdevine/tensor"
-	"github.com/pdevine/tensor/native"
-
-	"github.com/ollama/ollama/llm"
-)
-
-type llamaAdapter struct {
-	AdapterParameters
-	NumAttentionHeads uint32 `json:"num_attention_heads"`
-	NumKeyValueHeads  uint32 `json:"num_key_value_heads"`
-}
-
-var _ AdapterConverter = (*llamaAdapter)(nil)
-
-func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
-	kv := p.AdapterParameters.KV()
-	kv["general.architecture"] = "llama"
-	kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
-	kv["llama.attention.head_count_kv"] = baseKV["llama.attention.head_count_kv"]
-
-	p.NumAttentionHeads = baseKV["llama.attention.head_count"].(uint32)
-
-	return kv
-}
-
-func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
-	var out []llm.Tensor
-	for _, t := range ts {
-		shape := t.Shape()
-		if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
-			(strings.HasSuffix(t.Name(), "weight.lora_b") && shape[0] < shape[1]) {
-			shape[0], shape[1] = shape[1], shape[0]
-			t.SetRepacker(p.repackAndTranspose)
-		} else {
-			t.SetRepacker(p.repack)
-		}
-
-		out = append(out, llm.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    shape,
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *llamaAdapter) Replacements() []string {
-	return []string{
-		"base_model.model.", "",
-		"model.layers", "blk",
-		"self_attn.q_proj", "attn_q",
-		"self_attn.k_proj", "attn_k",
-		"self_attn.v_proj", "attn_v",
-		"self_attn.o_proj", "attn_output",
-		"mlp.gate_proj", "ffn_gate",
-		"mlp.down_proj", "ffn_down",
-		"mlp.up_proj", "ffn_up",
-		"lora_A.weight", "weight.lora_a",
-		"lora_B.weight", "weight.lora_b",
-		"lora_a", "weight.lora_a",
-		"lora_b", "weight.lora_b",
-	}
-}
-
-func (p *llamaAdapter) repack(name string, data []float32, shape []uint64) ([]float32, error) {
-	dims := []int{int(shape[1]), int(shape[0])}
-
-	var heads uint32
-	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
-		heads = p.NumAttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
-		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
-	} else {
-		return data, nil
-	}
-
-	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-
-	if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
-		return nil, err
-	}
-
-	if err := n.T(0, 2, 1, 3); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(dims...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
-
-func (p *llamaAdapter) repackAndTranspose(name string, data []float32, shape []uint64) ([]float32, error) {
-	dims := []int{int(shape[1]), int(shape[0])}
-
-	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
-
-	var heads uint32
-	if strings.HasSuffix(name, "attn_q.weight.lora_a") {
-		heads = p.NumAttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight.lora_a") {
-		heads = cmp.Or(p.NumKeyValueHeads, p.NumAttentionHeads)
-	}
-
-	if heads > 0 {
-		if err := n.Reshape(append([]int{int(heads), 2, dims[0] / int(heads) / 2}, dims[1:]...)...); err != nil {
-			return nil, err
-		}
-
-		if err := n.T(0, 2, 1, 3); err != nil {
-			return nil, err
-		}
-
-		if err := n.Reshape(dims...); err != nil {
-			return nil, err
-		}
-
-		if err := n.Transpose(); err != nil {
-			return nil, err
-		}
-	}
-
-	if err := n.T(1, 0); err != nil {
-		return nil, err
-	}
-
-	if err := n.Reshape(dims...); err != nil {
-		return nil, err
-	}
-
-	if err := n.Transpose(); err != nil {
-		return nil, err
-	}
-
-	ts, err := native.SelectF32(n, 1)
-	if err != nil {
-		return nil, err
-	}
-
-	var f32s []float32
-	for _, t := range ts {
-		f32s = append(f32s, t...)
-	}
-
-	return f32s, nil
-}
--- a/convert/convert_mixtral.go
+++ b/convert/convert_mixtral.go
@ -1,94 +0,0 @@
-package convert
-
-import (
-	"fmt"
-	"io"
-	"slices"
-	"strings"
-
-	"github.com/ollama/ollama/llm"
-)
-
-type mixtralModel struct {
-	llamaModel
-	NumLocalExperts    uint32 `json:"num_local_experts"`
-	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
-}
-
-func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
-	kv := p.llamaModel.KV(t)
-
-	if p.NumLocalExperts > 0 {
-		kv["llama.expert_count"] = p.NumLocalExperts
-	}
-
-	if p.NumExpertsPerToken > 0 {
-		kv["llama.expert_used_count"] = p.NumExpertsPerToken
-	}
-
-	return kv
-}
-
-func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
-	oldnew := []string{
-		"model.layers", "blk",
-		"w1", "ffn_gate_exps",
-		"w2", "ffn_down_exps",
-		"w3", "ffn_up_exps",
-	}
-
-	for i := range p.NumLocalExperts {
-		oldnew = append(oldnew, fmt.Sprintf(".block_sparse_moe.experts.%d.", i), ".")
-	}
-
-	// group experts of the same layer (model.layers.%d) and type (w[123]) into a single tensor
-	namer := strings.NewReplacer(oldnew...)
-	experts := make(map[string]experts)
-
-	// merge experts into a single tensor while removing them from ts
-	ts = slices.DeleteFunc(ts, func(t Tensor) bool {
-		if !strings.Contains(t.Name(), ".block_sparse_moe.experts.") {
-			return false
-		}
-
-		name := namer.Replace(t.Name())
-		experts[name] = append(experts[name], t)
-		return true
-	})
-
-	var out []llm.Tensor
-	for n, e := range experts {
-		// TODO(mxyng): sanity check experts
-		out = append(out, llm.Tensor{
-			Name:     n,
-			Kind:     e[0].Kind(),
-			Shape:    append([]uint64{uint64(len(e))}, e[0].Shape()...),
-			WriterTo: e,
-		})
-	}
-
-	return append(out, p.llamaModel.Tensors(ts)...)
-}
-
-func (p *mixtralModel) Replacements() []string {
-	return append(
-		p.llamaModel.Replacements(),
-		"block_sparse_moe.gate", "ffn_gate_inp",
-	)
-}
-
-type experts []Tensor
-
-func (e experts) WriteTo(w io.Writer) (int64, error) {
-	// TODO(mxyng): experts _should_ be numerically sorted by expert but this should check
-	for _, t := range e {
-		// the canonical merged experts tensor stacks all experts along a new, 0 axis,
-		// e.g. `tensor.Stack(0, e[0], e[1:]...)`, which requires allocating temporary buffers
-		// this accomplishes the same thing by writing each expert tensor in sequence
-		if _, err := t.WriteTo(w); err != nil {
-			return 0, err
-		}
-	}
-
-	return 0, nil
-}
--- a/convert/convert_phi3.go
+++ b/convert/convert_phi3.go
@ -1,123 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"encoding/binary"
-	"io"
-	"math"
-	"strings"
-	"sync"
-
-	"github.com/ollama/ollama/llm"
-)
-
-type phi3Model struct {
-	ModelParameters
-	NumHiddenLayers   uint32  `json:"num_hidden_layers"`
-	NLayers           uint32  `json:"n_layers"`
-	HiddenSize        uint32  `json:"hidden_size"`
-	NEmbd             uint32  `json:"n_embd"`
-	IntermediateSize  uint32  `json:"intermediate_size"`
-	NumAttentionHeads uint32  `json:"num_attention_heads"`
-	NHead             uint32  `json:"n_head"`
-	NumKeyValueHeads  uint32  `json:"num_key_value_heads"`
-	NHeadKV           uint32  `json:"n_head_kv"`
-	RopeTheta         float32 `json:"rope_theta"`
-	RopeScaling       struct {
-		Type        string     `json:"type"`
-		LongFactor  ropeFactor `json:"long_factor"`
-		ShortFactor ropeFactor `json:"short_factor"`
-	} `json:"rope_scaling"`
-	RMSNormEPS                    float32 `json:"rms_norm_eps"`
-	NPositions                    uint32  `json:"n_positions"`
-	MaxPositionEmbeddings         uint32  `json:"max_position_embeddings"`
-	OriginalMaxPositionEmbeddings uint32  `json:"original_max_position_embeddings"`
-	SlidingWindow                 uint32  `json:"sliding_window"`
-}
-
-var _ ModelConverter = (*phi3Model)(nil)
-
-func (p *phi3Model) KV(t *Tokenizer) llm.KV {
-	kv := p.ModelParameters.KV(t)
-	kv["general.architecture"] = "phi3"
-	kv["phi3.context_length"] = p.MaxPositionEmbeddings
-	kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
-	kv["phi3.feed_forward_length"] = p.IntermediateSize
-	kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
-	kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
-	kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
-	kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
-	kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
-	kv["phi3.rope.freq_base"] = p.RopeTheta
-	kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
-	kv["phi3.attention.sliding_window"] = p.SlidingWindow
-
-	scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
-
-	switch p.RopeScaling.Type {
-	case "":
-		// no scaling
-	case "su", "longrope":
-		kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
-	case "yarn":
-		kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
-	default:
-		panic("unknown rope scaling type")
-	}
-
-	return kv
-}
-
-func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
-	var addRopeFactors sync.Once
-
-	out := make([]llm.Tensor, 0, len(ts)+2)
-	for _, t := range ts {
-		if strings.HasPrefix(t.Name(), "blk.0.") {
-			addRopeFactors.Do(func() {
-				out = append(out, llm.Tensor{
-					Name:     "rope_factors_long.weight",
-					Kind:     0,
-					Shape:    []uint64{uint64(len(p.RopeScaling.LongFactor))},
-					WriterTo: p.RopeScaling.LongFactor,
-				}, llm.Tensor{
-					Name:     "rope_factors_short.weight",
-					Kind:     0,
-					Shape:    []uint64{uint64(len(p.RopeScaling.ShortFactor))},
-					WriterTo: p.RopeScaling.ShortFactor,
-				})
-			})
-		}
-
-		out = append(out, llm.Tensor{
-			Name:     t.Name(),
-			Kind:     t.Kind(),
-			Shape:    t.Shape(),
-			WriterTo: t,
-		})
-	}
-
-	return out
-}
-
-func (p *phi3Model) Replacements() []string {
-	return []string{
-		"lm_head", "output",
-		"model.embed_tokens", "token_embd",
-		"model.norm", "output_norm",
-		"model.layers", "blk",
-		"input_layernorm", "attn_norm",
-		"self_attn.qkv_proj", "attn_qkv",
-		"self_attn.o_proj", "attn_output",
-		"mlp.down_proj", "ffn_down",
-		"mlp.gate_up_proj", "ffn_up",
-		"post_attention_layernorm", "ffn_norm",
-	}
-}
-
-type ropeFactor []float32
-
-func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
-	err := binary.Write(w, binary.LittleEndian, r)
-	return 0, err
-}
--- a/convert/convert_test.go
+++ b/convert/convert_test.go
@ -1,44 +1,48 @@
+//go:build slow
+
 package convert

 import (
-	"bytes"
-	"crypto/sha256"
-	"encoding/binary"
-	"encoding/hex"
-	"encoding/json"
-	"flag"
-	"fmt"
-	"io"
-	"io/fs"
-	"log/slog"
-	"math"
 	"os"
 	"path/filepath"
-	"slices"
-	"strings"
 	"testing"

-	"golang.org/x/exp/maps"
-
 	"github.com/ollama/ollama/llm"
 )

-type tensorData struct {
-	Offsets []int  `json:"data_offsets"`
-	Type    string `json:"dtype"`
-	Shape   []int  `json:"shape"`
-}
-
-func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
+func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
 	t.Helper()

+	mf, err := GetModelFormat(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	params, err := mf.GetParams(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	arch, err := mf.GetModelArch("", p, params)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := arch.LoadVocab(); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := arch.GetTensors(); err != nil {
+		t.Fatal(err)
+	}
+
 	f, err := os.CreateTemp(t.TempDir(), "f16")
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer f.Close()

-	if err := ConvertModel(fsys, f); err != nil {
+	if err := arch.WriteGGUF(f); err != nil {
 		t.Fatal(err)
 	}

@ -46,431 +50,54 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	t.Cleanup(func() { r.Close() })
+	defer r.Close()

-	m, _, err := llm.DecodeGGML(r, math.MaxInt)
+	m, _, err := llm.DecodeGGML(r)
 	if err != nil {
 		t.Fatal(err)
 	}

-	if _, err := r.Seek(0, io.SeekStart); err != nil {
-		t.Fatal(err)
-	}
-
-	return r, m.KV(), m.Tensors()
+	return m.KV(), m.Tensors()
 }

-func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tensors) map[string]string {
-	actual := make(map[string]string)
-	for k, v := range kv {
-		if s, ok := v.(json.Marshaler); !ok {
-			actual[k] = fmt.Sprintf("%v", v)
-		} else {
-			bts, err := json.Marshal(s)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
-		}
+// TestConvertFull converts each model found under testdata and checks the
+// resulting architecture, file type, tensor count and layer count. Cases whose
+// testdata directory is missing are skipped.
+func TestConvertFull(t *testing.T) {
+	cases := []struct {
+		path    string
+		arch    string
+		tensors int
+		layers  int
+	}{
+		{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
+		{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
+		{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
+		{"gemma-2b-it", "gemma", 164, 20},
 	}

-	for _, tensor := range tensors.Items {
-		sha256sum := sha256.New()
-		sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
-		if _, err := io.Copy(sha256sum, sr); err != nil {
-			t.Fatal(err)
-		}
-
-		actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
-	}
-
-	return actual
-}
-
-func TestMain(m *testing.M) {
-	var level slog.Level
-	flag.TextVar(&level, "level", slog.LevelInfo, "log level")
-	flag.Parse()
-	slog.SetLogLoggerLevel(level)
-	os.Exit(m.Run())
-}
-
-func TestConvertModel(t *testing.T) {
-	cases := []string{
-		"Meta-Llama-3-8B-Instruct",
-		"Meta-Llama-3.1-8B-Instruct",
-		"Mistral-7B-Instruct-v0.2",
-		"Mixtral-8x7B-Instruct-v0.1",
-		"gemma-2b-it",
-		"gemma-2-2b-it",
-		// microsoft/Phi-3-mini-128-instruct@d548c233192db00165d842bf8edff054bb3212f8
-		"Phi-3-mini-128k-instruct",
-		"all-MiniLM-L6-v2",
-		"gemma-2-9b-it",
-	}
-
-	for i := range cases {
-		tt := cases[i]
-		t.Run(tt, func(t *testing.T) {
-			t.Parallel()
-
-			p := filepath.Join("testdata", tt)
-			if testing.Short() {
-				t.Skip("skipping in short mode")
-			} else if _, err := os.Stat(p); err != nil {
+	for _, tt := range cases {
+		t.Run(tt.path, func(t *testing.T) {
+			p := filepath.Join("testdata", tt.path)
+			if _, err := os.Stat(p); err != nil {
 				t.Skipf("%s not found", p)
 			}

-			f, kv, tensors := convertFull(t, os.DirFS(p))
-			actual := generateResultsJSON(t, f, kv, tensors)
+			kv, tensors := convertFull(t, p)

-			expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
-			if err != nil {
-				t.Fatal(err)
+			if kv.Architecture() != tt.arch {
+				t.Fatalf("expected %s, got %s", tt.arch, kv.Architecture())
 			}

-			var expect map[string]string
-			if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
-				t.Fatal(err)
+			if kv.FileType().String() != "F16" {
+				t.Fatalf("expected F16, got %s", kv.FileType())
 			}

-			keys := maps.Keys(expect)
-			slices.Sort(keys)
-			for _, k := range keys {
-				if v, ok := actual[k]; !ok {
-					t.Errorf("missing %s", k)
-				} else if v != expect[k] {
-					t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
-				}
+			if len(tensors) != tt.tensors {
+				t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
+			}
+
+			layers := tensors.Layers()
+			if len(layers) != tt.layers {
+				t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
 			}
 		})
 	}
 }
-
-func TestConvertInvalidTensorNames(t *testing.T) {
-	f, err := os.CreateTemp(t.TempDir(), "testmodel")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	tempDir := t.TempDir()
-
-	td := map[string]*tensorData{}
-	offset := 4096
-
-	td["model.layers.0.self_attn.q_proj.weight"] = &tensorData{
-		Offsets: []int{0, offset},
-		Type:    "F32",
-		Shape:   []int{4096, 4096},
-	}
-	td["blk.0.attn_q.weight"] = &tensorData{
-		Offsets: []int{offset, offset * 2},
-		Type:    "F32",
-		Shape:   []int{4096, 4096},
-	}
-	generateSafetensorTestData(t, tempDir, td)
-
-	err = ConvertModel(os.DirFS(tempDir), f)
-	if err == nil || !strings.HasPrefix(err.Error(), "duplicate tensor name") {
-		t.Errorf("expected error but didn't get one")
-	}
-}
-
-func TestConvertInvalidDatatype(t *testing.T) {
-	f, err := os.CreateTemp(t.TempDir(), "testmodel")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	tempDir := t.TempDir()
-
-	td := map[string]*tensorData{}
-	offset := 4096 * 14336
-
-	td["model.layers.0.mlp.down_proj.weight"] = &tensorData{
-		Offsets: []int{0, offset},
-		Type:    "I8",
-		Shape:   []int{4096, 14336},
-	}
-	td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{
-		Offsets: []int{offset, offset},
-		Type:    "U8",
-		Shape:   []int{},
-	}
-	generateSafetensorTestData(t, tempDir, td)
-
-	err = ConvertModel(os.DirFS(tempDir), f)
-	if err == nil || err.Error() != "unsupported safetensors model" {
-		t.Errorf("expected error but didn't get one")
-	}
-}
-
-func generateSafetensorTestData(t *testing.T, tempDir string, tensorData map[string]*tensorData) {
-	data, err := json.Marshal(tensorData)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	var buf bytes.Buffer
-
-	l := int64(len(data))
-	err = binary.Write(&buf, binary.LittleEndian, l)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	_, err = buf.Write(data)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer fdata.Close()
-
-	_, err = fdata.Write(buf.Bytes())
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	configData := `
-{
-  "architectures": [
-    "LlamaForCausalLM"
-  ]
-}
-`
-
-	f, err := os.Create(filepath.Join(tempDir, "config.json"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	_, err = f.WriteString(configData)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	tokenizerData := `
-{
-}
-`
-
-	f, err = os.Create(filepath.Join(tempDir, "tokenizer.json"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	_, err = f.WriteString(tokenizerData)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
-func TestConvertAdapter(t *testing.T) {
-	type AdapterCase struct {
-		Name     string
-		BaseKV   map[string]any
-		Expected map[string]string
-	}
-
-	cases := []AdapterCase{
-		{
-			Name: "discollama",
-			BaseKV: map[string]any{
-				"general.architecture":          "llama",
-				"llama.attention.head_count":    uint32(32),
-				"llama.attention.head_count_kv": uint32(8),
-			},
-			Expected: map[string]string{
-				"general.architecture":          "llama",
-				"general.file_type":             "1",
-				"general.parameter_count":       "106496",
-				"general.type":                  "adapter",
-				"general.version":               "v0.2",
-				"adapter.lora.alpha":            "16",
-				"adapter.type":                  "lora",
-				"llama.attention.head_count":    "32",
-				"llama.attention.head_count_kv": "8",
-				"blk.31.attn_q.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
-				"blk.31.attn_q.weight.lora_b":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
-				"blk.31.attn_v.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
-				"blk.31.attn_v.weight.lora_b":   "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
-			},
-		},
-	}
-
-	for _, c := range cases {
-		t.Run(c.Name, func(t *testing.T) {
-			t.Parallel()
-
-			f, err := os.CreateTemp(t.TempDir(), "f16")
-			if err != nil {
-				t.Fatal(err)
-			}
-			defer f.Close()
-
-			tempDir := t.TempDir()
-			generateLoraTestData(t, tempDir)
-
-			if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
-				t.Fatal(err)
-			}
-
-			r, err := os.Open(f.Name())
-			if err != nil {
-				t.Fatal(err)
-			}
-			defer r.Close()
-
-			m, _, err := llm.DecodeGGML(r, math.MaxInt)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			if _, err := r.Seek(0, io.SeekStart); err != nil {
-				t.Fatal(err)
-			}
-
-			actual := generateResultsJSON(t, r, m.KV(), m.Tensors())
-
-			keys := maps.Keys(c.Expected)
-			slices.Sort(keys)
-			for _, k := range keys {
-				if v, ok := actual[k]; !ok {
-					t.Errorf("missing %s", k)
-				} else if v != c.Expected[k] {
-					t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
-				}
-			}
-		})
-	}
-}
-
-func generateLoraTestData(t *testing.T, tempDir string) {
-	offset := 4096 * 8 * 4
-
-	td := map[string]*tensorData{"__metadata__": nil}
-	td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
-		Offsets: []int{0, offset},
-		Type:    "F32",
-		Shape:   []int{4096, 8},
-	}
-	td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
-		Offsets: []int{offset, offset * 2},
-		Type:    "F32",
-		Shape:   []int{8, 4096},
-	}
-	td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
-		Offsets: []int{offset * 2, offset * 3},
-		Type:    "F32",
-		Shape:   []int{4096, 8},
-	}
-	td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
-		Offsets: []int{offset * 3, offset*3 + 8*1024*4},
-		Type:    "F32",
-		Shape:   []int{8, 1024},
-	}
-
-	data, err := json.Marshal(td)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	var buf bytes.Buffer
-
-	l := int64(len(data))
-	err = binary.Write(&buf, binary.LittleEndian, l)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	_, err = buf.Write(data)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// write some data for the tensors
-
-	ones := make([]float32, 4096*8)
-	for i := range ones {
-		ones[i] = float32(1)
-	}
-
-	for range 3 {
-		err = binary.Write(&buf, binary.LittleEndian, ones)
-		if err != nil {
-			t.Fatal(err)
-		}
-	}
-
-	ones = make([]float32, 1024*8)
-	for i := range ones {
-		ones[i] = float32(1)
-	}
-
-	err = binary.Write(&buf, binary.LittleEndian, ones)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer fdata.Close()
-
-	_, err = fdata.Write(buf.Bytes())
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	configData := `
-{
-    "adapter_path": "adapters-test",
-    "batch_size": 8,
-    "config": "config-tiny.json",
-    "data": "../discollama-completion",
-    "grad_checkpoint": null,
-    "iters": 1000,
-    "learning_rate": 1e-05,
-    "lora_layers": 1,
-    "lora_parameters": {
-        "rank": 8,
-        "alpha": 16,
-        "dropout": 0.0,
-        "scale": 2.0
-    },
-    "lr_schedule": null,
-    "max_seq_length": 2048,
-    "model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
-    "resume_adapter_file": null,
-    "save_every": 100,
-    "seed": 0,
-    "steps_per_eval": 200,
-    "steps_per_report": 10,
-    "test": false,
-    "test_batches": 500,
-    "train": true,
-    "use_dora": false,
-    "val_batches": 25
-}
-`
-	f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-
-	_, err = f.WriteString(configData)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
--- a/convert/fs.go
+++ b/convert/fs.go
@ -1,58 +0,0 @@
-package convert
-
-import (
-	"archive/zip"
-	"errors"
-	"io"
-	"io/fs"
-	"os"
-	"path/filepath"
-)
-
-type ZipReader struct {
-	r *zip.Reader
-	p string
-
-	// limit is the maximum size of a file that can be read directly
-	// from the zip archive. Files larger than this size will be extracted
-	limit int64
-}
-
-func NewZipReader(r *zip.Reader, p string, limit int64) fs.FS {
-	return &ZipReader{r, p, limit}
-}
-
-func (z *ZipReader) Open(name string) (fs.File, error) {
-	r, err := z.r.Open(name)
-	if err != nil {
-		return nil, err
-	}
-	defer r.Close()
-
-	if fi, err := r.Stat(); err != nil {
-		return nil, err
-	} else if fi.Size() < z.limit {
-		return r, nil
-	}
-
-	if !filepath.IsLocal(name) {
-		return nil, zip.ErrInsecurePath
-	}
-
-	n := filepath.Join(z.p, name)
-	if _, err := os.Stat(n); errors.Is(err, os.ErrNotExist) {
-		w, err := os.Create(n)
-		if err != nil {
-			return nil, err
-		}
-		defer w.Close()
-
-		if _, err := io.Copy(w, r); err != nil {
-			return nil, err
-		}
-	} else if err != nil {
-		return nil, err
-	}
-
-	return os.Open(n)
-}
--- a/convert/gemma.go
+++ b/convert/gemma.go
@ -0,0 +1,103 @@
+package convert
+
+import (
+	"fmt"
+	"io"
+	"log/slog"
+	"strings"
+
+	"github.com/pdevine/tensor"
+	"github.com/pdevine/tensor/native"
+
+	"github.com/ollama/ollama/llm"
+)
+
+type GemmaModel struct {
+	ModelData
+}
+
+// addOnes adds 1.0 to each element of data, treating it as a vector of
+// vectorSize float32 values, and returns the summed values as a flat slice.
+// Errors reported by the tensor library are returned unchanged.
+func addOnes(data []float32, vectorSize int) ([]float32, error) {
+	vec := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
+
+	sum, err := vec.Add(tensor.Ones(tensor.Float32, vectorSize))
+	if err != nil {
+		return nil, err
+	}
+
+	rows, err := native.SelectF32(sum, 0)
+	if err != nil {
+		return nil, err
+	}
+
+	// SelectF32 yields a slice of slices; concatenate them into one flat slice.
+	var flat []float32
+	for _, row := range rows {
+		flat = append(flat, row...)
+	}
+
+	return flat, nil
+}
+
+// GetTensors loads the model's tensors through m.Format and appends them to
+// m.Tensors. Tensors whose name ends in "norm.weight" get m.Repack installed
+// as their repacker before being stored.
+//
+// It returns an error if the tensors cannot be loaded, or if a norm tensor's
+// writer is not a safetensorWriterTo (an unchecked type assertion used to
+// panic in that case).
+func (m *GemmaModel) GetTensors() error {
+	t, err := m.Format.GetTensors(m.Path, m.Params)
+	if err != nil {
+		return err
+	}
+
+	slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
+	for _, l := range t {
+		if strings.HasSuffix(l.Name, "norm.weight") {
+			// This converter only installs a repacker on safetensorWriterTo;
+			// report any other writer type instead of panicking.
+			wt, ok := l.WriterTo.(safetensorWriterTo)
+			if !ok {
+				return fmt.Errorf("tensor %q: unsupported writer type %T", l.Name, l.WriterTo)
+			}
+			wt.repacker = m.Repack
+			l.WriterTo = wt
+		}
+		m.Tensors = append(m.Tensors, l)
+	}
+
+	return nil
+}
+
+// LoadVocab loads the vocabulary for the model at m.Path by calling
+// LoadSentencePieceTokens and stores the result in m.Vocab. If loading fails
+// the error is returned and m.Vocab is left unchanged.
+func (m *GemmaModel) LoadVocab() error {
+	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
+	if err != nil {
+		return err
+	}
+
+	m.Vocab = vocab
+	return nil
+}
+
+// Repack adds one to every element of data by delegating to addOnes, using
+// shape[0] as the vector length. The tensor name argument is ignored.
+func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
+	vectorSize := int(shape[0])
+	return addOnes(data, vectorSize)
+}
+
+// WriteGGUF encodes the model to ws with a GGUF v3 encoder created for
+// m.Params.ByteOrder. The metadata is assembled from m.Name, m.Params and
+// m.Vocab; the tensors written are m.Tensors.
+func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
+	kv := llm.KV{
+		"general.architecture": "gemma",
+		"general.name":         m.Name,
+		"general.file_type":    uint32(1),
+	}
+
+	// Model hyperparameters.
+	kv["gemma.context_length"] = uint32(m.Params.ContextSize)
+	kv["gemma.embedding_length"] = uint32(m.Params.HiddenSize)
+	kv["gemma.block_count"] = uint32(m.Params.HiddenLayers)
+	kv["gemma.feed_forward_length"] = uint32(m.Params.IntermediateSize)
+	kv["gemma.attention.head_count"] = uint32(m.Params.AttentionHeads)
+	kv["gemma.attention.head_count_kv"] = uint32(m.Params.KeyValHeads)
+	kv["gemma.attention.layer_norm_rms_epsilon"] = float32(m.Params.NormEPS)
+	kv["gemma.attention.key_length"] = uint32(m.Params.HeadDimension)
+	kv["gemma.attention.value_length"] = uint32(m.Params.HeadDimension)
+
+	// Tokenizer vocabulary.
+	kv["tokenizer.ggml.model"] = "llama"
+	kv["tokenizer.ggml.tokens"] = m.Vocab.Tokens
+	kv["tokenizer.ggml.scores"] = m.Vocab.Scores
+	kv["tokenizer.ggml.token_type"] = m.Vocab.Types
+
+	// Special token ids and flags.
+	kv["tokenizer.ggml.bos_token_id"] = uint32(m.Params.BoSTokenID)
+	kv["tokenizer.ggml.eos_token_id"] = uint32(m.Params.EoSTokenID)
+	kv["tokenizer.ggml.padding_token_id"] = uint32(m.Params.PaddingTokenID)
+	kv["tokenizer.ggml.unknown_token_id"] = uint32(3)
+	kv["tokenizer.ggml.add_bos_token"] = true
+	kv["tokenizer.ggml.add_eos_token"] = false
+
+	encoder := llm.NewGGUFV3(m.Params.ByteOrder)
+	return encoder.Encode(ws, kv, m.Tensors)
+}
--- a/convert/llama.go
+++ b/convert/llama.go
@ -0,0 +1,158 @@
+package convert
+
+import (
+	"cmp"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+
+	"github.com/pdevine/tensor"
+	"github.com/pdevine/tensor/native"
+
+	"github.com/ollama/ollama/llm"
+)
+
+// LlamaModel embeds ModelData. Its methods in this file load tensors and a
+// tokenizer.json vocabulary and write them out as GGUF with the "llama"
+// architecture.
+type LlamaModel struct {
+	ModelData
+}
+
+// GetTensors asks the model's Format for the tensors stored under m.Path and
+// appends each of them to m.Tensors. Tensors named blk.<n>.attn_q.weight or
+// blk.<n>.attn_k.weight first get m.Repack installed as the repacker of
+// their writer, for both the torch and the safetensors formats.
+func (m *LlamaModel) GetTensors() error {
+	layers, err := m.Format.GetTensors(m.Path, m.Params)
+	if err != nil {
+		return err
+	}
+
+	attnQK, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
+	if err != nil {
+		return err
+	}
+
+	for _, layer := range layers {
+		if attnQK.MatchString(layer.Name) {
+			// The writers are asserted to value types, so a modified copy
+			// is stored back into the tensor.
+			switch m.Format.(type) {
+			case *TorchFormat:
+				w := layer.WriterTo.(torchWriterTo)
+				w.repacker = m.Repack
+				layer.WriterTo = w
+			case *SafetensorFormat:
+				w := layer.WriterTo.(safetensorWriterTo)
+				w.repacker = m.Repack
+				layer.WriterTo = w
+			}
+		}
+		m.Tensors = append(m.Tensors, layer)
+	}
+
+	return nil
+}
+
+// LoadVocab parses tokenizer.json from the model directory and fills m.Vocab
+// with the token contents, token types and merges it yields; the
+// pre-tokenizer value is stored in m.Params.PreTokenizer.
+//
+// A missing tokenizer.json is not treated as an error: the method returns nil
+// and leaves m.Vocab unchanged.
+func (m *LlamaModel) LoadVocab() (err error) {
+	pre, tokens, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
+	switch {
+	case errors.Is(err, os.ErrNotExist):
+		return nil
+	case err != nil:
+		return err
+	}
+
+	vocab := &Vocab{}
+	for _, tok := range tokens {
+		vocab.Tokens = append(vocab.Tokens, tok.Content)
+		vocab.Types = append(vocab.Types, tok.Type())
+	}
+	vocab.Merges = merges
+
+	m.Vocab = vocab
+	m.Params.PreTokenizer = pre
+	return nil
+}
+
+// WriteGGUF assembles the "llama" key/value metadata from m.Params and
+// m.Vocab and encodes it, followed by m.Tensors, to ws through
+// llm.NewGGUFV3 using the byte order stored in m.Params.
+//
+// It returns an error, instead of panicking, when the vocabulary has not
+// been loaded or when the attention head count is not positive (the rope
+// dimension count is HiddenSize divided by that count).
+func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
+	// LoadVocab returns nil without setting m.Vocab when tokenizer.json is
+	// missing, so a nil vocabulary is reachable here.
+	if m.Vocab == nil {
+		return errors.New("vocabulary has not been loaded")
+	}
+
+	// Integer division by zero panics at run time; reject it up front.
+	if m.Params.AttentionHeads <= 0 {
+		return fmt.Errorf("invalid attention head count: %d", m.Params.AttentionHeads)
+	}
+
+	kv := llm.KV{
+		"general.architecture":                   "llama",
+		"general.name":                           m.Name,
+		"llama.vocab_size":                       uint32(len(m.Vocab.Tokens)),
+		"llama.context_length":                   uint32(m.Params.ContextSize),
+		"llama.embedding_length":                 uint32(m.Params.HiddenSize),
+		"llama.block_count":                      uint32(m.Params.HiddenLayers),
+		"llama.feed_forward_length":              uint32(m.Params.IntermediateSize),
+		"llama.rope.freq_base":                   float32(m.Params.RopeFrequencyBase),
+		"llama.rope.dimension_count":             uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
+		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
+		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
+		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
+		"general.file_type":                      uint32(1),
+		"tokenizer.ggml.model":                   "gpt2",
+
+		"tokenizer.ggml.pre":        m.Params.PreTokenizer,
+		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
+		"tokenizer.ggml.token_type": m.Vocab.Types,
+
+		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
+		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
+		"tokenizer.ggml.unknown_token_id": uint32(0),
+	}
+
+	// Write merges when the vocabulary has them, otherwise write scores.
+	if len(m.Vocab.Merges) > 0 {
+		kv["tokenizer.ggml.merges"] = m.Vocab.Merges
+	} else {
+		kv["tokenizer.ggml.scores"] = m.Vocab.Scores
+	}
+
+	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
+}
+
+// Repack forwards to llamaRepack, supplying the model's Params.
+func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
+	return llamaRepack(name, m.Params, data, shape)
+}
+
+// llamaRepack reorders the data of an attention query or key weight tensor.
+//
+// The head count is params.AttentionHeads for names ending in
+// "attn_q.weight", and params.KeyValHeads (falling back to AttentionHeads
+// when that is zero) for names ending in "attn_k.weight"; any other name is
+// an error. The data is viewed with the non-zero dimensions of shape,
+// reshaped to (heads, 2, dims[0]/heads/2, dims[1:]...), has its axes
+// permuted as (0, 2, 1, 3), is reshaped back and transposed, and is then
+// flattened into the returned slice.
+//
+// An error is returned, rather than a panic raised, when the head count is
+// not positive or when shape contains no non-zero dimension.
+func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
+	dims := make([]int, 0, len(shape))
+	for _, dim := range shape {
+		if dim != 0 {
+			dims = append(dims, int(dim))
+		}
+	}
+
+	var heads int
+	switch {
+	case strings.HasSuffix(name, "attn_q.weight"):
+		heads = params.AttentionHeads
+	case strings.HasSuffix(name, "attn_k.weight"):
+		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
+	default:
+		return nil, fmt.Errorf("unknown tensor name: %s", name)
+	}
+
+	// dims[0] / heads / 2 below would panic on an empty shape or a zero
+	// head count.
+	if len(dims) == 0 {
+		return nil, fmt.Errorf("tensor %s has no non-zero dimensions", name)
+	}
+	if heads <= 0 {
+		return nil, fmt.Errorf("invalid head count %d for tensor %s", heads, name)
+	}
+
+	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
+	if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
+		return nil, err
+	}
+
+	if err := n.T(0, 2, 1, 3); err != nil {
+		return nil, err
+	}
+
+	if err := n.Reshape(dims...); err != nil {
+		return nil, err
+	}
+
+	if err := n.Transpose(); err != nil {
+		return nil, err
+	}
+
+	ts, err := native.SelectF32(n, 1)
+	if err != nil {
+		return nil, err
+	}
+
+	// The flattened result holds as many values as the input.
+	f32s := make([]float32, 0, len(data))
+	for _, t := range ts {
+		f32s = append(f32s, t...)
+	}
+
+	return f32s, nil
+}
--- a/convert/mistral.go
+++ b/convert/mistral.go
@ -0,0 +1,79 @@
+package convert
+
+import (
+	"io"
+	"regexp"
+
+	"github.com/ollama/ollama/llm"
+)
+
+// MistralModel embeds ModelData. Its methods in this file load tensors and a
+// vocabulary and write them out as GGUF with the "llama" architecture.
+type MistralModel struct {
+	ModelData
+}
+
+// GetTensors asks the model's Format for the tensors stored under m.Path and
+// appends each of them to m.Tensors. Tensors named blk.<n>.attn_q.weight or
+// blk.<n>.attn_k.weight first get m.Repack installed as the repacker of
+// their writer.
+func (m *MistralModel) GetTensors() error {
+	layers, err := m.Format.GetTensors(m.Path, m.Params)
+	if err != nil {
+		return err
+	}
+
+	attnQK, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
+	if err != nil {
+		return err
+	}
+
+	for _, layer := range layers {
+		if attnQK.MatchString(layer.Name) {
+			// Single-value type assertion: this panics unless the writer
+			// is a safetensorWriterTo.
+			w := layer.WriterTo.(safetensorWriterTo)
+			w.repacker = m.Repack
+			layer.WriterTo = w
+		}
+		m.Tensors = append(m.Tensors, layer)
+	}
+
+	return nil
+}
+
+// LoadVocab calls LoadSentencePieceTokens with the model's path and params
+// and stores the result in m.Vocab. If loading fails the error is returned
+// and m.Vocab is left as it was.
+func (m *MistralModel) LoadVocab() error {
+	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
+	if err == nil {
+		m.Vocab = vocab
+	}
+	return err
+}
+
+// WriteGGUF assembles the "llama" key/value metadata from m.Params and
+// m.Vocab and encodes it, followed by m.Tensors, to ws through
+// llm.NewGGUFV3 using the byte order stored in m.Params.
+func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
+	p := m.Params
+
+	// Model hyperparameters.
+	kv := llm.KV{
+		"general.architecture":                   "llama",
+		"general.name":                           m.Name,
+		"general.file_type":                      uint32(1),
+		"llama.context_length":                   uint32(p.ContextSize),
+		"llama.embedding_length":                 uint32(p.HiddenSize),
+		"llama.block_count":                      uint32(p.HiddenLayers),
+		"llama.feed_forward_length":              uint32(p.IntermediateSize),
+		"llama.rope.dimension_count":             uint32(p.HiddenSize / p.AttentionHeads),
+		"llama.attention.head_count":             uint32(p.AttentionHeads),
+		"llama.attention.head_count_kv":          uint32(p.KeyValHeads),
+		"llama.attention.layer_norm_rms_epsilon": float32(p.NormEPS),
+	}
+
+	// Tokenizer vocabulary.
+	kv["tokenizer.ggml.model"] = "llama"
+	kv["tokenizer.ggml.tokens"] = m.Vocab.Tokens
+	kv["tokenizer.ggml.scores"] = m.Vocab.Scores
+	kv["tokenizer.ggml.token_type"] = m.Vocab.Types
+
+	// Special token ids and BOS/EOS insertion flags.
+	kv["tokenizer.ggml.bos_token_id"] = uint32(p.BoSTokenID)
+	kv["tokenizer.ggml.eos_token_id"] = uint32(p.EoSTokenID)
+	kv["tokenizer.ggml.add_bos_token"] = true
+	kv["tokenizer.ggml.add_eos_token"] = false
+	kv["tokenizer.ggml.unknown_token_id"] = uint32(0)
+
+	encoder := llm.NewGGUFV3(p.ByteOrder)
+	return encoder.Encode(ws, kv, m.Tensors)
+}
+
+// Repack forwards to llamaRepack, supplying the model's Params.
+func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
+	return llamaRepack(name, m.Params, data, shape)
+}
--- a/convert/mixtral.go
+++ b/convert/mixtral.go
@ -0,0 +1,87 @@
+package convert
+
+import (
+	"io"
+	"regexp"
+
+	"github.com/ollama/ollama/llm"
+)
+
+// MixtralModel embeds ModelData. Its methods in this file load tensors and a
+// vocabulary and write them out as GGUF with the "llama" architecture,
+// including the expert count keys.
+type MixtralModel struct {
+	ModelData
+}
+
+// GetTensors asks the model's Format for the tensors stored under m.Path and
+// appends each of them to m.Tensors. Tensors named blk.<n>.attn_q.weight or
+// blk.<n>.attn_k.weight first get m.Repack installed as the repacker of
+// their writer.
+func (m *MixtralModel) GetTensors() error {
+	layers, err := m.Format.GetTensors(m.Path, m.Params)
+	if err != nil {
+		return err
+	}
+
+	attnQK, err := regexp.Compile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)
+	if err != nil {
+		return err
+	}
+
+	for _, layer := range layers {
+		if attnQK.MatchString(layer.Name) {
+			// Single-value type assertion: this panics unless the writer
+			// is a safetensorWriterTo.
+			w := layer.WriterTo.(safetensorWriterTo)
+			w.repacker = m.Repack
+			layer.WriterTo = w
+		}
+		m.Tensors = append(m.Tensors, layer)
+	}
+
+	return nil
+}
+
+// LoadVocab calls LoadSentencePieceTokens with the model's path and params
+// and stores the result in m.Vocab. If loading fails the error is returned
+// and m.Vocab is left as it was.
+func (m *MixtralModel) LoadVocab() error {
+	vocab, err := LoadSentencePieceTokens(m.Path, m.Params)
+	if err == nil {
+		m.Vocab = vocab
+	}
+	return err
+}
+
+// WriteGGUF assembles the "llama" key/value metadata, including the expert
+// counts, from m.Params and m.Vocab and encodes it, followed by m.Tensors,
+// to ws through llm.NewGGUFV3 using the byte order stored in m.Params.
+func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
+	p := m.Params
+
+	// Model hyperparameters.
+	kv := llm.KV{
+		"general.architecture":                   "llama",
+		"general.name":                           m.Name,
+		"general.file_type":                      uint32(1),
+		"llama.block_count":                      uint32(p.HiddenLayers),
+		"llama.context_length":                   uint32(p.ContextSize),
+		"llama.embedding_length":                 uint32(p.HiddenSize),
+		"llama.feed_forward_length":              uint32(p.IntermediateSize),
+		"llama.attention.head_count":             uint32(p.AttentionHeads),
+		"llama.attention.head_count_kv":          uint32(p.KeyValHeads),
+		"llama.rope.freq_base":                   float32(p.RopeFrequencyBase),
+		"llama.attention.layer_norm_rms_epsilon": float32(p.NormEPS),
+		"llama.expert_count":                     uint32(p.Experts),
+		"llama.expert_used_count":                uint32(p.ExpertsUsed),
+		"llama.vocab_size":                       uint32(len(m.Vocab.Tokens)),
+		"llama.rope.dimension_count":             uint32(p.HiddenSize / p.AttentionHeads),
+	}
+
+	// Tokenizer vocabulary.
+	kv["tokenizer.ggml.model"] = "llama"
+	kv["tokenizer.ggml.tokens"] = m.Vocab.Tokens
+	kv["tokenizer.ggml.scores"] = m.Vocab.Scores
+	kv["tokenizer.ggml.token_type"] = m.Vocab.Types
+
+	// Special token ids and BOS/EOS insertion flags.
+	kv["tokenizer.ggml.bos_token_id"] = uint32(p.BoSTokenID)
+	kv["tokenizer.ggml.eos_token_id"] = uint32(p.EoSTokenID)
+	kv["tokenizer.ggml.unknown_token_id"] = uint32(0)
+	kv["tokenizer.ggml.add_bos_token"] = true
+	kv["tokenizer.ggml.add_eos_token"] = false
+
+	encoder := llm.NewGGUFV3(p.ByteOrder)
+	return encoder.Encode(ws, kv, m.Tensors)
+}
+
+// Repack forwards to llamaRepack, supplying the model's Params.
+func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
+	return llamaRepack(name, m.Params, data, shape)
+}
--- a/convert/reader.go
+++ b/convert/reader.go
@ -1,86 +0,0 @@
-package convert
-
-import (
-	"errors"
-	"io"
-	"io/fs"
-	"strings"
-)
-
-type Tensor interface {
-	Name() string
-	Shape() []uint64
-	Kind() uint32
-	SetRepacker(repacker)
-	WriteTo(io.Writer) (int64, error)
-}
-
-type tensorBase struct {
-	name  string
-	shape []uint64
-	repacker
-}
-
-func (t tensorBase) Name() string {
-	return t.name
-}
-
-func (t tensorBase) Shape() []uint64 {
-	return t.shape
-}
-
-const (
-	tensorKindF32 uint32 = iota
-	tensorKindF16
-)
-
-func (t tensorBase) Kind() uint32 {
-	if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
-		t.name == "token_types.weight" {
-		// these tensors are always F32
-		return 0
-	}
-
-	switch len(t.shape) {
-	case 0:
-		panic("invalid tensor shape")
-	case 1:
-		return tensorKindF32
-	default:
-		return tensorKindF16
-	}
-}
-
-func (t *tensorBase) SetRepacker(fn repacker) {
-	t.repacker = fn
-}
-
-type repacker func(string, []float32, []uint64) ([]float32, error)
-
-func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
-	patterns := []struct {
-		Pattern string
-		Func    func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
-	}{
-		{"model-*-of-*.safetensors", parseSafetensors},
-		{"model.safetensors", parseSafetensors},
-		{"adapters.safetensors", parseSafetensors},
-		{"adapter_model.safetensors", parseSafetensors},
-		{"pytorch_model-*-of-*.bin", parseTorch},
-		{"pytorch_model.bin", parseTorch},
-		{"consolidated.*.pth", parseTorch},
-	}
-
-	for _, pattern := range patterns {
-		matches, err := fs.Glob(fsys, pattern.Pattern)
-		if err != nil {
-			return nil, err
-		}
-
-		if len(matches) > 0 {
-			return pattern.Func(fsys, replacer, matches...)
-		}
-	}
-
-	return nil, errors.New("unknown tensor format")
-}
--- a/convert/reader_safetensors.go
+++ b/convert/reader_safetensors.go
@ -1,163 +0,0 @@
-package convert
-
-import (
-	"bytes"
-	"encoding/binary"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"io/fs"
-	"slices"
-	"strings"
-
-	"github.com/d4l3k/go-bfloat16"
-	"github.com/x448/float16"
-	"golang.org/x/exp/maps"
-)
-
-type safetensorMetadata struct {
-	Type    string   `json:"dtype"`
-	Shape   []uint64 `json:"shape"`
-	Offsets []int64  `json:"data_offsets"`
-}
-
-func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
-	var ts []Tensor
-	for _, p := range ps {
-		f, err := fsys.Open(p)
-		if err != nil {
-			return nil, err
-		}
-		defer f.Close()
-
-		var n int64
-		if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
-			return nil, err
-		}
-
-		b := bytes.NewBuffer(make([]byte, 0, n))
-		if _, err = io.CopyN(b, f, n); err != nil {
-			return nil, err
-		}
-
-		var headers map[string]safetensorMetadata
-		if err := json.NewDecoder(b).Decode(&headers); err != nil {
-			return nil, err
-		}
-
-		keys := maps.Keys(headers)
-		slices.Sort(keys)
-
-		names := make(map[string]struct{}, len(keys))
-
-		for _, key := range keys {
-			if value := headers[key]; value.Type != "" {
-				// bitsandbytes quantized models are unsupported
-				if len(value.Shape) == 0 {
-					return nil, errors.New("unsupported safetensors model")
-				}
-				ggufName := replacer.Replace(key)
-				if _, ok := names[ggufName]; ok {
-					return nil, fmt.Errorf("duplicate tensor name '%s' was found for this model", ggufName)
-				}
-				names[ggufName] = struct{}{}
-				ts = append(ts, safetensor{
-					fs:     fsys,
-					path:   p,
-					dtype:  value.Type,
-					offset: safetensorsPad(n, value.Offsets[0]),
-					size:   safetensorsPad(n, value.Offsets[1]) - safetensorsPad(n, value.Offsets[0]),
-					tensorBase: &tensorBase{
-						name:  ggufName,
-						shape: value.Shape,
-					},
-				})
-			}
-		}
-	}
-
-	return ts, nil
-}
-
-// safetensorsPad returns the padded size of the safetensors file given a length n and offset s
-func safetensorsPad(n, offset int64) int64 {
-	return 8 + n + offset
-}
-
-type safetensor struct {
-	fs     fs.FS
-	path   string
-	dtype  string
-	offset int64
-	size   int64
-	*tensorBase
-}
-
-func (st safetensor) WriteTo(w io.Writer) (int64, error) {
-	f, err := st.fs.Open(st.path)
-	if err != nil {
-		return 0, err
-	}
-	defer f.Close()
-
-	if seeker, ok := f.(io.Seeker); ok {
-		if _, err := seeker.Seek(st.offset, io.SeekStart); err != nil {
-			return 0, err
-		}
-	} else {
-		if _, err := io.CopyN(io.Discard, f, st.offset); err != nil {
-			return 0, err
-		}
-	}
-
-	var f32s []float32
-	switch st.dtype {
-	case "F32":
-		f32s = make([]float32, st.size/4)
-		if err = binary.Read(f, binary.LittleEndian, f32s); err != nil {
-			return 0, err
-		}
-	case "F16":
-		u16s := make([]uint16, st.size/2)
-		if err = binary.Read(f, binary.LittleEndian, u16s); err != nil {
-			return 0, err
-		}
-
-		f32s = make([]float32, len(u16s))
-		for i := range u16s {
-			f32s[i] = float16.Frombits(u16s[i]).Float32()
-		}
-
-	case "BF16":
-		u8s := make([]uint8, st.size)
-		if err = binary.Read(f, binary.LittleEndian, u8s); err != nil {
-			return 0, err
-		}
-
-		f32s = bfloat16.DecodeFloat32(u8s)
-	default:
-		return 0, fmt.Errorf("unknown data type: %s", st.dtype)
-	}
-
-	if st.repacker != nil {
-		f32s, err = st.repacker(st.Name(), f32s, st.Shape())
-		if err != nil {
-			return 0, err
-		}
-	}
-
-	switch st.Kind() {
-	case tensorKindF32:
-		return 0, binary.Write(w, binary.LittleEndian, f32s)
-	case tensorKindF16:
-		f16s := make([]uint16, len(f32s))
-		for i := range f32s {
-			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
-		}
-
-		return 0, binary.Write(w, binary.LittleEndian, f16s)
-	default:
-		return 0, fmt.Errorf("unknown storage type: %d", st.Kind())
-	}
-}
--- a/convert/reader_torch.go
+++ b/convert/reader_torch.go
@ -1,48 +0,0 @@
-package convert
-
-import (
-	"io"
-	"io/fs"
-	"strings"
-
-	"github.com/nlpodyssey/gopickle/pytorch"
-	"github.com/nlpodyssey/gopickle/types"
-)
-
-func parseTorch(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]Tensor, error) {
-	var ts []Tensor
-	for _, p := range ps {
-		pt, err := pytorch.Load(p)
-		if err != nil {
-			return nil, err
-		}
-
-		for _, k := range pt.(*types.Dict).Keys() {
-			t := pt.(*types.Dict).MustGet(k)
-
-			var shape []uint64
-			for dim := range t.(*pytorch.Tensor).Size {
-				shape = append(shape, uint64(dim))
-			}
-
-			ts = append(ts, torch{
-				storage: t.(*pytorch.Tensor).Source,
-				tensorBase: &tensorBase{
-					name:  replacer.Replace(k.(string)),
-					shape: shape,
-				},
-			})
-		}
-	}
-
-	return ts, nil
-}
-
-type torch struct {
-	storage pytorch.StorageInterface
-	*tensorBase
-}
-
-func (pt torch) WriteTo(w io.Writer) (int64, error) {
-	return 0, nil
-}
--- a/convert/safetensors.go
+++ b/convert/safetensors.go
@ -0,0 +1,309 @@
+package convert
+
+import (
+	"bytes"
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"regexp"
+	"slices"
+	"strings"
+
+	"github.com/d4l3k/go-bfloat16"
+	"github.com/x448/float16"
+
+	"github.com/ollama/ollama/llm"
+)
+
+// safetensorWriterTo writes the data of one tensor read from a safetensors
+// file; see its WriteTo method.
+type safetensorWriterTo struct {
+	// t is the tensor whose Name, Shape and Kind WriteTo consults.
+	t *llm.Tensor
+
+	params *Params
+	// bo is the byte order used both to read the source and to write the output.
+	bo ByteOrder
+
+	// filename is the safetensors file that is reopened on every WriteTo call.
+	filename string
+	// dtype is the source element type; WriteTo handles "F32", "F16" and "BF16".
+	dtype string
+
+	// offset is the absolute file position of the tensor data and size its
+	// length in bytes.
+	offset, size int64
+	// repacker, when non-nil, transforms the decoded float32 data before it
+	// is written.
+	repacker func(string, []float32, []uint64) ([]float32, error)
+}
+
+// safetensorMetadata is the JSON description of one entry in a safetensors
+// header. readTensors adds the header size to Offsets to obtain absolute
+// file positions.
+type safetensorMetadata struct {
+	Type    string   `json:"dtype"`
+	Shape   []uint64 `json:"shape"`
+	Offsets []int64  `json:"data_offsets"`
+}
+
+// SafetensorFormat reads models stored as *.safetensors files alongside a
+// config.json. It carries no state.
+type SafetensorFormat struct{}
+
+// GetTensors reads the tensor descriptions from every *.safetensors file in
+// dirpath, in the order filepath.Glob returns them. The running output
+// offset is threaded from one file to the next, so tensor offsets continue
+// across files.
+func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
+	paths, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
+	if err != nil {
+		return nil, err
+	}
+
+	var (
+		all  []llm.Tensor
+		next uint64
+	)
+	for _, p := range paths {
+		ts, end, err := m.readTensors(p, next, params)
+		if err != nil {
+			return nil, err
+		}
+
+		all = append(all, ts...)
+		next = end
+	}
+	return all, nil
+}
+
+// readTensors parses the header of the safetensors file fn and returns one
+// llm.Tensor per header entry that has a shape, sorted by header key.
+//
+// offset is the output offset assigned to the first tensor; each following
+// tensor starts where the previous one ends, and the offset after the last
+// tensor is returned so the caller can continue with the next file. Tensors
+// with a two-dimensional shape get Kind 1 and all others Kind 0 (the writer
+// emits float16 for Kind 1 and float32 for Kind 0).
+//
+// An error is returned when the header length does not fit in the file, when
+// a key has no GGUF layer name, or when an entry's data_offsets is not an
+// ordered, non-negative pair.
+func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params) ([]llm.Tensor, uint64, error) {
+	f, err := os.Open(fn)
+	if err != nil {
+		return nil, 0, err
+	}
+	defer f.Close()
+
+	// The file starts with the header length as a little-endian 64-bit integer.
+	var n int64
+	if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
+		return nil, 0, err
+	}
+
+	// n sizes the buffer below, so make sure it is plausible for this file
+	// before allocating.
+	fi, err := f.Stat()
+	if err != nil {
+		return nil, 0, err
+	}
+	if n < 0 || n > fi.Size()-8 {
+		return nil, 0, fmt.Errorf("%s: invalid safetensors header length %d", fn, n)
+	}
+
+	b := bytes.NewBuffer(make([]byte, 0, n))
+	if _, err = io.CopyN(b, f, n); err != nil {
+		return nil, 0, err
+	}
+
+	var headers map[string]safetensorMetadata
+	if err := json.NewDecoder(b).Decode(&headers); err != nil {
+		return nil, 0, err
+	}
+
+	keys := make([]string, 0, len(headers))
+	for key := range headers {
+		// GetLayerName has no mapping for rotary inverse-frequency entries,
+		// so drop them here. Both spellings are filtered: "rotary_emb" and
+		// the "rotary_embd" variant this code filtered originally.
+		if strings.HasSuffix(key, "self_attn.rotary_emb.inv_freq") ||
+			strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") {
+			continue
+		}
+		keys = append(keys, key)
+	}
+
+	// Map iteration order is random; sort for a deterministic tensor order.
+	slices.Sort(keys)
+
+	// Tensor data begins after the 8-byte length prefix and the header.
+	dataStart := 8 + n
+
+	tensors := make([]llm.Tensor, 0, len(keys))
+	for _, key := range keys {
+		value := headers[key]
+
+		var kind uint32
+		switch len(value.Shape) {
+		case 0:
+			// entries without a shape (e.g. header metadata) hold no tensor data
+			continue
+		case 2:
+			kind = 1
+		}
+
+		name, err := m.GetLayerName(key)
+		if err != nil {
+			return nil, 0, err
+		}
+
+		if len(value.Offsets) != 2 || value.Offsets[0] < 0 || value.Offsets[1] < value.Offsets[0] {
+			return nil, 0, fmt.Errorf("%s: invalid data_offsets %v for tensor '%s'", fn, value.Offsets, key)
+		}
+
+		shape := make([]uint64, len(value.Shape))
+		copy(shape, value.Shape)
+
+		t := llm.Tensor{
+			Name:   name,
+			Kind:   kind,
+			Offset: offset,
+			Shape:  shape,
+		}
+
+		t.WriterTo = safetensorWriterTo{
+			t:        &t,
+			params:   params,
+			bo:       params.ByteOrder,
+			filename: fn,
+			dtype:    value.Type,
+			offset:   dataStart + value.Offsets[0],
+			size:     value.Offsets[1] - value.Offsets[0],
+		}
+
+		offset += t.Size()
+		tensors = append(tensors, t)
+	}
+
+	return tensors, offset, nil
+}
+
+// GetParams decodes config.json from dirpath into a Params value and sets
+// its byte order to little-endian.
+func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
+	cfg, err := os.Open(filepath.Join(dirpath, "config.json"))
+	if err != nil {
+		return nil, err
+	}
+	defer cfg.Close()
+
+	params := new(Params)
+	if err := json.NewDecoder(cfg).Decode(params); err != nil {
+		return nil, err
+	}
+
+	params.ByteOrder = binary.LittleEndian
+	return params, nil
+}
+
+// safetensorLayerPatterns pairs each safetensors tensor-name pattern with its
+// GGUF replacement. The patterns are compiled once at package initialisation
+// so that GetLayerName, which runs for every tensor, does not recompile them.
+var safetensorLayerPatterns = []struct {
+	re   *regexp.Regexp
+	repl string
+}{
+	{regexp.MustCompile("model.layers.(\\d+).input_layernorm.weight"), "blk.$1.attn_norm.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).mlp.down_proj.weight"), "blk.$1.ffn_down.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).mlp.gate_proj.weight"), "blk.$1.ffn_gate.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).mlp.up_proj.weight"), "blk.$1.ffn_up.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).post_attention_layernorm.weight"), "blk.$1.ffn_norm.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).self_attn.k_proj.weight"), "blk.$1.attn_k.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).self_attn.o_proj.weight"), "blk.$1.attn_output.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).self_attn.q_proj.weight"), "blk.$1.attn_q.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).self_attn.v_proj.weight"), "blk.$1.attn_v.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.gate.weight"), "blk.$1.ffn_gate_inp.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight"), "blk.$1.ffn_gate.$2.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight"), "blk.$1.ffn_down.$2.weight"},
+	{regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight"), "blk.$1.ffn_up.$2.weight"},
+}
+
+// GetLayerName translates the safetensors tensor name n into its GGUF name.
+// The three top-level tensors are mapped directly; per-layer tensors are
+// rewritten with the first pattern in safetensorLayerPatterns whose
+// replacement changes the name. An error is returned when nothing applies.
+func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
+	switch n {
+	case "model.embed_tokens.weight":
+		return "token_embd.weight", nil
+	case "lm_head.weight":
+		return "output.weight", nil
+	case "model.norm.weight":
+		return "output_norm.weight", nil
+	}
+
+	// quick hack to rename the layers to gguf format
+	for _, p := range safetensorLayerPatterns {
+		if newName := p.re.ReplaceAllString(n, p.repl); newName != n {
+			return newName, nil
+		}
+	}
+
+	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
+}
+
+// WriteTo reads the tensor's raw data from its safetensors file, converts it
+// to float32 according to the source dtype ("F32", "F16" or "BF16"), applies
+// the repacker if one is set, and writes the result to w as float32 when the
+// tensor Kind is 0 or as float16 when it is 1.
+//
+// On success it returns the number of bytes handed to w, as the io.WriterTo
+// contract specifies. On any failure it returns 0 and the error; unknown
+// dtypes and kinds are reported as errors.
+func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
+	f, err := os.Open(r.filename)
+	if err != nil {
+		return 0, err
+	}
+	defer f.Close()
+
+	if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
+		return 0, err
+	}
+
+	var f32s []float32
+	switch r.dtype {
+	case "F32":
+		f32s = make([]float32, r.size/4)
+		if err = binary.Read(f, r.bo, f32s); err != nil {
+			return 0, err
+		}
+	case "F16":
+		u16s := make([]uint16, r.size/2)
+		if err = binary.Read(f, r.bo, u16s); err != nil {
+			return 0, err
+		}
+
+		// The element count is known, so size the slice once instead of
+		// growing it by append for every element.
+		f32s = make([]float32, len(u16s))
+		for i, bits := range u16s {
+			f32s[i] = float16.Frombits(bits).Float32()
+		}
+
+	case "BF16":
+		u8s := make([]uint8, r.size)
+		if err = binary.Read(f, r.bo, u8s); err != nil {
+			return 0, err
+		}
+
+		f32s = bfloat16.DecodeFloat32(u8s)
+	default:
+		return 0, fmt.Errorf("unknown data type: %s", r.dtype)
+	}
+
+	if r.repacker != nil {
+		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
+		if err != nil {
+			return 0, err
+		}
+	}
+
+	switch r.t.Kind {
+	case 0:
+		if err := binary.Write(w, r.bo, f32s); err != nil {
+			return 0, err
+		}
+		return int64(len(f32s)) * 4, nil
+	case 1:
+		f16s := make([]uint16, len(f32s))
+		for i := range f32s {
+			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
+		}
+
+		if err := binary.Write(w, r.bo, f16s); err != nil {
+			return 0, err
+		}
+		return int64(len(f16s)) * 2, nil
+	default:
+		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
+	}
+}
+
+// GetModelArch picks the model implementation named by the single entry of
+// params.Architectures and returns it populated with name, dirPath, params
+// and this format. Llama, Mistral, Mixtral and Gemma causal-LM architectures
+// are recognised; an empty list, more than one entry, or any other
+// architecture yields an error.
+func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
+	archs := params.Architectures
+	if len(archs) == 0 {
+		return nil, fmt.Errorf("No architecture specified to convert")
+	}
+	if len(archs) != 1 {
+		return nil, fmt.Errorf("Unknown error")
+	}
+
+	// Every supported model wraps the same ModelData.
+	data := ModelData{
+		Name:   name,
+		Path:   dirPath,
+		Params: params,
+		Format: m,
+	}
+
+	switch archs[0] {
+	case "LlamaForCausalLM":
+		return &LlamaModel{data}, nil
+	case "MistralForCausalLM":
+		return &MistralModel{data}, nil
+	case "MixtralForCausalLM":
+		return &MixtralModel{data}, nil
+	case "GemmaForCausalLM":
+		return &GemmaModel{data}, nil
+	default:
+		return nil, fmt.Errorf("Models based on '%s' are not yet supported", archs[0])
+	}
+}
--- a/convert/testdata/Meta-Llama-3-8B-Instruct.json
+++ b/convert/testdata/Meta-Llama-3-8B-Instruct.json
@ -1,313 +0,0 @@
-{
-  "general.architecture": "llama",
-  "general.file_type": "1",
-  "general.quantization_version": "2",
-  "llama.block_count": "32",
-  "llama.context_length": "8192",
-  "llama.embedding_length": "4096",
-  "llama.feed_forward_length": "14336",
-  "llama.rope.dimension_count": "128",
-  "llama.rope.freq_base": "500000",
-  "llama.vocab_size": "128256",
-  "llama.attention.head_count": "32",
-  "llama.attention.head_count_kv": "8",
-  "llama.attention.layer_norm_rms_epsilon": "1e-05",
-  "tokenizer.ggml.model": "gpt2",
-  "tokenizer.ggml.pre": "llama-bpe",
-  "tokenizer.ggml.bos_token_id": "128000",
-  "tokenizer.ggml.eos_token_id": "128009",
-  "tokenizer.ggml.merges": "d0cbac1fcc9dcf03724b8db5c9bfb593ae1cf68fb9bc72eb1d15274dcbbf618b",
-  "tokenizer.ggml.token_type": "d70a88809fd7da6f1f028622685cd64268a7a922c5d343c96f25b66327358978",
-  "tokenizer.ggml.tokens": "765b529dbcbc42dd202ce657341c63807b51f3b07e09898f6aa6196326865d5a",
-  "token_embd.weight": "b53102a11d9064bbd404833e3464b1b13e08ce73300b442312cccde2f19b2698",
-  "blk.0.attn_norm.weight": "7318df3cca9e8d153ff0a503026a1265e63d20b2a8c1dd7a2769585082b5d1ee",
-  "blk.0.ffn_down.weight": "b950806a1fc722c9fad7fd0b20c3c0a7fb50f14395e1e7663a590bfd62e20900",
-  "blk.0.ffn_gate.weight": "e73e580af6d4f08e060a74a3c25efdf5d3bed99e183d95a5a85ae859014839fd",
-  "blk.0.ffn_up.weight": "c8158af679ef99746da1befb67eebb19489e0bbe6ce7d97e13e348508244e516",
-  "blk.0.ffn_norm.weight": "7ec69c3c31e95e49a3359003b0033f6b9e85561a3e3fd83e7476661ecdd756bb",
-  "blk.0.attn_k.weight": "2732303257bac969b4964e0e32ec08b5a7f5c031bb02bf6ac4467b3ea0ebcf1e",
-  "blk.0.attn_output.weight": "ecda1d43b4ccc91cd5b366d7e7a275353990ac78561a07c83d9c77031aba12dc",
-  "blk.0.attn_q.weight": "569b1f5faf92b6f00910cf7effb2d5862f91038ce5c3b0019fc10e5d79fbd5e1",
-  "blk.0.attn_v.weight": "aa8416c5ef7e32fb54a1f20d6ac651656845d4af240564b397c39bd83e06e3b8",
-  "blk.1.attn_norm.weight": "03327e02862908c2a44b2f52decdb924bf4201f400b46f8037a9cb2e1d7a61ff",
-  "blk.1.ffn_down.weight": "5a83a87603f38c99f8e1e370a2d5f967bb45ac51d881a609304a7811027321e0",
-  "blk.1.ffn_gate.weight": "31da0572c79e655186c721c231376f85e56cdcc6257c28d08c8c5b40d5c22b40",
-  "blk.1.ffn_up.weight": "e0c811d64ca155c8de10a868e72015d43888834804614ee1aa2953129ffbc90f",
-  "blk.1.ffn_norm.weight": "5861f313d6137d6f0f904d423df47fffc6069e224ff746e1b637ac9c7f0af862",
-  "blk.1.attn_k.weight": "5fbbec0acca6457b9416ebdcd90e526885d0224537b7628f6be376a7f275313d",
-  "blk.1.attn_output.weight": "b237c9763fa3f75166a6f70b70f1566e77d0d89dfa164ed1b3137393e90575c3",
-  "blk.1.attn_q.weight": "c0a9cf4a98b4882b16f3eb2b49d933793dcc5357abb246fd3fe3134ed2b12e1c",
-  "blk.1.attn_v.weight": "96867111727200cac1af7865189dd41fd62b47584e5e5f33a91f1d34509cbd40",
-  "blk.2.attn_norm.weight": "f392f8a88ee3a95b1cc19c40dd4ef66317037b0faaa1800f610779e129ee0539",
-  "blk.2.ffn_down.weight": "73823eef46632aedcc8c1cb08a736b6aa97ca97842cd1fdfc5567d8dec459662",
-  "blk.2.ffn_gate.weight": "f4909ae19fc3848b00bb8b9050122e74f8e903b89e22937036f4cc9fea20a718",
-  "blk.2.ffn_up.weight": "16f4904a3d814ea68f00519724fc4943e48444a84c786bda39aa5efc298a7d84",
-  "blk.2.ffn_norm.weight": "e3ccdf56e75cb969f6f69c39caf6daf7c4e70e89e25df0f4d2e4bc60e159aafe",
-  "blk.2.attn_k.weight": "c3beb1e0a11bcf007ef0f0d8f6bdd3082d8b29090cd29597846b5d51e308a8e5",
-  "blk.2.attn_output.weight": "bb9f66c32cff51154fea92933c2cd62549236f8cb1a767f9ef28d3f99809b343",
-  "blk.2.attn_q.weight": "8eba394132eef2a05c5a92d62d2376000f7948448d7a2dc74e6b608203add20d",
-  "blk.2.attn_v.weight": "88f61f77c53567c617db3eef8f30621109a750e679f6784f7911739bd42c2f02",
-  "blk.3.attn_norm.weight": "7b996675b7ca75fa24107b3ebe0788653ede0f49ac83b8659d71ff54d591f81a",
-  "blk.3.ffn_down.weight": "2cb332bc05e4821962fdc9dcbcc7cc12630f32117711b687d18fb53c0bc4fbf4",
-  "blk.3.ffn_gate.weight": "340b387c7f208c8f0a6db904ef8d87c1e84b7d6ad57177abd32d86c8d18b760f",
-  "blk.3.ffn_up.weight": "07484433f8a7ee061c55aa0de2ecc009f769b0617c9c0ec096e9bb2946df9f0e",
-  "blk.3.ffn_norm.weight": "4f1a4ade36b393af341240bc894a2aab09cff7e4d56dc4658445deb107f9371b",
-  "blk.3.attn_k.weight": "483dcd96acb4528df84b9842970994630dbd82b8715ace394aa8b39fcf8d6291",
-  "blk.3.attn_output.weight": "beaff0810687923585642ee11d929cbf3b43dc6f87f30ddb552c222ab57bdbb3",
-  "blk.3.attn_q.weight": "0739355002f6fce520863add697e0ff25fc88215322dc3f993be7bb68dcce7e8",
-  "blk.3.attn_v.weight": "c216d17b6d90ee3e07f82598b8161fae34de2f392dbb0f745b682b578c324767",
-  "blk.4.attn_norm.weight": "91ab405bc4ba15bf63af233f266aa43aaab43789a9e6596e14a357c2ac7df217",
-  "blk.4.ffn_down.weight": "620f34ee75cdc73aecb8949af5fbb0d2437fd81422b6d8eb7acfc52addb9fc68",
-  "blk.4.ffn_gate.weight": "f6feec7bc9acadf35ec22532f8998d8e50f31afedabb19263590dcf8b9a92eee",
-  "blk.4.ffn_up.weight": "4a72af7cd28fd07b038f6cc4406678d120517280236ea85d9e76eff40ab2cc22",
-  "blk.4.ffn_norm.weight": "1805b37b44d5d682bdbd2fadeafb763ee001617d7870848cc487079ee34b21f9",
-  "blk.4.attn_k.weight": "a1e4f9d97cdf4c1b0d177cf00c4e32d1be30c1984a239b3c9bd73f8848888853",
-  "blk.4.attn_output.weight": "a1547e2497c423b0aff0eee71d9300d6fdf4e4986679418b6e637b69a9a6720b",
-  "blk.4.attn_q.weight": "0677483a9264ea6803d03d304d87a54632242cb516e8b76b6e3e8284c2f4de04",
-  "blk.4.attn_v.weight": "02691ba3af344fcc1969428ab0df811ac94aaa2fd91b0dc4ec1ac0a58806980d",
-  "blk.5.attn_norm.weight": "ba9c028335e5c895b87a5bd1448ca429248f9746ed97bdcb8679923206117156",
-  "blk.5.ffn_down.weight": "ccfdc9006acad1940a6bc05042a3947f1066acd671e0bb53b7684e9eea9ef5c9",
-  "blk.5.ffn_gate.weight": "623157679f1e742ccc3807c0b0153ddc8450104de75ec62f1370ec3807c09cf4",
-  "blk.5.ffn_up.weight": "05748804c65091f963729b58b085f58351891cac8a2861f5eae26b06aa60b2a0",
-  "blk.5.ffn_norm.weight": "84bae55af2efc8b8429f09056c8c04990c466dae31cb3f9356038b8957f1b406",
-  "blk.5.attn_k.weight": "8c766180c726b037d587fc52371de6e3307140c52409011609d1225624b6a3eb",
-  "blk.5.attn_output.weight": "490b582b3b1dc151ae55aee8b6743dad6c01fb49e43afefb6e68394b74be3d73",
-  "blk.5.attn_q.weight": "6f7b8ca4d9025ec836a44bbcca46be30c66b471a9fb62943ddff8288b3731409",
-  "blk.5.attn_v.weight": "9f70df3ba00c9e723214b3da83ff435a2163fff5915f75515c9664c05c866c27",
-  "blk.6.attn_norm.weight": "1a4a66613a682df6f061fc7c4d986f9f7e9175b62f0c42fc1ef31db536bd5942",
-  "blk.6.ffn_down.weight": "c56f25e4e49b443dbc82d88311ee63bc1f5002cc67e52f4787fd5f003aedeac1",
-  "blk.6.ffn_gate.weight": "31a5cf1aa9b831a81588d508550f51fc425f9517c43254d4ef7096d38029cf04",
-  "blk.6.ffn_up.weight": "ce135f3a1163e0c9297a615bdbe68a67ead21edce8debbfa9f6e15e6af8d4c94",
-  "blk.6.ffn_norm.weight": "4e328ce0648c94e732bc40501858ef6262ad1161e2e407b0cdcf4813fa9d45d8",
-  "blk.6.attn_k.weight": "1eb1c4c9f9c4c7ff7f5429075e0dc6a7782bed55109fa88df209a817dd8ef960",
-  "blk.6.attn_output.weight": "3d32986b56873b88655ee1edabdd413fdd9ab18b82108c9ce90bdbc2d3a6f3a3",
-  "blk.6.attn_q.weight": "8432f583b3a2809c99c393f9beb077cb0534dd5d247c17108f2986cadc6651f6",
-  "blk.6.attn_v.weight": "5045381513815bb91839dbac8335ffe49bbc7b0008369de7ea97eb676c5e2b36",
-  "blk.7.attn_norm.weight": "3dabd003638ec2499bfc8a48c49eef34276caab4fe76894eb963207848c2fdaf",
-  "blk.7.ffn_down.weight": "194fae858608bdcffd235be59ab119d0b91c8549f864ea06dae69249e099935f",
-  "blk.7.ffn_gate.weight": "00b24c29c30246892bce0791be804a89701d4c1332777e0bcdad5d9d5666604f",
-  "blk.7.ffn_up.weight": "44d7082a5280080c90cef9e19d410391de34f212ca0736377769b8ddd0c82d5e",
-  "blk.7.ffn_norm.weight": "21fe8a7fd6911c64e0d15a788b3b4cb6d71dd6ec51de65f760ee89afbb6ae53e",
-  "blk.7.attn_k.weight": "57a149eec5f6744a9526cd3925ac073f9d12db0fbcb5afe042ef4dc846458c44",
-  "blk.7.attn_output.weight": "0e9c28a3e81a2880251ce5eed77bcb8be8aaa1a51c9cb6de820b47ed83849fc2",
-  "blk.7.attn_q.weight": "15ee75263ee4e2a43eb322bc159ae004bb7d77e3a7e63ee4ddab700430693fff",
-  "blk.7.attn_v.weight": "440aa970bba4bff429fd7b7b1de21f2ad14fb2952b776cfa4acee68d7c6e9b8f",
-  "blk.8.attn_norm.weight": "af5b44825633c42c1ae964c82bb2be6a242d3a751f0a91f1bae4f593e8f5b6ec",
-  "blk.8.ffn_down.weight": "b11c14c76adca94fa200496dd2c10743becb23aab6642443ef1ae6d8710edbc1",
-  "blk.8.ffn_gate.weight": "7bb03d3325bf8637ae2fa1296b0651356515578d46a7c5ca65c7a923d7de27bc",
-  "blk.8.ffn_up.weight": "b956ef0a0669b5a9c9bf3a8da2d1c24f52d331cfb7354f6d7c51bd65be355e30",
-  "blk.8.ffn_norm.weight": "c78c3d748302edfef76f71ea5cb2055c94352122eee8b9b1173779a1814d224e",
-  "blk.8.attn_k.weight": "c0fba6a596ed9c1c32a7055c31a935a8b31e42b77282ee47c1f03ee3bde736b5",
-  "blk.8.attn_output.weight": "83cf9947080c5d8d571f04a842bc3dcfe7bbb0195fb25b346e22635e8649f2d4",
-  "blk.8.attn_q.weight": "47409350a576b333d97b7c877d69f47f46df504f3765102dfc0be9e521c7ecd6",
-  "blk.8.attn_v.weight": "1999dff91404fdcf1ecb34d9eaaaa9244ec7658a74dec8feb7cfd1fddba0347e",
-  "blk.9.attn_norm.weight": "1e6e29d5c3889ab4e1b0a5b9998cba60179b0f1fca133515df49cbc19d092593",
-  "blk.9.ffn_down.weight": "acb898a6490adff592e10b4c62d70edc5941661ee6da44658500e9205357c8e9",
-  "blk.9.ffn_gate.weight": "4cff63013593aadc3ffbaaa6ed70ffdba1224cd43c3644bf6f4162b5ac1ab542",
-  "blk.9.ffn_up.weight": "f985b5a2d6cf4fe32c7256301c3c89b8ad22b59e516342c52da42d8110766a4e",
-  "blk.9.ffn_norm.weight": "0d659c538bc6b21ed0018f107ab674a7424a00a42946c80e07208b479b21918f",
-  "blk.9.attn_k.weight": "f67611d888780d1b38c1c146b361c65310c8183bdf64fd73e2259985c6e8517f",
-  "blk.9.attn_output.weight": "f12ca1fa62a02ddc3f77f798bfb5707e0c50bf18ee0eaa67025521a98355f26b",
-  "blk.9.attn_q.weight": "3865185f4361a645b086ad47b72904c095313fb1c624e511647bf1a7dfc1c476",
-  "blk.9.attn_v.weight": "92125bbfed63544ab56052bd1e4aa453bbf34c795249ee54cde54907c8c6d1d3",
-  "blk.10.attn_norm.weight": "5d6bfbe545bcc2fcb2fc75c68f64b1f4c918badaf53e0156fe2d88aa977b2f94",
-  "blk.10.ffn_down.weight": "1dd9da8b0d2696ab5531fbca8a29c7d67567620a9d3e5fc2a19ec5d7e4c6cc8a",
-  "blk.10.ffn_gate.weight": "6e55e7f014edaebda0ac6819a426221d3b025c27312a2e18cc5806f31e3db226",
-  "blk.10.ffn_up.weight": "d80dde54af5db51241345ee8d64c1972608644f4deeac1e8195dc423bf27474a",
-  "blk.10.ffn_norm.weight": "f6ca65951d58ae3379eee8247bec34ebd0db05674cc9295593573841b8a55df3",
-  "blk.10.attn_k.weight": "b58e350bd6b49aba0fba4e4dd6865de3a2a0651ab865dbf2419b627b53ffc187",
-  "blk.10.attn_output.weight": "6b26a986e12fe66ec286a21d7d5af5eaa1bfe6f2bf502165d270e4497235a54a",
-  "blk.10.attn_q.weight": "3440e0e5b7e0d1e426424ae5a33f4e057be623249e9035ea12e57dbe5d3893c4",
-  "blk.10.attn_v.weight": "ebfadcfe14bcd6dee933053df0a67e12e7a196d5cc45728c1ffb2a2daedd5ca2",
-  "blk.11.attn_norm.weight": "3ed057b9576cd2de84507ef64c7646dc478c651efca4c2024cbe91a4f3fbf0bc",
-  "blk.11.ffn_down.weight": "8ff1c2487d22f5c499761e4eb721418f141f960160d0bab779595a34e4d68898",
-  "blk.11.ffn_gate.weight": "9c74e4507c7e45bf39b7cc7402198cd1dd77e3fff8c625b0413acaeb16efeb9f",
-  "blk.11.ffn_up.weight": "4367158007161d29939e00a322bb6776016e43f648a94f9b08a96a477aae75be",
-  "blk.11.ffn_norm.weight": "1cc0288c1491072121f4c9a0af20be0e13af49895696a3320e4fcac608768de3",
-  "blk.11.attn_k.weight": "066f5b3c144fce1366835e1ebf376f768b333b8ae29f5b478c42d1d0c809c855",
-  "blk.11.attn_output.weight": "e0d9f3d3f2c54aed59c02713ea4fb562799ddbacbe67ca3998dfc887bc44e47b",
-  "blk.11.attn_q.weight": "28d3ecc8a88cb3815e89a7f7a7d043da7a71f702b337a126e4d3a2ac1cd6370f",
-  "blk.11.attn_v.weight": "7c5cdef10ee73bca0a3b9f6ece5f0a0155664e0ce3d8de90ccdccfab5545e5e7",
-  "blk.12.attn_norm.weight": "973b133301a1af760cd7b3a7955371ea0a750808b442deb6adaf7b98482bd0c6",
-  "blk.12.ffn_down.weight": "d6c87b4b4ca03f75546ddd6a9e7fca720585a309188723c1ace8122438d4b200",
-  "blk.12.ffn_gate.weight": "2189a6e0cab1540bd05d6089b922aa8fd694be51255654933c165f302a0c955f",
-  "blk.12.ffn_up.weight": "5affbec19b58d092b9305721e3552481fe2eff51269ea3ed91cda3b9ef84d4df",
-  "blk.12.ffn_norm.weight": "f650fd42a34e950f758b4a130e7b8b1a712b1dcbede0291bb8edde47aaed0ef6",
-  "blk.12.attn_k.weight": "59b1e86f10450a7cc188beefc0856d2dcf44e8d7fdd9cd8859c30ec1ebaf24b6",
-  "blk.12.attn_output.weight": "446b0d36b2f66bd72a2323f4f4e9d85a0f621e9a58872e89a27248d6b1123238",
-  "blk.12.attn_q.weight": "3ed6bfd39f040301ed99fad882d3e569769d594259f9948445bef0e44ec881fb",
-  "blk.12.attn_v.weight": "e73652cd5d0029b1931be3ba9d82508f6696dce5a29d085476a54fb7a2ddbabc",
-  "blk.13.attn_norm.weight": "491b85278c0bd67bd31b9b8a9720902c244bd067e53a4a03641b7c0994782e82",
-  "blk.13.ffn_down.weight": "ad71cc248a85e9ced49307a24a9bfae01d387e979a7689c82ff59998e09741f3",
-  "blk.13.ffn_gate.weight": "0a55984d53971fab97575ee0ef5882013be7fdecfa76e3fbebb5dc85a07a14d4",
-  "blk.13.ffn_up.weight": "378b697b35e2e53c0de98e8e29b73d42ae3ec112ec16129aa5997a9e2f3b5943",
-  "blk.13.ffn_norm.weight": "f8aff2f69ab286210fad45a62b03f8d10b38f96a420d7baadf6b95d7b0b0bcd2",
-  "blk.13.attn_k.weight": "25ceb841afb1034831bea7f4d6a6c578def2ce4d4c412c780ef147dc9a598360",
-  "blk.13.attn_output.weight": "a242b322889c6bdaa14b67a7bab593db39df8eea3721638ef639abbb74d482e3",
-  "blk.13.attn_q.weight": "d80be9945a369439e835c55cfb0e97828b8a66bb7ced534d9059c92487bf20a9",
-  "blk.13.attn_v.weight": "ac33274cf9b67979d9ecdc967a55175afe0c9c4aeeff6391433cd9840c818706",
-  "blk.14.attn_norm.weight": "12a1e1091de5b2da12c9e7c0b1c8e6f09ce2a749733cf7d5240445b8e21cd093",
-  "blk.14.ffn_down.weight": "cfd41965c88266e32bc2dcdadda512499c35519e8686fefb9a7f249ab2291eb5",
-  "blk.14.ffn_gate.weight": "8dcfe774f07a095c7c6cf0a901c9df70d938bad7b5ba347fbc8f694e7603c0d1",
-  "blk.14.ffn_up.weight": "c7995577fe4a72ea0fb17c4a7b6b87b959072bbfdd5edacc6c367d43465809ae",
-  "blk.14.ffn_norm.weight": "81c41ebde41739e7016ffec31d2256217b825dc3cae049a935f5f61a60d22003",
-  "blk.14.attn_k.weight": "fb708bdebe4384f5c4b479c110028554f4d122f166b8091eda7d8d65e6780eb8",
-  "blk.14.attn_output.weight": "f5295caf2dfdc60553dcabe17537a80577e8b153c902247daac058df23542514",
-  "blk.14.attn_q.weight": "c12b7a3601c68c63ab5dc9d2599ebf3f3a10abc2c59d3a2126fffd5818f2763b",
-  "blk.14.attn_v.weight": "1ce968d9149bf0d5e237d52cc6d6433565b4bbf03252a736262bb00a2b34a687",
-  "blk.15.attn_norm.weight": "266fd2c36d7dcefc6b6bb7f1c9374c41f2bab5d6c84a063b6f91c4f682dad3c4",
-  "blk.15.ffn_down.weight": "6154886e9ef0a6cc08ab0d264a35f497e6f0987efdac992ed04e87088bea7801",
-  "blk.15.ffn_gate.weight": "183d9fd3c1b5657840099053d2fd3f72ad953b1de523296159b7761f20491a76",
-  "blk.15.ffn_up.weight": "51546d4498842ae2340ee226a0888d5f61e7d2ca4d052dfa06a77b0451242d3d",
-  "blk.15.ffn_norm.weight": "ef7378091a41a25a5f58bf1bf9d3bc64ea562e7f421e1c232b1f177c30fd3500",
-  "blk.15.attn_k.weight": "8d556ab8d9639324141774999b6eed0e91d7ee645bf3e7a3dcd200b2e7a00751",
-  "blk.15.attn_output.weight": "54aa6ba87def7cbe18b0c6ab3aff5c351cb3b6ca4a0d7b2cd5f75a1312991429",
-  "blk.15.attn_q.weight": "10731b0dc031ea8e0ef37bd7f010e0a78518a10a6df05a8bae48e3148b73ef3e",
-  "blk.15.attn_v.weight": "cbbe50c2ed7224866d3cf9b489c599f3ec41a4ea1aa3181e9f4e87e1fa0cefec",
-  "blk.16.attn_norm.weight": "387058eb39d4b28c04cf1368247417f1faeae8ae79d894c9f293457e0eaa00b0",
-  "blk.16.ffn_down.weight": "2cb26ccee585e933401ad5c82ed36ddacb3289efa0b28f8cf91b020ffbd9c333",
-  "blk.16.ffn_gate.weight": "d745985efb5bab42304e5d509024631efe35f92f2b2ec4931ead6db97ca9727e",
-  "blk.16.ffn_up.weight": "7a67bd195e0642828ca36eb7818149bb70c2c25f82de07e2b5807c520daf540e",
-  "blk.16.ffn_norm.weight": "7cefd061c8182482a89272f8a4e88a954b12609a62716923ca1cb3593b1c1651",
-  "blk.16.attn_k.weight": "d7968a2de67e755b4533e061aaad1cb62f8882af92dcad67f99d6d5112513439",
-  "blk.16.attn_output.weight": "9e9ab5788272ca3394ea89eadbce8c86ecc3fd75b7899184d6191c134ad9aae0",
-  "blk.16.attn_q.weight": "ef81c261b536c1a3a093b33f44cf2d42b86e5aa2d821674f07a0c80e992ed925",
-  "blk.16.attn_v.weight": "aef38e7958301b4a437cbdd2fbae6197f677b09269ec1eaf63188cd5da428d25",
-  "blk.17.attn_norm.weight": "28f6b289f1bc3131041e9f791b7a2a3a48baee0dfea27bf7051ebbb7ed364d80",
-  "blk.17.ffn_down.weight": "1a502829aafc6a9bd6bc81f12573bf8632d5c8c659f0dfb13c8b2411f3b1ec05",
-  "blk.17.ffn_gate.weight": "ddfd8aa0eb98846ebc9afe31366249159f46ae9815199dd70161527ed241ac4d",
-  "blk.17.ffn_up.weight": "4211a3cc247071bd361b30de2131d02382f552855062bf3b3e004c17992e5d09",
-  "blk.17.ffn_norm.weight": "647e5fa99a5b0d232af36d15816539f4d27e60a50a341b00aa88bb6e4474f8b9",
-  "blk.17.attn_k.weight": "d9125ff33a19c502c0f8846433ffc24395048582fc2f463d34a0301a82156f02",
-  "blk.17.attn_output.weight": "3d64fbb1cfef04444827f37c35fd9ad3413eb2165094d339ef89f00503f09de4",
-  "blk.17.attn_q.weight": "e5b29424028f578beca385fd82e29f37adedf3037cd51e5889d5a1ffb0428ca7",
-  "blk.17.attn_v.weight": "1809c5aaf2ac04c5d65539097564ad62796e87d24bb8b9ce5b095561a61d908a",
-  "blk.18.attn_norm.weight": "99daca58d001c627523d3adfbca1d95f04e590382a326866544d57989d5f4835",
-  "blk.18.ffn_down.weight": "84f30231ce6ca0f10227541dfc602d6418c1a210386b0c4926ef1656e7d4635c",
-  "blk.18.ffn_gate.weight": "ca5bbe4468b541740e54f69b9e08fcc8e478c344b70551dab21b1206acfbaadb",
-  "blk.18.ffn_up.weight": "0b3067b9dded31686dcfdc1e247eae3974a28a61ac59e9862758dbfaad64e8f7",
-  "blk.18.ffn_norm.weight": "8154a102232dbc0f90ce77ae5c1ff8f26f8b6e4dcf326e9ec1645749669e7960",
-  "blk.18.attn_k.weight": "25abb26021ccc481471a30e0d4cbeb7e1db29828417ec5136edeb93fecf09ac4",
-  "blk.18.attn_output.weight": "d87d481d9b046b68efa06ccdd4ed8cbf61e692d61114b75b7fad5ed75f5d87b2",
-  "blk.18.attn_q.weight": "cc6400379e15766992ff1293be79dc67682c28e9e15155a78109f4b64653b164",
-  "blk.18.attn_v.weight": "45c75cb1dd496aea3173aafe2575b841dd1d02cbe010b3198099731eb98f531c",
-  "blk.19.attn_norm.weight": "65389efc75297684773284ef8e5f8789a4504b636c9f33b8a32e0ee42499fa72",
-  "blk.19.ffn_down.weight": "4eefab7e939f64a17e4a214ca3c77a6fa110d94f677e2d6401086f70fc538b04",
-  "blk.19.ffn_gate.weight": "f1c0a59cafda66f466ab585b0b8b4861b58abe87a67cea1f6a488492242edfdf",
-  "blk.19.ffn_up.weight": "c42d045eef588db4a0e56960a57e110e1ff92eb8041107d19899165fd3b90f17",
-  "blk.19.ffn_norm.weight": "a8f33eda6d5d62ff5f333ad9771783caff556641f4e7df713451385676f441fa",
-  "blk.19.attn_k.weight": "0bab5d9e9083492bfb05a5a3bb23b79c0e7b99ef6a6644817b4d57d5c453b8a5",
-  "blk.19.attn_output.weight": "c99c551d70eafad0f7aea98fb6f9251635897168eb3895f76abf0d4ea3b3aa6f",
-  "blk.19.attn_q.weight": "c98bde95627c3b54c9443813ca50b4e14f518319681db6bbf7b2332ba26e9a60",
-  "blk.19.attn_v.weight": "ff3a490518cf64904db89ce0dc7d6eb89e870f1440e41883c6b55a221f82de84",
-  "blk.20.ffn_gate.weight": "761f0e317229cafe9d3754048ab038a0a84e9a287b196ab65f633139f2d29aba",
-  "blk.20.attn_k.weight": "45d13439b41066d282e8490a726785abf513605f46c79bd0c840f6419d27e790",
-  "blk.20.attn_output.weight": "a3b958d84b4a097844179b7d55c18fd0e4f319cb15e918c6fde33b68de1bcac6",
-  "blk.20.attn_q.weight": "127ab8e7d8c3f882874904196a02712bab42e6744fde45871b67350609d19f5e",
-  "blk.20.attn_v.weight": "5f0ad2d14a8ae42dd3bbeccfb33295687a14055fa92c54bc946249373c1c9f17",
-  "blk.20.attn_norm.weight": "77300b1755edc8c70089e0f45efa646056b9add7d8568b2324d2f3e62b64971a",
-  "blk.20.ffn_down.weight": "ab93d0e075b42e9017b701a070d561e698050d90aac4b4b9919256fbe50c3204",
-  "blk.20.ffn_up.weight": "4fd6628a07acc57a48d1ef83f81b7d7aa0bce569c1160a99d307284f8821322c",
-  "blk.20.ffn_norm.weight": "2a9e46b9e48e8e55215de56592e1f189530037c1c94a1428e3d6f106c7f26fb2",
-  "blk.21.attn_norm.weight": "4b3b5912c7bc61eb9da8e47d4651f896e85d9e59c4ecaa65df7acf3c21737298",
-  "blk.21.ffn_down.weight": "7146f931663d93b8771cd84405cd4802ea6560d0729b0d6d44588203c095bc53",
-  "blk.21.ffn_gate.weight": "b44ec5d64388fa40b90b3e9976d97a8b6800fa3b97584f32e64b03daffb8601f",
-  "blk.21.ffn_up.weight": "0cf3643fd23c685e17062cd11e116e17ce57a405e5e78953bab94cd62fe48789",
-  "blk.21.ffn_norm.weight": "4ef2cdb53da166df70b39f3e6b17af51848cfa5ea3c27ad6a1ae2a1bb1da1ce9",
-  "blk.21.attn_k.weight": "5d40f32a706f670c19972b14176bf660d5b045e3637b110dbf8d7de4ff32101a",
-  "blk.21.attn_output.weight": "18afaa916752ce16c9653ec0ec7e2fe60be55faa2aa5025d147be184adb75cac",
-  "blk.21.attn_q.weight": "2621daa5f858931514a4b2f0fe8d81cf9b96f541e6af99bfa7539e9bde8e34ee",
-  "blk.21.attn_v.weight": "63226dafc54c899bbce4aa49efceeedd8908e94faa613450fdda91f332b62864",
-  "blk.22.attn_norm.weight": "cf3058daab4d2c04387e7d169d1553bb8e7358eea66285ec067703f6ce62043a",
-  "blk.22.ffn_down.weight": "6a58d5fd220abdbac6cee7ba048abab794731af318f04982c2506df59413d0b3",
-  "blk.22.ffn_gate.weight": "d5614535324b03c7b91727a903b2a72f8d07ad17f7aa8b61ea173cf9b895069e",
-  "blk.22.ffn_up.weight": "ec20da3949566e93f66cabb67f8cd7eab399047ec6ebf5d43edfaf3669b82296",
-  "blk.22.ffn_norm.weight": "84c82f38f53a649972a44466fc476bf764e064ce18de870291edc302f3700e28",
-  "blk.22.attn_k.weight": "a3d2ecc37fde7c201176bb8abadf27f0d8ede9679a6034913e03d9db924fda12",
-  "blk.22.attn_output.weight": "5a3b8bb433f43a387df43dd371bdf80ddfac986dfeaf38e9bac1d7a0ec6628de",
-  "blk.22.attn_q.weight": "3a875cec661b4859f30a8fd2c866811184b25b68c9e36fe2663d299caf8b59c6",
-  "blk.22.attn_v.weight": "8717a83b79035058dcfd3ef6f8e5b36e71d77379e5a239e1899eef8766fb7703",
-  "blk.23.attn_norm.weight": "2b4a68a0a2f023dd646e4755c9bef17c2f631901154afd839edac7ac006ec99c",
-  "blk.23.ffn_down.weight": "29499b1586c6fc4883c9b7a9c8cf388035146b5aecf90c5c4c8c8e082c71e7d7",
-  "blk.23.ffn_gate.weight": "7d6554036d21c587b9b556428054f9c15cbef96d24b257f906fcef4ae38bd9c8",
-  "blk.23.ffn_up.weight": "19761ecb288d6ebd44b681c4535661583b1e19dc29e96d0c007333cd8f00aacf",
-  "blk.23.ffn_norm.weight": "37dc35500790a4ca33807b39cf7af65065e535dc25b9e94f3ed2759f61887ac9",
-  "blk.23.attn_k.weight": "717547d00323817b0cb40a72ec5f8cf42ecd1f9e3e42715c2cc5e38f07fffffe",
-  "blk.23.attn_output.weight": "a24786feb6a905fdf166d7500133757cbe494779d4ebcba9eb03046b319557df",
-  "blk.23.attn_q.weight": "6a2c4a98f138b928d22136efa163562691d3b4ed526d52d46a2fa2694a8f3965",
-  "blk.23.attn_v.weight": "c6e6081eb9c38a7fda023085957b460e9ea321e1fff408b38c2b58595c39979c",
-  "blk.24.attn_norm.weight": "5e6283f891e538670425f3e244b08dc6f96f33dfa4aefa913f8eb17212421850",
-  "blk.24.ffn_down.weight": "e09eb170f389deea0a4a1cbfdb52c12490768a2c60491b7bef8a4c445e2a08f5",
-  "blk.24.ffn_gate.weight": "af29d815cf49a38fc2ebd0bf9b2dd9933d023a29f2d766981acb9a1b53f09117",
-  "blk.24.ffn_up.weight": "36ccd9333426666de9d3088bd4dcdf5b624b09dca9e3a83a22fc0383f2d950fa",
-  "blk.24.ffn_norm.weight": "a88e1692318826db6ac42582d182e51a3c698c655d0e21e04fa086318832d07b",
-  "blk.24.attn_k.weight": "f7d61d6d1225289bcc502e3bbb0168b4584add0253218c1b77ac92ccef9a1c2e",
-  "blk.24.attn_output.weight": "85a1363b3ccc87312094c2195022687c16b0dad7fafb9e80bb4ec474d53c29ac",
-  "blk.24.attn_q.weight": "53482a2c008f42f4fad779ca323addc3712040149dfc12f782417756388a72bb",
-  "blk.24.attn_v.weight": "67498272369af7dd10097c73b07f731b565cfc9a559e711cc0d526389e7b44e2",
-  "blk.25.attn_norm.weight": "98dd617def5cb7825ee4833132ca2da2121245921585e1d9e36b93344adc321b",
-  "blk.25.ffn_down.weight": "7fd477d6c50aed5f424a878dd284343379cffbee8a34c0b6e55100c8305fa13f",
-  "blk.25.ffn_gate.weight": "f892c9806c8ec22e8aa746734ac9213428c534921cf161239e1d249fdb5d1ec0",
-  "blk.25.ffn_up.weight": "528bed14c9bf9762f790525ee40412545221f4321d2a2323fa8e73c58b7643c5",
-  "blk.25.ffn_norm.weight": "ca5831966672e7be6a578feeb631ec3570d3b5afe12860819ccb96e896ffc346",
-  "blk.25.attn_k.weight": "610d3068cc9b20401f0c3a0efea39a279dd9f564fde19baf3403b2ec2319e4c4",
-  "blk.25.attn_output.weight": "798aaf702e53b657265ac3b5e6caf3a0ab515bdadfeb1a3a156b4f3bfba76666",
-  "blk.25.attn_q.weight": "8a7fa25248de83029fb97b51d036a01baebe31fcb4be121ab00dd8b7de209b10",
-  "blk.25.attn_v.weight": "2a53d5e9f8a1218c66958c6388d3b37400a9af7956c785024ca44bfbc3c7d371",
-  "blk.26.attn_norm.weight": "5f44fc043481eb0771f3e6d2420bcbcf73140afb9a9feb8eddb6575452acebee",
-  "blk.26.ffn_down.weight": "944a60a409d0d5b6a851e33c69aca152454b691711a8b96f5bcc488772ab2833",
-  "blk.26.ffn_gate.weight": "2a0ca4abb3de5593e6693d8be69b63d6d1a639855ac8332a75f520353f030c62",
-  "blk.26.ffn_up.weight": "0b1df496163f9ac07bf89375d3eb441b51a81d41b47d769a04a61efc18dbe35b",
-  "blk.26.ffn_norm.weight": "56b8dd046e9be6ea71f7efd80dbd14e7fb1aa020d3cd38e063275f3873fd12f8",
-  "blk.26.attn_k.weight": "b1dabfabb970e6971c7ea6e53c63cf7ef56341e6a2edd9cf177785cad9af2f9a",
-  "blk.26.attn_output.weight": "39532c7e836baad164a655fb97ec5114ea4da37ffba9fdea2684f6e4450e6f84",
-  "blk.26.attn_q.weight": "8f48bf6aaa1252bc149e98af2be1777a5c0d2c3274c6d314171ea9344a41b604",
-  "blk.26.attn_v.weight": "02fb145f7fd905133750e90571effacadddfd3f4966552dc59982ac3900ab8c4",
-  "blk.27.attn_norm.weight": "654d168fc3cab716d91261f5719f180b7d697218401633b4878a759f1b5283f2",
-  "blk.27.ffn_down.weight": "2823272bec3a1c12f02cc4cb24aa4031abd7e9dbe0b02676e2305b21671818f0",
-  "blk.27.ffn_gate.weight": "b1a1d40cd02f97182cac17a79971d1934ee0daf3aa0bf11303568c636e208a64",
-  "blk.27.ffn_up.weight": "ed62ec72a020d070e64eb7b50237b32213944727b5b2427f45d989f50df5fb2a",
-  "blk.27.ffn_norm.weight": "c69649ac65d694b306a905dee8b03b89eec1ed188b1eaaf38f8e29d4b12e38a0",
-  "blk.27.attn_k.weight": "cc57bbf413f1fd227128dc66efc8590c73634cbd6f96d01ec4878b5e7ca6a925",
-  "blk.27.attn_output.weight": "cac407ad02361d53207b3c7e25ceab84dcb4347b8087055162e2efe14d11d84a",
-  "blk.27.attn_q.weight": "0af18e07cee12015761c07c94407024f4f4d77d97bdb24163db0e16669e2cef3",
-  "blk.27.attn_v.weight": "a1d08fbdfa40af773c5adcf93bd68b78a44ed144e3fc6bbeb8af02e937527eb6",
-  "blk.28.attn_norm.weight": "f39a51f814512b040a1082143150e4a49ff730f85cef49d7f77fc79d83e91f40",
-  "blk.28.ffn_down.weight": "74f29ed51055d1c1adb8f0660bbe538a27e016c65650f2d67efc6f1c84fa1b45",
-  "blk.28.ffn_gate.weight": "ae48bb16487ded6781c60aafc0bf738fb4ae15729952906f247d216592ce249a",
-  "blk.28.ffn_up.weight": "543009727718ac22f11ee4b17815f68ea6f15ba1f3e7ed5ecdb755cf6417565b",
-  "blk.28.ffn_norm.weight": "b8f9e54c322079ff20a82b88948cdc2916c22c7db40b9a9ed6d3cbe89efb727e",
-  "blk.28.attn_k.weight": "55d055ba653b728d6e784f9e013786fed07115c9fdf23367e3941386d5e77db8",
-  "blk.28.attn_output.weight": "155101c03ddbf18f4fd0694bfc982f33c7bae25c9b087d6f5273c2bfbffcf2c9",
-  "blk.28.attn_q.weight": "1ed19bfdd22e9c14eca014739982492e9516d411515a8585f65cf754d849e53f",
-  "blk.28.attn_v.weight": "11ba854dd575c025d37256eee9041f6d1bd2b549a083d6409a09bfc1542913f3",
-  "blk.29.attn_norm.weight": "02b0bf5e2fcefd11a153cc988c81ba672682e4844fcf6442423e21a0e10d566d",
-  "blk.29.ffn_down.weight": "594bb692ec2779938721ff4748666ca8370e0e4fe85229503f616438b8884f5f",
-  "blk.29.ffn_gate.weight": "8bedcf47e91dcb2cf4093de56b048ee411faab6ff472f89ab2c9c113a08e6967",
-  "blk.29.ffn_up.weight": "e241a547b5fd6dfca8200b8141e21c1c487a96cbc4e5855f181a7ed1be91b642",
-  "blk.29.ffn_norm.weight": "e63eba5e4c6b288bfd9f15e46e236086456c8b7f1f9c732c0b5de84962a2e7cc",
-  "blk.29.attn_k.weight": "afe5979d5bcf211aebb526620f5974bcb0a2c39c8be71e815575c55d6385e3aa",
-  "blk.29.attn_output.weight": "9c944ed44b124b014906fc240afd3b90aed56bbd9567f2eddfd5b7a685b3cb48",
-  "blk.29.attn_q.weight": "e234e08e5c1bd9245a2edc8d63e9933b6b879f97c01392209cad4f55f05f3ada",
-  "blk.29.attn_v.weight": "5cb8e3e5f954e775c5a5e4de7a9a62b17e9c6931bb0ff0e2f82c4126fd3e1a1c",
-  "blk.30.attn_norm.weight": "a65483ee51a0b214144ec8a14f28ea5437586e9e12ebe342a57d1f8627ee12af",
-  "blk.30.ffn_down.weight": "417959da77ceb33ead4271cbb9428b195196173a893c44e52880a7ec61b4856b",
-  "blk.30.ffn_gate.weight": "a0d503ffcbe45dc927600bb98c9f6082487e65cb577ab545add400d666a87638",
-  "blk.30.ffn_up.weight": "f8ab957b82ffcd10b21303cb5e866209b6fe95f827b1b94e9a949207952d12c0",
-  "blk.30.ffn_norm.weight": "210c7ceb0514a9ef27b5d4d1b3aff6dde43f1af0345a050d71097940e0e73e03",
-  "blk.30.attn_k.weight": "16861b9abcf5a3fe73c93d977ca45a1e6daa65be0fd85c2cff53486ce2033afa",
-  "blk.30.attn_output.weight": "ca541fb2e57e2257118c35784845b0c731278af8db3036ac53d71aa1681fdbdc",
-  "blk.30.attn_q.weight": "f7834917748e26bb456b945e230bc926c228e93696bc01fbc2b134bdeeac71a1",
-  "blk.30.attn_v.weight": "9292783171dbe5eb689d17c9bda11e537f0e9b328fced6986c938d61ed590e81",
-  "blk.31.ffn_gate.weight": "e4766a04bcd8f937ba883c6a144101e546747804ca66c35c97281d6ccb47b566",
-  "blk.31.ffn_up.weight": "cc1e666116f7e6b06736db4aa4b81003c583f54f4d9200bfa48842249940e16a",
-  "blk.31.attn_k.weight": "fc80b57557687504efae7d24265cb7dc39b8f826bb3d897a11783012dbedc44f",
-  "blk.31.attn_output.weight": "215617f50a1f5d9b2250b82f3652b35a9e9aa0ad9ef2b485d73965a14b2b872a",
-  "blk.31.attn_q.weight": "274b4f1dfb0bdec28632705677049fb3e327ce6d9e1f3baaad1560439039982f",
-  "blk.31.attn_v.weight": "e641b8b926f9dfcbbf6b6da1c02555525ac4b1c306d96f20cfbba7d6662c4e56",
-  "blk.31.attn_norm.weight": "b3243c361d4041ddb892ce6862dd5091f57d87357e3c67e177451b85d8baf34d",
-  "blk.31.ffn_down.weight": "0a00cd3ecd5e91624a27f9e239b1de425d5ba3cfff82c256a11a4ad434abf3c2",
-  "blk.31.ffn_norm.weight": "2a0d67ea2bb1303975712243f07273c92fce83baa11b1cd6d8e42e74ea3c810b",
-  "output.weight": "768615f077fb797967844571c58b94d7c399d884d115be3ab4b0154504cae892",
-  "output_norm.weight": "7cc5b7ce10e5082000fa00bfa68af8c7c5da218e59e2c41cf2f1499d40ca229e"
-}
--- a/convert/testdata/Meta-Llama-3.1-8B-Instruct.json
+++ b/convert/testdata/Meta-Llama-3.1-8B-Instruct.json
@ -1,3 +0,0 @@
-{
-  "rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
-}
--- a/convert/testdata/Mistral-7B-Instruct-v0.2.json
+++ b/convert/testdata/Mistral-7B-Instruct-v0.2.json
@ -1,313 +0,0 @@
-{
-  "general.architecture": "llama",
-  "general.file_type": "1",
-  "general.quantization_version": "2",
-  "llama.block_count": "32",
-  "llama.context_length": "32768",
-  "llama.embedding_length": "4096",
-  "llama.feed_forward_length": "14336",
-  "llama.attention.head_count": "32",
-  "llama.attention.head_count_kv": "8",
-  "llama.attention.layer_norm_rms_epsilon": "1e-05",
-  "llama.rope.dimension_count": "128",
-  "tokenizer.ggml.model": "llama",
-  "tokenizer.ggml.add_bos_token": "true",
-  "tokenizer.ggml.add_eos_token": "false",
-  "tokenizer.ggml.bos_token_id": "1",
-  "tokenizer.ggml.eos_token_id": "2",
-  "tokenizer.ggml.unknown_token_id": "0",
-  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
-  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
-  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
-  "token_embd.weight": "cde834ccac5e94324b25cb81b02d27312cac0c551b55a7e1d555d90bf6cb6e81",
-  "blk.0.attn_k.weight": "458bfdd9715c66e017c2447b1ed3c582963a3111479314e664faad8c914f42be",
-  "blk.0.attn_norm.weight": "e1fd60b95f713bae7b7e3ca933c64ae6c9cd1e8d808000204bbfdc19f0ba635b",
-  "blk.0.attn_output.weight": "df13b6a157d9d4f96c53b012b3b9bcd207d0c94144cbd22ae3ec13bb07d6c373",
-  "blk.0.attn_q.weight": "13b4126b4245bf06c915a93317c42b8174e05053535ec99dc576541e4cec7c25",
-  "blk.0.attn_v.weight": "5b1781d3a341214511b27eb4e268674ea3ea829dbdf8ae5a6bb89b3c0b33fafd",
-  "blk.0.ffn_down.weight": "49186f5d8148d316b07458841d13a2e66587f4af69b776188a809591ed9c070d",
-  "blk.0.ffn_gate.weight": "4397e30ece09136f00f4ff84ff49e5241b765a374deb8c5a12e897e2bf73473e",
-  "blk.0.ffn_norm.weight": "43260589aac3850a779bca3f9649f793bbfbe5db538361cb743b3830217f8287",
-  "blk.0.ffn_up.weight": "fd7ac918240a07566f6967527ffca58fcf433a30b78fdd6d84b2136d4ebd9987",
-  "blk.1.attn_k.weight": "209839566c7d235bdc20565a4766378b6ee8553133a5a3315abe8a85baa80712",
-  "blk.1.attn_norm.weight": "58c52986f7c69784ba327cb7f350923420782bee17fa39b1fbd13839d4005357",
-  "blk.1.attn_output.weight": "5067cc628449682665dfcf59b16e58fe2a9d2a81cb099f0fcd42f4f8670c6740",
-  "blk.1.attn_q.weight": "f410f9f0dd5edc09401af597d02e2a4c727f1502ec3ec3898321617b36c6df6b",
-  "blk.1.attn_v.weight": "d40fa49e07c102c0644e130e7909eaa93ed0d54e2edddc0759e721d58a4e4f5e",
-  "blk.1.ffn_down.weight": "594b1eff6ed4defbdd819fabbe2d48764984f08878a860bdb808511d5a25b8db",
-  "blk.1.ffn_gate.weight": "4cda97541e388a5bb607ce4cc8b3db1da7045830a630e7ba4d17807befcff346",
-  "blk.1.ffn_norm.weight": "66c13d7481be65b97aa474735ddc9674f33d512ddda76fa6fb45c7464b09f1ed",
-  "blk.1.ffn_up.weight": "1adc6de288ba4cc1237833ca8b4eb81107149842e38bc452e18e5cfe284338a2",
-  "blk.2.attn_k.weight": "5420423559f236ab22d85a00849f31e0cc6e9c7dd879de724393d8cd2b379153",
-  "blk.2.attn_norm.weight": "495fe1ab40cc52aa054ddd4f0c2d2790f4326c8d103296b1b38f3b1060db2a24",
-  "blk.2.attn_output.weight": "ccb83e7085381f558bfd65588c525ad2671feddcbc3887afb4038ad9c7aac348",
-  "blk.2.attn_q.weight": "2e8f77478392bc93c2a391f2e0f4a173a952bbab88a7aca099c6ee909726409a",
-  "blk.2.attn_v.weight": "d64512590f3b7ebbb9e77c2eb97fbda90b00d45c944f2b174f03a2cb11007567",
-  "blk.2.ffn_down.weight": "1de5084a05dcaa6b1bd926e83517dbe9ebe7fde79235fe56018b3028b1aa6397",
-  "blk.2.ffn_gate.weight": "cbea526b557f49aad8c976973cf367fcd12175b900f551984f498b9e07e4b7fd",
-  "blk.2.ffn_norm.weight": "530aa49b10c7eae08899d143409240deb95dae4e1d5bf78cea3b26393cff3ba1",
-  "blk.2.ffn_up.weight": "13a5fc19b96b4dcc1e9bd01998c8272ebe52034c1933ed123a506b711fae9a5c",
-  "blk.3.attn_k.weight": "1913b63a73305941d8cdc472e7f101c633d3357a78602eac0a4b49a744261075",
-  "blk.3.attn_norm.weight": "9c11bed5ab41f4adbfdae4ead65b525c8f19443e656a8c61ba412a4e1ad1193b",
-  "blk.3.attn_output.weight": "bb0b42c1d34779c5943272ed71f1dbb31ad8edd75f8bcd5c868f88505ac3a610",
-  "blk.3.attn_q.weight": "3461a1fe4e49f5319ea047cae98ccdb46528a3ec23831183fe87610b48c94948",
-  "blk.3.attn_v.weight": "82aa30be6a61526a41fb79bb28a2617416f5909f0477aa9e95e16be9370fcb38",
-  "blk.3.ffn_down.weight": "68521011ae03f5e3b0966127111afa8ee9f2eaeeef8d3a0b86b633e0332e9fbf",
-  "blk.3.ffn_gate.weight": "1e89e26338fd364bb679695968c65106382f15ad55c95cbb5ec9bdfeb766f432",
-  "blk.3.ffn_norm.weight": "c81932529a5a8c417c27b888dbe95fff8b447c2ea5f6f560444ec5d50b93832c",
-  "blk.3.ffn_up.weight": "305021735afd8669afefd713f56137248d5e817e60471a112ad06b7fa07ffe88",
-  "blk.4.attn_k.weight": "cc26ba5c5c28082a79e6abfe61186029e80b145252ca6a7924c437f0bcf2d51b",
-  "blk.4.attn_norm.weight": "302d251fdcc91f7468cf33f80b49484251d8917d7018ad264ab3a85c8ecf9ddd",
-  "blk.4.attn_output.weight": "a012f5bee3520cd4ce51f0076c132ebc3653309f304032ad051aa308f55f36de",
-  "blk.4.attn_q.weight": "3c8d607e447f5ef21e73af71e3c0d32fae16f91f31faae34ff06912cf9cb68fa",
-  "blk.4.attn_v.weight": "49f6c81a634ce46d71c2350206ecbd231b1732af96e4e4e67693c41a07e007d8",
-  "blk.4.ffn_down.weight": "e89504f311a4a34dc819a67b761022f14d71c43df3ead4f892c87aaa8e9f0adf",
-  "blk.4.ffn_gate.weight": "18b22f079a2fbaefe3572eec61fdcd996fd747724e2f0ff4f08cfcb43eb7bfb6",
-  "blk.4.ffn_norm.weight": "22415a492c168a0878912b05c854a631228b01c3ea8842e1d75989ec46c18a65",
-  "blk.4.ffn_up.weight": "f57379eae2874d8853f14ddf0f0fcc4ff1338574d5ed5d7e88331d5fb84f5642",
-  "blk.5.attn_k.weight": "d627af853c40bddf9762ce3988008c1ff17f2686fa8f73a0b5da38010147c316",
-  "blk.5.attn_norm.weight": "9ce01092c7f7f1c3ef72d6b794da12d77aa1f6a24fb96ba1b9bd5a0bcc3e2443",
-  "blk.5.attn_output.weight": "0388da8064c4b6b795ce2d8079e8a36535e82b2c9cf794e38ce8ae460aae726d",
-  "blk.5.attn_q.weight": "039b7ce1c909761fdf475c06cf14cabe5a90199282c89e4dcf460e95a4b6275d",
-  "blk.5.attn_v.weight": "c47bfd8d2496bdb6e00e03b903e15fd0ee806a515094ec257e43cc433147ab7e",
-  "blk.5.ffn_down.weight": "1d62e6708974bae318cbf00a8bf621d9ba0537e549ce4710a536520a8d14168e",
-  "blk.5.ffn_gate.weight": "8b42b1b11c92db19985094cbb50434e3a7c9cfea71ee6f21ea79eae7c49284a5",
-  "blk.5.ffn_norm.weight": "e0bc520f1505e687ec391d632a381d38d8ebcdec19f614a11a2000ab573e8b7b",
-  "blk.5.ffn_up.weight": "8cdcd17d2ea89bb9ab902dbc6bf3f827fa4ee029c6bf19eecbdefd146d8b6f2f",
-  "blk.6.attn_k.weight": "5dc6bcff89794d1756bf57ec665b58622d9352130d31082a6c66e1a079f99932",
-  "blk.6.attn_norm.weight": "13b26008abe0f119b5104b9d78ebd5e797d3cdd68122b93d73a3b4831a54d085",
-  "blk.6.attn_output.weight": "f5a49917ea70c3fb311ccfffbfafa63ab18416a5d55e5429b70ce8bfba57c075",
-  "blk.6.attn_q.weight": "d9c2f652c87dbd09ec3822e12876648fa32e86553ac25afab723b1cd9f8cef90",
-  "blk.6.attn_v.weight": "5ecc5fe67609a35151011cb526f45c56fc0a999079ae0ff37c755ca03c68c555",
-  "blk.6.ffn_down.weight": "0ec125ae0ecb2d9277fdb1b04f17efee94e37d0ae37311057c212ca2db3fe6d1",
-  "blk.6.ffn_gate.weight": "fa4d6d38355ee8aa3b80b476d65ae7e343c9b7770d7b097fc848ee8a6e091d1f",
-  "blk.6.ffn_norm.weight": "30e8f7defc627532e1739dc76d31223d45767391a431f925b63dabe334b0f392",
-  "blk.6.ffn_up.weight": "6b97cc32b290fa9087806b5d65aa6dc1760737730c8c71394cc4f30c2157f9ab",
-  "blk.7.attn_k.weight": "0231cb127cb7c3714cd72b8f39343891d7715a9bab2237ade9e7bc5f4ed2e68a",
-  "blk.7.attn_norm.weight": "7c3187f07eead7d219d98ab2daf87905e88d5f1ace109b6f5fa55dce3914981f",
-  "blk.7.attn_output.weight": "2f30ad972c284ae7c8eb0482053433495ebe8fe9c5ee2c28b4bc4ed1f33050fe",
-  "blk.7.attn_q.weight": "3a2b4b8d61cc9956d304fa9f82a9e65b4bb9fda2196670b16df7e0d8c43eff2c",
-  "blk.7.attn_v.weight": "d2aab97d0dcf0f61dd2f32848f7a8a99c423a4948a660a660a03a546972b8db8",
-  "blk.7.ffn_down.weight": "2270d520468c5549cd30023ff9c452a277058310104c4239a616373fc5a94387",
-  "blk.7.ffn_gate.weight": "4134a3ef71b3eac8f76b6f1a2e58625b3bae48081f175994bc3ed7d8b0d4f2d0",
-  "blk.7.ffn_norm.weight": "42df4abd4b8769b16f3930068f96960af1b061f1aeb7505384f272233b2badff",
-  "blk.7.ffn_up.weight": "c920549054ec16ff8c73a72f5d837cf4e11885e44db57c1c1c584c18fbd7a9a5",
-  "blk.8.attn_k.weight": "01c609bd3bf31ce65688f1f640ee413740e821330134d4ed1877a3065d1527d5",
-  "blk.8.attn_norm.weight": "48857411f769b00290f4e4f2e593e092781fdc2503f80c1e3eeda1b85a20f74d",
-  "blk.8.attn_output.weight": "90fb273f8df83744554bd59236515c16c5a5a698ca3fbedc17cc89ddcee354ff",
-  "blk.8.attn_q.weight": "ade617ac4653c7f00593dbb51837a468afef20a14eaab3780fb96ac3d6714369",
-  "blk.8.attn_v.weight": "c2c37496494864fee5c527d1fe1f88529d31c73f9cbd02ef9b2e9b23611ea50f",
-  "blk.8.ffn_down.weight": "2da58572e9ad79087c03cbb0c23c9ef69f93ec221fd5fe4ed92fb93871d23ffa",
-  "blk.8.ffn_gate.weight": "4483294e628edaa4901708e73e92c917bdd93b780fa01aa74aed57166f2bbf0a",
-  "blk.8.ffn_norm.weight": "c0cbb7a4f8123b62f0c4652a687f3b394802bc32870dc446eefb709e42043a7f",
-  "blk.8.ffn_up.weight": "9eaf8a2060cb9224cd585997cd671866c4051ad885c2c6d9fdc7056c2a5c0d89",
-  "blk.9.attn_k.weight": "5dd36c45fbc9c50fd35c36cd75576288506971eac5c5311d4f5c16ef60099645",
-  "blk.9.attn_norm.weight": "3c8ca64f2f75ed7c8fc1da010c23be787648139a96ca0ef3ad10be7b14942b8d",
-  "blk.9.attn_output.weight": "6277e1f833024f53c409be919ec76d34464a78b278c8f9dbf79e777746e3b995",
-  "blk.9.attn_q.weight": "87352b70d9e328c2d51d59090cf5ea5a046529864a890d0bc8986447a0a5c006",
-  "blk.9.attn_v.weight": "2efdf01161d7a82a9117cc2d87d37dba5ffefcf730781cb94fcc95130e48ff9e",
-  "blk.9.ffn_down.weight": "e7658a2ca984961c7ace16acb679387bedb1fef656b5330bbbf588db19673a75",
-  "blk.9.ffn_gate.weight": "773cd330d4ff5d64be8af00adf2e2722fae4e33fc26bb9d03549f6f4b3b0fe57",
-  "blk.9.ffn_norm.weight": "c8b86cd5c43b332f72060b807091c33a258e5dac01358ff4733b916cd34c9c97",
-  "blk.9.ffn_up.weight": "d8cc3bcff18bd46124ba2aa7caacc71220b44eeef6fccb993b4c6cb53e8f2c3a",
-  "blk.10.attn_k.weight": "964bdf3b4e77b915a216f750ff7b0f2eb1dd6bfa071358aef21010b90111044d",
-  "blk.10.attn_norm.weight": "59ed411d91d14775764eb514acb0895a75a10cbbfbc1c15d453bc50f8046cb7f",
-  "blk.10.attn_output.weight": "4d35a2a44cfe4ac0a83fd3ab0dcf1f5a0bf54cdb3b7be9fc353ed32c8a3eb81c",
-  "blk.10.attn_q.weight": "defff5339450dd881ac352f5c459293f39e07b9619ebd10ed632d79a3f310278",
-  "blk.10.attn_v.weight": "b9803e8d6a54acea58f662d4c0a5c8ebdf986676de7dfe12d4b288937881ce93",
-  "blk.10.ffn_down.weight": "eba856be64e4be20b92fb4639a783454dd92427250759df92a337e39f1971c08",
-  "blk.10.ffn_gate.weight": "2d5c509b066584db4de3632b01234e86edcde35409c5ebce18957dc80fe465e3",
-  "blk.10.ffn_norm.weight": "ecb9a8679945ff0273856624ce435dd250ffe5a440ea0861a5c84f0e4c44d2c6",
-  "blk.10.ffn_up.weight": "e76ec7e993f399af02958778c643aa78368e3067846714165eb5aba9d5f547f5",
-  "blk.11.attn_k.weight": "29c6d1f34bd3ba2f0904e57b32a5bf8dcb2834d439159a33edf234ce0b775677",
-  "blk.11.attn_norm.weight": "b5817b275149cd2abe18a6a10e19854605fc58fd364666744362ceee8cfe49f4",
-  "blk.11.attn_output.weight": "1e05653220e237cbe0cc770033e183c9a0eed5680510997409b16186c6691950",
-  "blk.11.attn_q.weight": "03db725ae669151e4d536e50285b3b047ad097f52475df208ed3e790e31a44be",
-  "blk.11.attn_v.weight": "27cdf1d4e971326c451a4615a0b79a8c7fe9508f9b76c0d52fa01971fc7eb403",
-  "blk.11.ffn_down.weight": "176938cd7c2966094f614cace8ba568b10532e45a0d438f80eccd19b6c2a7f87",
-  "blk.11.ffn_gate.weight": "9782339915dd6fa70013628a01524ee1d01ad8beab04068da7ac6a5ee7603a60",
-  "blk.11.ffn_norm.weight": "8245f6391e3be97811c0ff27f0d8f484ecc82a468a837c893f059745bfcd95eb",
-  "blk.11.ffn_up.weight": "15616ddde096d0d25e906375c548b6de4bd5576d1f6b68eefdc29f14e183af42",
-  "blk.12.attn_k.weight": "66dd21604993edd1b1fe547bcaa06f5bb7e31c9204902d147a227e4badf7feec",
-  "blk.12.attn_norm.weight": "23a69f85dd8a0904b9839cc5d0afcda299b74e82ae2642106224a1c820f2b761",
-  "blk.12.attn_output.weight": "4a98d132e376beb274a39d4ea9b6a1b870ad5c66625439d7ff6f45c229c3ca04",
-  "blk.12.attn_q.weight": "1c6c309d63afcfde32fe37257e300a78e25d01117e33490801107c0e75d1ea66",
-  "blk.12.attn_v.weight": "723d9e4ebe4e2b1974afa01d8f512b52933698fa36717dd47b37b07760c50a10",
-  "blk.12.ffn_down.weight": "00e0fb09e1f1fbbf3803f1dee373eaae7a93756b6e13063ab77f9927bc6f996a",
-  "blk.12.ffn_gate.weight": "89159f7f97aefb1e100107e3ac2d694e1008ad873f79bb953d60c2c1bb22724d",
-  "blk.12.ffn_norm.weight": "5f70aebd0e43a39d6373d8658cc670c13aadd7818831d3d84f761d5f688442f0",
-  "blk.12.ffn_up.weight": "faec21b446f061eb4dca561a3180712724347b77a71eb312e7afe9be9e89fa04",
-  "blk.13.attn_k.weight": "3d440825d19eac3b1753b34d94fee2b3a3cb6636c10b2703ffcf688d3c1eded3",
-  "blk.13.attn_norm.weight": "47b575e57e410738ad13fd3c74bb49c06b3d31030910834ece509cd1a5c6d9be",
-  "blk.13.attn_output.weight": "05436d8e613f4475741c1798a7c371b53d61b229507fa04fe23c504ba1f0e12a",
-  "blk.13.attn_q.weight": "002b5024ce520da41256e3ded5cdc60e5ae07ad9b202cb19d76ab511efd02b1b",
-  "blk.13.attn_v.weight": "c1f2d6763587c50312cee0d7140fa2c7ee326f5b172bc99b2d8946e08329cabd",
-  "blk.13.ffn_down.weight": "b5c4e0d8a3ff96cd76a135e415b89f02d28c28f7f3c16a36af31ef0ab8773da5",
-  "blk.13.ffn_gate.weight": "ae06e9e3d2e1f64c7ad23a4009dc904c2eccd7241f9f91c4974ab2504f116be0",
-  "blk.13.ffn_norm.weight": "e44a22321bcbcb4a3c345b504e939e8071370f54a8cd702fabdb40b97e0d7683",
-  "blk.13.ffn_up.weight": "7e6f366d538e21ad431264b12c011892d0be9dfe4c4da9f730af677f920641ba",
-  "blk.14.attn_k.weight": "95492d6417952ec24b2cab87bceb750fc7e95ac6b1944fc328a3852d980164be",
-  "blk.14.attn_norm.weight": "6b7b09e1c51addcdbb160ea59edf032531421c520ec5645fe1ff9ca4180cef54",
-  "blk.14.attn_output.weight": "75887474e4d72c218e6ab0f69f1bf3ec3dc414d51b36fc59df00cdb23421bb6a",
-  "blk.14.attn_q.weight": "940e33f76e48c21215d19e8a21234c8246d4d084381a7d9806aecb24b071d5bd",
-  "blk.14.attn_v.weight": "c58601cf5a9833f80f7f9a5b2656e8eab5eb133211446ebd48f8be15fed4ebb9",
-  "blk.14.ffn_down.weight": "f9f886e7f9b2a54d717b08947a25a0a93e8c2a5b8bcd5a907c06817c8ee3ac11",
-  "blk.14.ffn_gate.weight": "727ed0ee68594a3f59d704ed3240b6929f083b9c36650fb848d182315737245c",
-  "blk.14.ffn_norm.weight": "bd2471008ff1b2bae9aa26bea019393fb2bbc5b9493b8cec3ebd2c280fca24ca",
-  "blk.14.ffn_up.weight": "b006446769f51e4f93b503c4727deae897bc1fc7f4fad49f85024b63c4548d38",
-  "blk.15.attn_k.weight": "23bb70f9035356624039547a603e46be7d1e4403616eafc2451cc09c5373d522",
-  "blk.15.attn_norm.weight": "718cb371ca052eeb3bfac6ac506abb887df125271821fd171797a7f2d8dd6313",
-  "blk.15.attn_output.weight": "c76a2695a204b43a8e5acfa5720590b5d449a9ad9e082cbe3e80fab5903ea16a",
-  "blk.15.attn_q.weight": "2b3e4037b9e91bdd26d6e8d904cf39f948192dcf09bb6445cb55ca058d4f4626",
-  "blk.15.attn_v.weight": "7c15e89b6acafc8619e86aa9d412f5893ab17843ff2cfaf40eea9637b24910c6",
-  "blk.15.ffn_down.weight": "e16fd4bdc6d1c1209c6b633454df4992870c8cefb2cb0e8c92a7e489e9fb5d19",
-  "blk.15.ffn_gate.weight": "95a46bea366c260337c537fde06b4cbeaeec52484a69c3390bb1d178eb0525c9",
-  "blk.15.ffn_norm.weight": "37730293f704da265dc6d1896b3be00c39c0a41dab07f573af39dc30a481d623",
-  "blk.15.ffn_up.weight": "ba74a199da2d0875d7410824238c4ffafbda3993568812284a72b8800df91f15",
-  "blk.16.attn_k.weight": "f58f79a2a91c9a763adefce0c53a71eb5ce6bd8442f4af554b04b58083bff27e",
-  "blk.16.attn_norm.weight": "0c16e41b95e81978e0e0e3b338e2afe2d297426578cacee94de15df74e94eaad",
-  "blk.16.attn_output.weight": "ead22fc337514e4add49aee19720008558e52090466866e849671953a1fccba4",
-  "blk.16.attn_q.weight": "ef59c4e8fe8918c1add43d7e9c6fb3ef799dd3e1bdd731ec7b6a4a6f97c86048",
-  "blk.16.attn_v.weight": "902e6b84c2b64241470b13e6f412f859f66b4b223bcfb9c15d5cb1106b07ef3b",
-  "blk.16.ffn_down.weight": "2ad6e9eb4d8372c32a554395d460d17cfb02d6dbcb757cc962b6bfa36db4f5ee",
-  "blk.16.ffn_gate.weight": "825b2d50fcce3dbe6a5d8d8a50a95466f83ca4a10343efe67894c20b4628fb15",
-  "blk.16.ffn_norm.weight": "3bf6ac90befb0e17e077c8ea9454a8485a30f89f2d761ec7751b60c90aed1af9",
-  "blk.16.ffn_up.weight": "9fbdd08739b32411f5ab0252174d386bab19eb0b17884862f760429b7d41d78c",
-  "blk.17.attn_k.weight": "4033398718bf3674830ed1b73071ed8482b6dd4ef27f31a6c5fbb998321b6c07",
-  "blk.17.attn_norm.weight": "714f2e8ac9592966a0f1c02ee979eee8f84586405b992e8ee9543e840199ffa1",
-  "blk.17.attn_output.weight": "b6bbb618597d767b8f535117be68f92911e4a71d4eb4d8b5d943444151445ece",
-  "blk.17.attn_q.weight": "b84a0dc00ceb515faa2628125dcec502eed923077b21cfe900a4ff16c2e5f9ed",
-  "blk.17.attn_v.weight": "4387c7d6a17da9cc7a6bca8f4a75618b20407d570792056283a8e93b6ec65f18",
-  "blk.17.ffn_down.weight": "47db95c6f1e12b399c3eaf9ddba261782dd71173dd163b52af96541cf87b5196",
-  "blk.17.ffn_gate.weight": "59abaded0aedfd12f01df81f7a811e84db6a227f51b60abe9a247ca726e87392",
-  "blk.17.ffn_norm.weight": "b7e86445be5c7b722e01ddb98d5c7527ca86cb827ce0354f2c269e0f2558751e",
-  "blk.17.ffn_up.weight": "8e31c293bac649d2f60da4b3fc4a3acdce1111ec6058d8805eeeb242443011de",
-  "blk.18.attn_k.weight": "5ce762ab7b032511c131df81093b587871718c7097f79d8e07d707571f18a47b",
-  "blk.18.attn_norm.weight": "1f52cdc7af1f4dc1f0ef6ad1ad02e18cda32133654e57cfa9c72ada9c0b1d995",
-  "blk.18.attn_output.weight": "6486957f30bf8a88516e25772c6650f98b13923f490a2865a8752e36439d1cfa",
-  "blk.18.attn_q.weight": "93621c8abf69d2ca29c5207180eb628fb2b544d89de6c4a7fb0699be95534899",
-  "blk.18.attn_v.weight": "11604083b5a74828ac1d226af015ad5dc0215a1fdca44fa7131c2163c02d8156",
-  "blk.18.ffn_down.weight": "8f9997feb94385f106915df810239c9753b31efda2bf14bdf18a9fbbeec8233d",
-  "blk.18.ffn_gate.weight": "427c213b3a4e94af703429daf2f65766f70424d8230c123e7e712a18bceb5ecb",
-  "blk.18.ffn_norm.weight": "c45d305c4ea6a54013ba112f12dafaade064a32cf01317373464a3618d8ba44a",
-  "blk.18.ffn_up.weight": "a2811f2e73ac9eb9cce91a21a454e84e230a155244e2cd73f2c12aad3c9b8cfd",
-  "blk.19.attn_k.weight": "b2daed159925eac58c291e2f1e2000beed21002b03c9e1bc7e7a52e22240666c",
-  "blk.19.attn_norm.weight": "6307306ede2ab5bffa1bcac3f8b139354678c0376b1d9f5530c1fcb4268cfeb4",
-  "blk.19.attn_output.weight": "ebb98218b2a9c84d3fb6baeb02c5df264b7ab80d994d1098ba1cd47aa398effe",
-  "blk.19.attn_q.weight": "4f10df2ad09177e7528e9456039b670d07db22940a49417101b725d239c16724",
-  "blk.19.attn_v.weight": "30f1efc5114badaeaafa91fa466dc7fa14b1616db433c6f563ab851f7333a5dd",
-  "blk.19.ffn_down.weight": "be5ec7fe6b48855cd0015b0e430d1b70c620de87a7ff188c7c1afef546d7b6bd",
-  "blk.19.ffn_gate.weight": "10dffea4213881f8a9b583ee0fd370e033756d32255ed15053f794375b9400e9",
-  "blk.19.ffn_norm.weight": "e75cd24ade45dca78fdb0cbcaaa2d4a17d83a5a73dcc94ce0ec2d68fbdb2a881",
-  "blk.19.ffn_up.weight": "63e81bdb951410ffa81bcfba1b94a679ec9ebae59cd1623ce2651ed5d4c78bfd",
-  "blk.20.attn_k.weight": "c2fc5ad39e9bdd45e73c6e54aecc474388d944c4be1ee1921b7fcd035bad02e0",
-  "blk.20.attn_norm.weight": "aaa9169171937bdce20c1f057e94e9252f221cabacf1ced12e11b9586f23d308",
-  "blk.20.attn_output.weight": "a9f4fb496e4bc053e3f6cf2e72e22d4cd2b545ef6c32f7e782c2ef6ebcc21d4b",
-  "blk.20.attn_q.weight": "5a07ac619ed251494170b213921ef3fcc4c2712839da262516d9d5b8ea1ff185",
-  "blk.20.attn_v.weight": "d6689473105d241eacb17f09f06000ee237336916cf5ec4f48271c5b41bcb8e7",
-  "blk.20.ffn_down.weight": "74be38db51df736f26ede7c6b52ea787e385f181cb66231e2cced4556a25c9b8",
-  "blk.20.ffn_gate.weight": "ea91e06dc3d051c0ba0243b5a8bb40edbf254eadfb54fda7247e05cfdd88cbe2",
-  "blk.20.ffn_norm.weight": "5fbd357b3d6f44a7a91e8a4fc246b24303891b7957e0f3c32818ae5dc16ddd8d",
-  "blk.20.ffn_up.weight": "fe3290333e056af4ed12942ac72aeba97a6b562e2db05e79cd35dd07eab5b101",
-  "blk.21.attn_k.weight": "201ec6ee95f06ea5eb80fe86fd07bd016d3ae9ab6abd25d631834414e14a010e",
-  "blk.21.attn_norm.weight": "ea8154f93e06485828475a00b98cc397ac84768dd70e06ecc0c075b5712d7276",
-  "blk.21.attn_output.weight": "9f8af74d531478fd304723fd8e4e01578db598441b80dc7c960cb801dbbc501e",
-  "blk.21.attn_q.weight": "277de9953a8d3cff894ffd06c15ad0ee1407e319df0c1a693d4f45fa9c74ac7f",
-  "blk.21.attn_v.weight": "6bfdc16cfb898909b7788ddd39dd04b928f31d6732772195d53c558004638dca",
-  "blk.21.ffn_down.weight": "173877146cb94801157796ee9e5eecf3f46acb3b5e797f90b83a3fc22395eb30",
-  "blk.21.ffn_gate.weight": "53146713e2ca1be80496024077a028f6b6d749b02e71003c349e113b436f48f4",
-  "blk.21.ffn_norm.weight": "b28b97e18ab20a5c553ba422f7d7f6014f5902f1d62a69abd20d9fe19a5f9462",
-  "blk.21.ffn_up.weight": "5c39d0ac4d602b8ec8909dade93b2efcd6b6d9d84a19b252d76bb66dcfaab87c",
-  "blk.22.attn_k.weight": "01f26272c82917a87a3ccf922fa1d521a952b05de878241b7efe3525b617ac87",
-  "blk.22.attn_norm.weight": "5ffc96249d8873b506e9eb7158bdfd07fa1429e53c1951430ca7505d25f11c76",
-  "blk.22.attn_output.weight": "9c2201569358f720244b9c9497e4da02585a167b1414c8a506b85ad75ba990d0",
-  "blk.22.attn_q.weight": "906036eb4ddf027f6d920f9356a6a2a5e529b96f4e1231a0496d46b4434a5842",
-  "blk.22.attn_v.weight": "30ede8b0d166003a4b8a81fc99437f557719fc36e5c4dd510c9f161f36a47e73",
-  "blk.22.ffn_down.weight": "d04c164beabab30e1837b843e18852260efccfbb9d96a34ddd816e6fb3ba23c5",
-  "blk.22.ffn_gate.weight": "19c889db6b19179f0a62d5981a1506592c65de83760d67afbe00d202202750a8",
-  "blk.22.ffn_norm.weight": "4885eff2d851b32dbd306bd632c725857e6d164f0fa8b3d5857e572e6ef98ee9",
-  "blk.22.ffn_up.weight": "365594d8db8e95cf87cc33ac23947942dc326110175cc8ec5a07b5c7059089a7",
-  "blk.23.attn_k.weight": "badfea1569da0fc6ab817c5727ca3a69b07d9cfd622fb8be5e66678d5b3f7ae2",
-  "blk.23.attn_norm.weight": "8968f78a379ac3ca5458b4ed4251e8d9112aca6d6dd1ef6440b4bb0b380375a4",
-  "blk.23.attn_output.weight": "93e43393c03956287b1fe31e9735ff1cfe84f4ae56b83dbaebe96275e4e11831",
-  "blk.23.attn_q.weight": "aaff73c725a8700ae66bf26ac8869dfe96738eff23a8ff340de2ab53400a5795",
-  "blk.23.attn_v.weight": "3a86a8dcf14a746ed1411f5a7e634064bc4dfd6511c24cfeccfb2c9ebb6b4101",
-  "blk.23.ffn_down.weight": "d4da6f37bd7ef69bb203f7b0dd59f50bce37432c70627e6cf274ab81548af5cf",
-  "blk.23.ffn_gate.weight": "5b6072936c4a693923bb4e3d1473fd45545cb02fc07799aca458ef0449a04061",
-  "blk.23.ffn_norm.weight": "cd76e37025f84773180298ddb15e0d4ba9cfc7d832e19c791049daa47c6d9c10",
-  "blk.23.ffn_up.weight": "cde43b99b83124a13b2e4753d12674b3a61dfb34c04703007ced3e8e2aee1801",
-  "blk.24.attn_k.weight": "457379edc4cce4cbbe107385079019bc922264fdfc7bd1d1ae84343a81460c66",
-  "blk.24.attn_norm.weight": "0ce0dfab2edeede5da419fa7833db78e36222cf25c358d08f3ec664310f031fb",
-  "blk.24.attn_output.weight": "0cf91c2fd40c204d2fd4b9c85b69281e5ad4ea8442972fcd44b5fc8e835ffdf8",
-  "blk.24.attn_q.weight": "87ede30c09eafec6a4e6285674c1bc4637140b168b2da4ed34f36fdb6e176cc9",
-  "blk.24.attn_v.weight": "4c0b078b2798ca35d6d2c2258fe499820d2bc88700654ba4016e4b028f563590",
-  "blk.24.ffn_down.weight": "cdb8540c32b1ab988f984484928d39f6841f2131c1cebe90ad9456737fccbcaf",
-  "blk.24.ffn_gate.weight": "da2e0e913648b5526bd2bbb344038dd067639343aed3b413662b064b0db7556e",
-  "blk.24.ffn_norm.weight": "8940bd781c610d75eb2be63cfc8d869a3af05e53c963dc7fd4c6f653df5a80ab",
-  "blk.24.ffn_up.weight": "90cbac2a58801abe11ed6c24560aa4acb949f79429f2aa8ff129ac05868bb87d",
-  "blk.25.attn_k.weight": "90607131e36998e990ce718ad05cbecd1bcaed010931401ce6baa3b0d93ebce6",
-  "blk.25.attn_norm.weight": "fbf679c85656c04a6cf8fedd5412c1ace22960e6c2d47f2d43997827811fbb97",
-  "blk.25.attn_output.weight": "08412724ee7a2086514406e6f68fb9f622e10bac25b0c373b294709f4b09bd2b",
-  "blk.25.attn_q.weight": "9c1238e98a2747654a0d4371d3e7ea8b979867f609dc42482544f25591e85c7f",
-  "blk.25.attn_v.weight": "a57796a535c6cb09581cbafd6a91dc14adc8cca2a2465a7ffd0aec546cd84074",
-  "blk.25.ffn_down.weight": "f7e34e8a6391b480da08b52640613ccadce268373934b409759743a1735b74d6",
-  "blk.25.ffn_gate.weight": "b8d0b2f4612678b5ce42bd4a683f8024514b75fb5ebf6b22c600811e95582ee4",
-  "blk.25.ffn_norm.weight": "cde1fdba2369d315f3c6940a997c471ec891924e642505db580d732763bd7b75",
-  "blk.25.ffn_up.weight": "72e700c32ac8b9c47559c2222e45888a480b527ea512075423c5dc01678e2bb3",
-  "blk.26.attn_k.weight": "6ac83b3414ae75bf3a9055c32e49d2c40fe611ab21f8444f03d2f465d18122c9",
-  "blk.26.attn_norm.weight": "55f9d6dc9d75973dc75136ecb9d991b4398097ac133070873fb96ec76a6f60bc",
-  "blk.26.attn_output.weight": "ebc4fcbd15b33263e50ed2ad45740867cce15bc90e1216623babcb1820734509",
-  "blk.26.attn_q.weight": "080f057521073e412936fe3fee64fd574c8128fa4a148b879d3e598fe4954581",
-  "blk.26.attn_v.weight": "0fa2830d6746487ac91b243716e4302361f891e4e008eddd14abec47c7809d5e",
-  "blk.26.ffn_down.weight": "cb2ab8af1653adc57111ada49d2825c6995e338c8208455b92de10e580f60f31",
-  "blk.26.ffn_gate.weight": "231ce30966086bce2dc0e0afd34a22a1958cfda7a57c41b3b8e9444c5dfde8a6",
-  "blk.26.ffn_norm.weight": "35d959d25d17b00617590f5d5831bf705c385c51e46297a14375a700effca6af",
-  "blk.26.ffn_up.weight": "367680c8d332538b467d1ef87cfeb36cc5c6af564c5023c5fb50e728e3438287",
-  "blk.27.attn_k.weight": "0bfcb351c6d17aeac5b55a915074fbdf00f11c4bda98babb196ac8804805746b",
-  "blk.27.attn_norm.weight": "5d598a88c2e75ba59dd7ba4fee940bdec92d72038f1286536d2dfb71d008a09c",
-  "blk.27.attn_output.weight": "23a9da7347336479f6a10ded14cb3f46e06b5bd56dc4b0fbc526c688552ec840",
-  "blk.27.attn_q.weight": "b83319dba9055f069208e9c9d66da08bc6874f23e575288fcd81697d1777aa54",
-  "blk.27.attn_v.weight": "36ed34ccb2f36fdf16b2c2dd225a98ea6b7b0e376e7791191136ccd7bd7a4add",
-  "blk.27.ffn_down.weight": "5488e1d3a58c71b5e9ddda430540b4776b268cfe1457cbc1c2622dedd9e4526e",
-  "blk.27.ffn_gate.weight": "4ff48011ee0bac39af704849d9132a2410392c87a509c684f2062f6b76b498fb",
-  "blk.27.ffn_norm.weight": "32afe99675983da3de2961d1b5ca41c98970a356823597fe29e91f6e86abf0e8",
-  "blk.27.ffn_up.weight": "1eae3088a75629571fdbf6a20f141bc2bb2ed3f5ba2b9fd1d949f80695e442a1",
-  "blk.28.attn_k.weight": "c4e80af714962d6f9040d2c09f316f4a1cbc3a2e994e19902d7c653cf3c73dba",
-  "blk.28.attn_norm.weight": "c1ecf85dedc1c83d5d402bb7c94fb8b9c11f1a3e5f64e7680f80912d4a560794",
-  "blk.28.attn_output.weight": "72ba47c061b21f5ebc5213a455eaf6fc49c8f8e04ff9ce37e6ed4921b629161d",
-  "blk.28.attn_q.weight": "c4abc47234307f44b8ca789aa6668e298158fa4b459b2c1e84bd581806591cc1",
-  "blk.28.attn_v.weight": "aeba950799d4950e491ad0fcbe30334e39b8975177990a2cb339031c45ac153c",
-  "blk.28.ffn_down.weight": "4e84ce382a37b994fb8608df451a60040559e3f4f3241c3b3cb8989a3ed50d83",
-  "blk.28.ffn_gate.weight": "04df157acdc8e8534ad60acc2d2a4dd3a7a6610f6382535ec728994fa6f83f83",
-  "blk.28.ffn_norm.weight": "4d0386dae2bd1c1a9d0f9730718333e3a486c3bc6a5c5d482193c75d39832c80",
-  "blk.28.ffn_up.weight": "fec60bb0a3daf182a14bd8311fe6dd1e3fd020c5fc273e2549cdb1a2d6b79b05",
-  "blk.29.attn_k.weight": "b0532a263aa5a4e2a7a80adc83fc5dec974493bd18da7f953e7ebfc3f3a19aae",
-  "blk.29.attn_norm.weight": "593fc3b4000c35b7a59dace09ca1756c08be0105b2edd354a0e1c16c82898859",
-  "blk.29.attn_output.weight": "315b896f9f0cbacd0ca8937384c3a3a227efa908cb8c3a9125ec00c480e32b9b",
-  "blk.29.attn_q.weight": "d482d45386d4ad3394f08e9dff233ee3a70d0427d65c0b8fa05905da7e25ca53",
-  "blk.29.attn_v.weight": "cd3b5a6e2852da796902930a6a84bc87fc6a7c7bf51f8fc23758d12a39013b36",
-  "blk.29.ffn_down.weight": "5b3dba6f9753bd1b1ebcba65ef5373dd62c38e755c44b7231b95d93d45761f89",
-  "blk.29.ffn_gate.weight": "8610d9d2db15c256243ffcca3ffd31786d0ada0af0e7c7aa3fd20524370ab036",
-  "blk.29.ffn_norm.weight": "1a2ef2d38b7ac3e51190b9ccb8b6552ba83ab290e523356a7f851ddb35dedca2",
-  "blk.29.ffn_up.weight": "a5fdd15811bde16dc27677cf1a4c97daab4c28cb12a9530f1a0e573134fdb69c",
-  "blk.30.attn_k.weight": "1efeb0b5f4b45a85cdf47300f892ac77ac1f38000ec3653565d1303d1fb8c743",
-  "blk.30.attn_norm.weight": "c73934c182c7fe80838ec1d0b92f50a583f75f7a3d78d822f009b58ad2c80e65",
-  "blk.30.attn_output.weight": "3a0fd89de2d274614750345d827a9c886a4f97b343a13cdf680390505df596a3",
-  "blk.30.attn_q.weight": "711e113362bdb067db843c66236704eb1cd3fc5f40e3767143e96d510686ef4e",
-  "blk.30.attn_v.weight": "82b12a9a74fd3d91b73cc2e841e2b3f0a5197ccd2998afa17020995f880d2267",
-  "blk.30.ffn_down.weight": "af9f4b1287c0d824ae22d6e335d19e04a70135b835be7caa2435f1d85e931993",
-  "blk.30.ffn_gate.weight": "e2ab3e6f15f5c50fca66c084cb6a57a2b6b82406d65150e82ea0437b93dd9a46",
-  "blk.30.ffn_norm.weight": "c1b9c325c83f00e177386a4d7e769945f2995e60950c4a576c0a2c4ab9703d04",
-  "blk.30.ffn_up.weight": "9b94a21efd419715d82071b490d3b635cf1e8da080620dcc39e5bde976d7e9a6",
-  "blk.31.attn_k.weight": "0db0d82e3ddcc2c06209f5f013e1d72a84a996c40bf00186be485b909cc268e8",
-  "blk.31.attn_norm.weight": "2b8b7239471f57140c5cdfe06bd224a4f6326282f99736e44fba4c7b120ac101",
-  "blk.31.attn_output.weight": "a310b048840cc3ff2be4b84796340e8e2cdf05ec89d14bd3655c109b2bfa9fcd",
-  "blk.31.attn_q.weight": "f45e0cd95645175ea82813455356d171838539bc3f7676d877c698f2af0a0eda",
-  "blk.31.attn_v.weight": "8bde008e809112aa7e7c23e9c3099087bcc557313b01306c87efa0a4a30805ba",
-  "blk.31.ffn_down.weight": "8266fec7e203fbfad7033120861e44984581ff8b6851d01dfb7b81c5d8fa90ec",
-  "blk.31.ffn_gate.weight": "b73bc0aa5baf006d9ef6403104891b8133671b0992398fe038380b67e0d7e2cf",
-  "blk.31.ffn_norm.weight": "9c62cc27a7b6017c1df8ad49bff249a8245e8895c6754f402cd44623fda83268",
-  "blk.31.ffn_up.weight": "5b970a4694ea3171a0167f6e1636d9f00268bc1c9640430ffc35218494884adb",
-  "output.weight": "74fa0ef08c57a30e633e7117b1e9c805f833e2e5e21434bc79ddf9c92c6d7330",
-  "output_norm.weight": "59b8a59fd3fbf39353506116e43e5e76edd0cbf2a2873d869da4cf27a04997c3"
-}
--- a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
+++ b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
@ -1,348 +0,0 @@
-{
-  "general.architecture": "llama",
-  "general.file_type": "1",
-  "general.quantization_version": "2",
-  "llama.block_count": "32",
-  "llama.context_length": "32768",
-  "llama.embedding_length": "4096",
-  "llama.feed_forward_length": "14336",
-  "llama.rope.dimension_count": "128",
-  "llama.rope.freq_base": "1e+06",
-  "llama.attention.head_count": "32",
-  "llama.attention.head_count_kv": "8",
-  "llama.attention.layer_norm_rms_epsilon": "1e-05",
-  "llama.expert_count": "8",
-  "llama.expert_used_count": "2",
-  "tokenizer.ggml.model": "llama",
-  "tokenizer.ggml.add_bos_token": "true",
-  "tokenizer.ggml.add_eos_token": "false",
-  "tokenizer.ggml.bos_token_id": "1",
-  "tokenizer.ggml.eos_token_id": "2",
-  "tokenizer.ggml.unknown_token_id": "0",
-  "tokenizer.ggml.scores": "e3d3eea80bb41a1213f2d0aa3e8a38581d1f19323be77dbd779c9c7e3b72e676",
-  "tokenizer.ggml.token_type": "6040635e6bd38d98af06698feb75c1802bad35180ee6ae0a503e38c0f60fd71e",
-  "tokenizer.ggml.tokens": "604ac4bfbd019e430d7b6cdf18c6c0cd5b967900601f0307f714ec7773aa5ca6",
-  "token_embd.weight": "1d1d1d39a867d5a4bfb32792a47247d2638c10c95a6259391d02843583505cc4",
-  "blk.0.ffn_gate_exps.weight": "2e5cd43ac3f26c44f071926ff6c3f239ecc52a34bc9a5b5906d3d4c1bf2fbbfa",
-  "blk.0.ffn_down_exps.weight": "a4dfc7e7c96e7402eb70279601675b956bb7331da8101e63fe5c0a611b6972e5",
-  "blk.0.ffn_up_exps.weight": "2d5d87b378b2319c344ed2c642598b6f7cb6beeb582a8ea51abc9ae690d473c3",
-  "blk.0.ffn_gate_inp.weight": "a46aaf5aba7401ce6e41f158242b4879d34901661f3ede85496cbd0ce79d6314",
-  "blk.0.attn_norm.weight": "3fe37d913bdd2b65076bcdd6efe64a37b0b03cacbb1b80b9f7089068aa35f38c",
-  "blk.0.ffn_norm.weight": "5e14308a3c894734eb204c8f558bdc817e94bbd5b4e9cb4094e91ba388c8f7f2",
-  "blk.0.attn_k.weight": "73d943dcac0911e87bd771f4aa1c901e1bfe1aed293af06e1a67812159859f67",
-  "blk.0.attn_output.weight": "4c5f754c855e262e8d4c94c6fbbb57af06399dc0e170d7d99a1a17fc9aab9227",
-  "blk.0.attn_q.weight": "d6fd7403c873d49c05f6f03208f30d99ad34cb3b71c9990c47334d502a8e4c7b",
-  "blk.0.attn_v.weight": "cf17cf64b2d683bd9de6cebaf60e5c264df6fdc38fe719dde9d54c80334f6366",
-  "blk.1.ffn_gate_inp.weight": "0d524de81cd915816b4e714bf595ad6946a9130b3de731cd89428b2781230809",
-  "blk.1.attn_k.weight": "2ea47f412992b374c70674730fe84700e0c8cce177086ce9b6635e42408964bd",
-  "blk.1.attn_output.weight": "b4b2520794d54113e86c8ff678eacfc62e35be4395a594a6c8c22b4383ebcc0c",
-  "blk.1.attn_q.weight": "5db930c98c4f91f6eab57eb974c72210b158e366d23d6d2890b2759c053bee33",
-  "blk.1.attn_v.weight": "079bdde09668394bf7af9f8bc175017b4f48f0ab64e6dd855a4d7561d1693c0f",
-  "blk.1.ffn_gate_exps.weight": "146a62de19f9ab093deb101f9640534ffc3dc40d69f508be12fc0475d01b0c7a",
-  "blk.1.ffn_down_exps.weight": "949da94a3c0f375160672a979e85f7def284264b10d48d038238aad5f5ece793",
-  "blk.1.ffn_up_exps.weight": "7016a3f467d9e3f2f4b4019579ed86b757469cd367f2b225483305376b4bb3c1",
-  "blk.1.attn_norm.weight": "1614d1e6ed537737275eb888666c7bac533f4eefbe73dec92b591045ca9e1afd",
-  "blk.1.ffn_norm.weight": "405a455fa7d1ec36894652ceb554bbcb09a07fd6405f42741e66dc4a4665c19c",
-  "blk.2.ffn_gate_exps.weight": "90d5003fc7421f44220c0842d43128955e91488f6f785fe570b62d81b719e964",
-  "blk.2.ffn_down_exps.weight": "ecdc2b5a8b504ef0a7833acff47d69b0c1fa9c22126de1bb120ff5e48c3d6e2c",
-  "blk.2.ffn_up_exps.weight": "2cbd9485a32460d315eb50a2f3b00863fd77245bfe885b7565efac1cdb1f191e",
-  "blk.2.ffn_gate_inp.weight": "0d0a17a1a2c7a61f2cca49ecbb479154dc93a870873257bc4f225e7607f2e2c2",
-  "blk.2.attn_norm.weight": "b2e4c5a977f87a6f880896bd73596234c9b83622fa0d7add5892501e3155913c",
-  "blk.2.ffn_norm.weight": "0ab875b4280afa922376cfc7b9aa3f7071c9432ea1254091ce7de3749df0e8e6",
-  "blk.2.attn_k.weight": "bb884af51fb51550acfef54ccf1b58ce8284e587806e6a2f88c8265e1ad05a5e",
-  "blk.2.attn_output.weight": "0f03099ba1ef342ea61af9cd71d028123bbd8b1dd7d7fd9b509aef77815427d9",
-  "blk.2.attn_q.weight": "8fad0d29eb4c9d24e564774ee3316b9eb7a4c4985e4567111d2c836c830f6cf3",
-  "blk.2.attn_v.weight": "fe04c847ff677632401a94e7b6b6fdca60391ab21cb23bd791533115de6303a1",
-  "blk.3.ffn_gate_inp.weight": "29e3aaa724590c070e614af8288939603d2641b0ef11e8c0f476bebb2776673c",
-  "blk.3.attn_k.weight": "231cc5631def10f7f292d8862d6125ff555164cd70480ac76362149fad204497",
-  "blk.3.attn_output.weight": "86467a605c62852e05fda1a7ef43150df2cf715fe59785dbcba09f1c27cfa086",
-  "blk.3.attn_q.weight": "901822402453922225c2d6ac79616691d48217635d5ff7338daa971d5ddee210",
-  "blk.3.attn_v.weight": "27030784f44375720df2f090933645a31a022d3fb3b14573e5ca0b78f44070c1",
-  "blk.3.ffn_gate_exps.weight": "231ba59cc0b988d125d77bf627aa3f04636684870af88f081f3944b48a160d86",
-  "blk.3.ffn_down_exps.weight": "530c3ab44ae4d66e8afa4d10c153ba5dfcdfb7321989a988e62e9d12e7234625",
-  "blk.3.ffn_up_exps.weight": "b85c2d4d9d11332e702b3c0a6610d4f525f9a93e5d12f5c7c55c592c40755e75",
-  "blk.3.attn_norm.weight": "05dbb6d88cfa6b199f9d705ccbda97c0ef13f9ec875c595398a1a42d009a4555",
-  "blk.3.ffn_norm.weight": "6880b1c27d46969ce36fac049c05dc8b89e4bb47dc89df357e32df7e18fc512e",
-  "blk.4.ffn_gate_exps.weight": "a883b4f225b760c5a2f6605dc5e2167ab85bb398c70bf64ceb539fcbd6128dcd",
-  "blk.4.ffn_down_exps.weight": "d291bb656aae77947d4b525e2819bf4112afece53ff31de9dab999af1f65f9c4",
-  "blk.4.ffn_up_exps.weight": "38592afb8ba3dcfb26970f906174f7d3fa62da44fa4be4fc6912a19030ea9164",
-  "blk.4.ffn_gate_inp.weight": "1596cb74e8fd6c3080b937b06468bb397b0dbb661e6d180a6bcbdc43e8bfd0c6",
-  "blk.4.attn_norm.weight": "f90c83c5ff4366281d283384efc941620542b9cfdea160d678dc54a75e33f758",
-  "blk.4.ffn_norm.weight": "d28d8c49d1746b7cc085562d1074905fd14023844de823dc4fb22202bb280790",
-  "blk.4.attn_k.weight": "792bbf412cc357140fdaba543e547a9b2f7582919e307bbd9a80c7d6d8f5f1f9",
-  "blk.4.attn_output.weight": "d98e4a062d2631d9c315f1990d5f6ca9a88e7e0e46387f611ccb0353f876aa12",
-  "blk.4.attn_q.weight": "1a11a55a91d9f748a72176ff6b1c174844df406e00d1b66b9aa64dc6ee4bcd1d",
-  "blk.4.attn_v.weight": "04cb3c02b12a6313c7ac7044513441083d534fb4c5a3f63bbaa58f7edbd2fadb",
-  "blk.5.ffn_gate_inp.weight": "cbd5cdf015d33a2da6703eb74c22fcb97581fb9175435173b6dc4f9e8364320d",
-  "blk.5.attn_k.weight": "4fdf3405e4d657403f5647b51233521310ee984b4b81bbcd901cb3e6ab76b7ff",
-  "blk.5.attn_output.weight": "4a25662c46979a29600ed77e1907cf81fb16ef30e724c155444e54ccb76af481",
-  "blk.5.attn_q.weight": "e2acb30e30b97300039bb20ad0878f05159d5657fa811748a51d5b6fb35d631e",
-  "blk.5.attn_v.weight": "306504b6a26aa123c63dbbed3f4ced0ed2ee8fb6a30bf0093539b817539f5ece",
-  "blk.5.ffn_gate_exps.weight": "7e34df9b9944dbeea5e8565786d3aa6937314a4b87acd4d0874687877c5a39fd",
-  "blk.5.ffn_down_exps.weight": "c4b7a57a42b5ac0a8ae27dcd5cb2646d7a7cc7123126d44a56ab128e85f60b13",
-  "blk.5.ffn_up_exps.weight": "09d47593b6dd6c664a9155bff02fc2eb7ac4a70219a88162d05c802a01d3c6ba",
-  "blk.5.attn_norm.weight": "58804a036d6ac4c1fe357b8b6a97a5c37cae1c2f06ee0086c041d449c1c6ef6a",
-  "blk.5.ffn_norm.weight": "d872dee6789f0826211aa46ca9d0869e3e96bcace9e77d6559a7b6f3e524f3ca",
-  "blk.6.ffn_gate_inp.weight": "fb1eae732e974d6c1d020a5b4ef98c5f33016f984701bcea656f999a99daad66",
-  "blk.6.attn_k.weight": "55e9c59c5051ab5519b3a7962e1b5fa96a3c0251cb6200dc2f177885ad2de470",
-  "blk.6.attn_output.weight": "f3c834a8d0027370350e2b6294d95434d31432e57be6313b013c15a56303d61c",
-  "blk.6.attn_q.weight": "efaefe5f11c2140dc7cb532b0832c2a0b363a165cbda21f00fadae77efca377b",
-  "blk.6.attn_v.weight": "900bd734d75616d846a90a121c97e081c956a3d1ab012f66dd0bc62c43e1ec3c",
-  "blk.6.ffn_gate_exps.weight": "312a99661b1468fcaed2474621116f1681432755e973f3ee79d01912974fd424",
-  "blk.6.ffn_down_exps.weight": "ac9cd7db67a2ef0d2b5def86873673d05e48d49d147dd944469dbb8e2d4c46f6",
-  "blk.6.ffn_up_exps.weight": "57613e7e09579400a1a09fee4445acfbfe83f2f327fdf317877787d96ada6b84",
-  "blk.6.attn_norm.weight": "0e8801e09885c633bc01a9a5b85d4e878d30158a4eb41a937dc5b760ebd044cb",
-  "blk.6.ffn_norm.weight": "b8c58062ac93072f878446b0e7f958c737aa47fb769fc3a8f593133d12db2dd1",
-  "blk.7.ffn_gate_exps.weight": "1ef611732ff13edfa8d30981ed9dac00c15ceba9fc012ed0b199e9280a849948",
-  "blk.7.ffn_down_exps.weight": "856c6811945c7b0fa461ca17811cfa43436b4cdf5326bad23cbc30883486d7cc",
-  "blk.7.ffn_up_exps.weight": "6725e3e33994302ee13fa5ec163631ce2dcaa08aadde8fc166c2265d4561c5c5",
-  "blk.7.ffn_gate_inp.weight": "36b49d7f80c1003dc392b2c1b9960cd49889dd69e77b26b9e4b13d01f3d0a32a",
-  "blk.7.attn_norm.weight": "7a0ec49acc5e20ee71c6f80ca02f4f1e564c485e0ae0621309e7c2eb0c616cf0",
-  "blk.7.ffn_norm.weight": "eeae035c39ab6e64bc06a4baa1bf6e50d4c8b8797cb0ad8abd48be86974802c0",
-  "blk.7.attn_k.weight": "e8f78c1def01a7a38d2d9bf7becb17755e28fefe4927856f7890fbee52840187",
-  "blk.7.attn_output.weight": "5367f05ac3bb49ef8745ba5902e1bdd4442415a3ebff2c7e1a3918d7be6fe948",
-  "blk.7.attn_q.weight": "37c95fc5acc55a4f6e5f02cab9be60e4fe54c08b65f98f4455741b4aa542ff4e",
-  "blk.7.attn_v.weight": "c89f1343486ba55814233511e94090f7365662a8a4214aa4c278cdadc79196c2",
-  "blk.8.ffn_gate_inp.weight": "4e239afe8c7afb8de3a005757c887cf14b1622ca2d224227591cb0e5301f4c17",
-  "blk.8.attn_k.weight": "2ad0229f30fdcc1e85ce64e00d8f75902238294844a81d5af43e14ba75c02983",
-  "blk.8.attn_output.weight": "2e44a4722acb3b521b81d0b910f8ca2f6c286d874a92ddd02150566454061699",
-  "blk.8.attn_q.weight": "1cd2b09cb2f43e08de776b5f7eac197a5a6d4ffdfd52b21baa36319450147bd0",
-  "blk.8.attn_v.weight": "5a22c57ebfd33ac500cbcfd321d5b5b1783f8728801db6f3f8bed51c7183e4db",
-  "blk.8.ffn_gate_exps.weight": "91063fe56cb4f3ff3b41052bb5046fcf8ef61516a603ee90aab893a9d68c15a7",
-  "blk.8.ffn_down_exps.weight": "d4c3abc8f1d1b462f67f70bd8f404b3fcf45dceeaa8527fa120527254c383c90",
-  "blk.8.ffn_up_exps.weight": "76a1a1f08ec577716a2e7027b45293e9205751126424f1bebe1de89c78f087d5",
-  "blk.8.attn_norm.weight": "f980d774da39eb76c52358afac3e38cb4c81cb323deaabbe5c41822e3f17a98e",
-  "blk.8.ffn_norm.weight": "1c937658cf90f1a85db9a5f26e077730fdd4b694607dbeeb825c5fb2bc407e0b",
-  "blk.9.ffn_gate_exps.weight": "a2532471ecb7896d5c78e5a34e10cfaf4125265e1595166c8d0d0dfbe2a3187f",
-  "blk.9.ffn_down_exps.weight": "b47921a28412d48fee450b8b9d97cee42344a2e69f06d407fd9523d7adf13333",
-  "blk.9.ffn_up_exps.weight": "7c461bd1b2a73b439cff6a10d94afa01e8b06f7e6f09d9a6f28e3876aef48bce",
-  "blk.9.ffn_gate_inp.weight": "1648dfb08b5c06d7953a5a97ecb764995fae9487fb729a1c867023b2538149d0",
-  "blk.9.attn_norm.weight": "8635db0f299882a63b7cfcd1d4259c9e53fab22c31d3d054de36b1001380b31b",
-  "blk.9.ffn_norm.weight": "f9309aa323062d174c463613afef9b0a33501b510bfaa58a8e0e866d12ffef3c",
-  "blk.9.attn_k.weight": "dfe62030441e947a588512d18d9c6e4ed72c2f71c227d622c095e4263b23dadf",
-  "blk.9.attn_output.weight": "1977beb75c6349c50ba7dd3865d7c0a9c5c5ddc854413147b0eec98ac4fda351",
-  "blk.9.attn_q.weight": "eb132596719605cd6bd1782487f121994629e115190edd69240b12af66e734f5",
-  "blk.9.attn_v.weight": "9e708f15d332d7c5187b0693b1a977eb30a2fa10bf7df48ed9d7537c0aa6ed99",
-  "blk.10.ffn_gate_inp.weight": "97503a5d166c1925f9b65c0eed980753d411714d66896f3d0fad5286c7aba702",
-  "blk.10.attn_k.weight": "1ebdd222336bd25b48df1b138cdbe09021c4a5562ea7cb78cadd1255d2be3a39",
-  "blk.10.attn_output.weight": "5e98faa38e9d514b9057e1c8342c509cbe1083defd518e506f6bad89117d1f5a",
-  "blk.10.attn_q.weight": "3323a26c87d936d1dd87c577d0b763459fced726679612c874b3de5fc6d969c5",
-  "blk.10.attn_v.weight": "d5fa73cb56aca388e205f44455e4b4f676fdc12ed7fac4542fbb3b41ecea59ad",
-  "blk.10.ffn_gate_exps.weight": "225021b53782800906cd13b70be3a4161e8b300b97f984a959ccad6a6e8adcbd",
-  "blk.10.ffn_down_exps.weight": "f08eb91526bd22f5fd0402fe925d6141cdbb308a1ced0330858d0c85c71f5ef3",
-  "blk.10.ffn_up_exps.weight": "a9f688350c3b53eaada5103b5848bd9a3d7d6b327a70fa16c24bf28ece933eac",
-  "blk.10.attn_norm.weight": "5ba426c9dfc79805015ccd76cd1068b0ad3bb7a8453e14bb1d35486f122d8f95",
-  "blk.10.ffn_norm.weight": "98891d6acbc3986b2581b7a3af9f5946a392d9188972c6a8b15d4e745a4f2482",
-  "blk.11.ffn_gate_inp.weight": "b2365a60566e7dace892e1cb0e62eb73ce387352601723e847052b34874feaa6",
-  "blk.11.attn_k.weight": "0efbc1d1430505543ff71532a4fcda821aeac616ef6c1dca40e00d4f2ff70bea",
-  "blk.11.attn_output.weight": "3d5bd4d9a41236f30d4293edb9ae27beaa113ffb31b4fbfadff3a4c370dfd3e6",
-  "blk.11.attn_q.weight": "aa11e9db14dd9c77951511443077c2a1a78070753d7bd3d9811038473f69e325",
-  "blk.11.attn_v.weight": "5adc567f377aa11d1763d35f50e53fb2896a8b03b623ac36acc45efa2486d512",
-  "blk.11.ffn_gate_exps.weight": "71d07d982aabfab9eed3c733d49c20f023bf475368fc71db5084d91beadc4b47",
-  "blk.11.ffn_down_exps.weight": "9a06e61461e48b3925a9f7d9cca634d048c8b62163d7bc5c43e35899f959319e",
-  "blk.11.ffn_up_exps.weight": "bc05494d0dcec61021b3ac0c5bc1bf502736cadf48224e213bc139d562699a89",
-  "blk.11.attn_norm.weight": "a5758a10bdd0404ae1470e8e9db903985d4d07f60553c5001a5e7b660d4f7ada",
-  "blk.11.ffn_norm.weight": "814ae037563aad3771787316bec4806c95bf6f5991dd6474b4b1e5cc13dc18ee",
-  "blk.12.ffn_gate_exps.weight": "3a68b831ba1606fb9ef6dffed4732032447ecef23ea563ff4e79317586c7eb49",
-  "blk.12.ffn_down_exps.weight": "268b25e13f4b7beab08686e83705a41b21d15251809ee4784526f78a580da829",
-  "blk.12.ffn_up_exps.weight": "9105751a5b5b42ca2614d0456f24f779d2e2ac8cdff0f96842aa7ae2b70f341e",
-  "blk.12.ffn_gate_inp.weight": "d0de1558cc1d458c5c504f63ddc59785c323df7330474bb0644c346104b40a3a",
-  "blk.12.attn_norm.weight": "859a4c8113678e2e202d10299850e0cfb52eb11ea50bcbf4fe3ff39bdd394154",
-  "blk.12.ffn_norm.weight": "7fbf4c459c1760218877e9ee3f5ad49e960956a4369bcfe96c143f04ff9ddf97",
-  "blk.12.attn_k.weight": "0a7e254fdf3730a57372b6ff421a613eabaea68cdefd64800857941411318374",
-  "blk.12.attn_output.weight": "ceb763fc15d88af149d8fb78e82db2b7dab3aeae584af8cf7611a12356a397e5",
-  "blk.12.attn_q.weight": "a43402d23c46cb2d3cb3c2a98c81b19d10026b7e6742370fed6b2880b6e049b5",
-  "blk.12.attn_v.weight": "3bc24f2c0480ce91ef72993ee8f1cf962f7359e12183424583ffa1246bf3db52",
-  "blk.13.ffn_gate_inp.weight": "a6d68c82bfe66d8bab68f980f5f18268a9e2c0cd6b8832ed39010e0de198ae05",
-  "blk.13.attn_k.weight": "0166c39546b37dc2e01b2b396ba43e183f797dd04eaa51a6d103d8b58ee4bace",
-  "blk.13.attn_output.weight": "2ce5eb198deab9557475a58b69b11e9874b547e05c23f223c6e42fa35ddca069",
-  "blk.13.attn_q.weight": "745c1bbdf434284a7fae98f45e821c076dd9c2a2467dba6a9d8cf0041e419dbc",
-  "blk.13.attn_v.weight": "9ece68d5ac64d1421ea7aa32e1cff9cc1fecf5175f4c4da858dd31d8633e3337",
-  "blk.13.ffn_gate_exps.weight": "ccfdcb4670b131689de12d396a010b5ea737795cf5c15a14a304d720b3c7c899",
-  "blk.13.ffn_down_exps.weight": "8b8fb328664764f1aaa5cbdec336d5654e981e965a02ef622bde5f07ea1c164d",
-  "blk.13.ffn_up_exps.weight": "d2ace0236c2fb3365fdc85499d676a7f65813c48e5085348b1df1799922766ec",
-  "blk.13.attn_norm.weight": "1ed29d7d89ce52d7cb4d57e895ff7115430466e917136c049c385c030ed44e9c",
-  "blk.13.ffn_norm.weight": "a194fc542597a4dcfdfaec5e3cba2a2b2b21b21edfc87c39c0d7f7651355bc4d",
-  "blk.14.ffn_gate_exps.weight": "a625e3574e5e740e7f8e2f9c40390f2f382c720aab5b10534e298002dd8d1fb9",
-  "blk.14.ffn_down_exps.weight": "bc366f015b83c865946afd74c8a884943e0ea2c671314a0b7bb72f21a44d2f78",
-  "blk.14.ffn_up_exps.weight": "ee3199bf2086de77b49f57f487676be8ee70e102a2fb5a5ef8ddbbc28a9eff41",
-  "blk.14.ffn_gate_inp.weight": "2b437870c850fa2e2044d032bb02908af634356e37466fdae260b933e48ee8b4",
-  "blk.14.attn_norm.weight": "cd8344d193a1cbd42bd898e17f4bcb1ca0b2918420fbdafa9249a6f2b7f4ae06",
-  "blk.14.ffn_norm.weight": "70eec40374e558fed5b07257283cf36342b6b0129285a00007deb59c32c9f7c8",
-  "blk.14.attn_k.weight": "4053bdb507e0543d724b632570bac86b31707696d90a0db44c49b2a082e0d599",
-  "blk.14.attn_output.weight": "0182632cb0e06a07241b8293d25d109fbc1862e1e337d435f908e8681e2eb1ab",
-  "blk.14.attn_q.weight": "ffc7794a4c1b6f793c842dba969435330a7a80b9212e457b4b2ac33e68b41241",
-  "blk.14.attn_v.weight": "6411805292d528e61bbaad8f9aab9dd073529a17946c057fb06864fad9cf3211",
-  "blk.15.ffn_gate_inp.weight": "77d0744567c76e6abb67f81ba9c715b2b544841186d5b948309571eff213bafb",
-  "blk.15.attn_k.weight": "1f7957954ea4c6521c257b35a360e868ffa02bdb3de91f146d5e06bb4a545c98",
-  "blk.15.attn_output.weight": "d7809d36bd8d3342240c46fd87bcc7f9821a222f48d9a95e45ae50460265d3cf",
-  "blk.15.attn_q.weight": "25f509313ae4d8401b871904059f472a26f5714e7c791c725de77a1a522c976e",
-  "blk.15.attn_v.weight": "96fedf5a591fc0f020e6de10fd72ff12b3ef9cf70cd21dabaa0d3e7b06f54e73",
-  "blk.15.ffn_gate_exps.weight": "8f950d976b2fd9a3d213b84123cf114c1377efde9352767fb2ddee89e177c8ef",
-  "blk.15.ffn_down_exps.weight": "6fd09d1557bb94b06efbd4f6a1ca4be532a202ba290e9315bc8da3d12a5c4c4a",
-  "blk.15.ffn_up_exps.weight": "cbeb59ae7b0266a928dc7e3a6e70a9330b92f9ee1b17ee1ed91022108204a33c",
-  "blk.15.attn_norm.weight": "2005330911ac2edc7b6d27aca021c67d30d16eb632e49b1a13f30fdb2717aed0",
-  "blk.15.ffn_norm.weight": "0e9198f3b548eb78acc8961f2b3350d238d26cec110933ba753a8cf0035c501c",
-  "blk.16.ffn_gate_inp.weight": "a41d1f99d739c8b150c3945b6949763988d0c6a4c5a2b5855592ca1a48ed23d5",
-  "blk.16.attn_k.weight": "b624e2ec88c2d3047f60530fb87e72cb4a5e655a9663f6f3e9b09e5ad32cddaa",
-  "blk.16.attn_output.weight": "687759ea75e45108526ffc1573d6fdf084728079bfc2dc89b9979e76280f43c4",
-  "blk.16.attn_q.weight": "beff3a45c7e9ec82ffc6d3c701126be28654d10aabd747d03441210491fd31b6",
-  "blk.16.attn_v.weight": "43a349b13f0b9d040cacecd942bcb168c030fef8c75c987d59a4fce6c14e855b",
-  "blk.16.ffn_gate_exps.weight": "793406d6c13d727c82bb7b692ca98d65ca975baee69fc57be5378d77c5a19b62",
-  "blk.16.ffn_down_exps.weight": "9bad3dd150d0230404b7f886ac7ff8803225757e813f195cdb26bad245243b4d",
-  "blk.16.ffn_up_exps.weight": "7449d663023fea3496475bf0a9c1de7272ad0ce9adcb3265e8e424badaa674dc",
-  "blk.16.attn_norm.weight": "a424ce34c195a401df1ce37ac4f2794e8a6720b1ee8acb21428e2b68c65e0125",
-  "blk.16.ffn_norm.weight": "405a68bb8e16e1064df2de55ca3cd9ceddda1d9fc0af007a9bd7cad4b2676248",
-  "blk.17.ffn_gate_exps.weight": "97c6e5321491ca5dc039ee88da0eb0e78f347372785411809af84b3298cb19dd",
-  "blk.17.ffn_down_exps.weight": "1617ac19788a1be19bac69277408761e6bdf5719d63a8c7fea14d41cc27641b5",
-  "blk.17.ffn_up_exps.weight": "4ead1c365f112581c10610ea3f63d2a1474311d2503d2060fed4b458ef337f5d",
-  "blk.17.ffn_gate_inp.weight": "ed4b3393f2523f2b5e0fc7680a1caa2842e605728a529b5af68a7fa8d7abf940",
-  "blk.17.attn_norm.weight": "beac17ef86a7fb2b5840cc72f7a95a5e3d6bd24e7fa698e0b0ebb9bdac45c561",
-  "blk.17.ffn_norm.weight": "81cb58ec6d6dc02a0b4ede10adc336dc865fa76f982d4eab0e4a37b40f5b0fac",
-  "blk.17.attn_k.weight": "eab569e5ea8c8b05e5a6a209fba031129453c2e28181eee3e736b3b04b36bbec",
-  "blk.17.attn_output.weight": "f85b70f01438ce8fe5d10599b113f30bf18dee2bbae0657d3eba295870001db3",
-  "blk.17.attn_q.weight": "887ceebfbf6a2b94b43d2df4439ac3a5bbc29311d4b28addc04d525546032047",
-  "blk.17.attn_v.weight": "2df9414d65014c06a93da22ba3a668be7b83e2e8008e98d7771f7dfebed98298",
-  "blk.18.ffn_gate_inp.weight": "9b07741a0950fc667e5fd25937e33bc22e1f764f80eb4ff3119f005327ae0f6e",
-  "blk.18.attn_k.weight": "8649598dbb63938744c39bcda5ce8c31773e29c573be8d4d2c114f5030f8d3e8",
-  "blk.18.attn_output.weight": "f8e391adb92622298ca834d5d1eda48b69c3b1c51c5a584ef6c54a725c298d75",
-  "blk.18.attn_q.weight": "84bf8708a2eed618f48f69c178ed7dd11fa4c468102376e72e910ebd037d131f",
-  "blk.18.attn_v.weight": "31db3cd773f09548c2c1b1eac2718e46364a7810970fe9c433fad9d8de5397eb",
-  "blk.18.ffn_gate_exps.weight": "be2a2ba378002f1b61f86c273a69eede9b93786d5ce96b4fee1861f730dca4c4",
-  "blk.18.ffn_down_exps.weight": "d35196159e37705db50a5343e3989f7335477f1a4add67ef42ad64a638cd07ae",
-  "blk.18.ffn_up_exps.weight": "c6ceedd86e97913a6dcadc838e7abb762d629fb8dd55f15cf02fd9bd66d2ba78",
-  "blk.18.attn_norm.weight": "41f0b1ad83d6e3cb9fbe0d27878c2e7ad4a351b9f554a6bc9117c01745cdf6e5",
-  "blk.18.ffn_norm.weight": "96646204bd0d82f25dc77faba4dbd86b1332e449313e6684e00122da8be99057",
-  "blk.19.ffn_gate_exps.weight": "c6eb7f61e7938bda0492dbc05e51e8f631c99224fe18e99861fc4fc53ba9e9ff",
-  "blk.19.ffn_down_exps.weight": "4384803da3a3a3d44120d7dd192fe2c9bbd9a1a0cb492dbec1fdd7565230f1e8",
-  "blk.19.ffn_up_exps.weight": "22d73de2fbb8bb0f1bd2caf17fad8a355c47d914143f7f6e6d0128f66f074a60",
-  "blk.19.ffn_gate_inp.weight": "9a0cc4a2301a5634022fbce41189021bf0d1a961792d2d9330fd35556d18e5bd",
-  "blk.19.attn_norm.weight": "c5cc56ec5df9a1f7d5ad71fbda49f1433132e58895d45cb44c73420bd61ebd6b",
-  "blk.19.ffn_norm.weight": "77e17de741742ef2482fc7872fd423c8e3c1454dc4d2be89ee939084b6d78bc0",
-  "blk.19.attn_k.weight": "a92ea36ce2e3569656306aeefb835ccd5d1b03b33a86e0d3d030644cc923b813",
-  "blk.19.attn_output.weight": "5e2a912b37855f84ea964907a1a86d609cbdd79efa0c93c3e8e2fc07caf7c226",
-  "blk.19.attn_q.weight": "4ef3a5913292ac3c1a6fd3e9e53d011021f2b41d0276cf849706d1ca925cf7a7",
-  "blk.19.attn_v.weight": "42981b75b68ae852cee638b5433605c147da4392aaa6d7a06e756115b0171f39",
-  "blk.20.ffn_gate_inp.weight": "71381b9879a7c80b9f7b475abc0aa31b8cd71ccc00856ebe89764a2acb9df2dc",
-  "blk.20.attn_k.weight": "1928b7ebc054eb3967929ed6fb446314d5352f4aaf8b475ce55c6345019f2ea4",
-  "blk.20.attn_output.weight": "6071ecd9ca91af0d2ba93fef4a1a56f3b243dd70f862a21a2d164d56f386043b",
-  "blk.20.attn_q.weight": "002e95042a40f36ceed5829e3d0c8072e5f5e4ee86a089e2902b2348fed24dd5",
-  "blk.20.attn_v.weight": "42f509cdb1c0e298f89f896e349be86952c5168e49b3f83bb17badbcb7596d57",
-  "blk.20.ffn_gate_exps.weight": "a684a3ffe4b0a57c819a5fa9cb3521de223f392732927271e97ce925b6e33765",
-  "blk.20.ffn_down_exps.weight": "e3081a7bc7ba750d8a4886bc8ca4f231b55db4ca082b54b4106c7531964725cb",
-  "blk.20.ffn_up_exps.weight": "fad0fd5eca36ab154788da28be8ec25bb5d6db06c9d133db89e96df358a2f6a2",
-  "blk.20.attn_norm.weight": "c3e3f2429715ae95e884ef1246b0b461b23c5cc0ed08beecf70a14cddd184820",
-  "blk.20.ffn_norm.weight": "ff31f609dda65ca496b0584fabea6550e42edd05ebf229812aa6b7bb5ede15e6",
-  "blk.21.ffn_gate_exps.weight": "366f09ef0ecfb86808eb3296cc9abdb957951d27f6533c03f1422b54061da660",
-  "blk.21.ffn_down_exps.weight": "3fc495947d27fcca7fc0893c8a96e5d48ba27b2c8c58f8fcfb8dcfcd5539741c",
-  "blk.21.ffn_up_exps.weight": "6713ed51410bcc8283cbb001c4ad784098f25701e8021f4fa4f411e186859c4a",
-  "blk.21.ffn_gate_inp.weight": "6d4c92c01ec801647134d907bf1108878156df266a6107abc10526332b328b93",
-  "blk.21.attn_norm.weight": "27605719ae2df24f4f2e85a730927cab20367631612cb501631f6bbf38eb1209",
-  "blk.21.ffn_norm.weight": "ca80ee8177db185b15a4a378c1cb6f7143c76546a7f1726bda23f329323d4ffa",
-  "blk.21.attn_k.weight": "9e49f743d4a5bda9b4bd9c40c2ca37cdae5aec7e54cb193897ac8b4945ada14d",
-  "blk.21.attn_output.weight": "ab923540879753feaed152f5950f69cdd83d8f2413ca873f5f038b63ab0aea12",
-  "blk.21.attn_q.weight": "62617fc3f1c9d2aa672a4d91a121c7a91b92d145b65e75f0b06b4bb7c825dc36",
-  "blk.21.attn_v.weight": "15f8b2e72f8e8e992f2f6b3e93238a9d7be7bd6136f91c9d04b4b4cd0cd60369",
-  "blk.22.ffn_gate_inp.weight": "3ddb1773d9257b68add7a2a4e94dad25ed926803e02707863dd742ab9b2dc179",
-  "blk.22.attn_k.weight": "680e45a9e8d5feddee5266e119dc053bf80718fa9af1cf6803e6f493b265f1eb",
-  "blk.22.attn_output.weight": "0d5fae3402fb2c5aa3a860010e3973fc8e3168d1015f7a76b7b2964681693206",
-  "blk.22.attn_q.weight": "eee7e3d426ab533bd18d62c9aa142eedbde394bed07db58313e0fccc82a23237",
-  "blk.22.attn_v.weight": "26b5be1fe3c2b6824c5a648a3e4bdf17691904526fca158fbc3ebb627b67e2f4",
-  "blk.22.ffn_gate_exps.weight": "32ab7a7735313d60f6a75229b1aeee940b6aee176c9648536bf5921b0dc2929a",
-  "blk.22.ffn_down_exps.weight": "67590808f6a67777d3eb7976c31fe616d388b98fecbb12253b72d1241d70753f",
-  "blk.22.ffn_up_exps.weight": "fc245c0183e6d90829ff5e71a4ec93e4860b3d4c1a17b9dda2fb64f5f5c9ed32",
-  "blk.22.attn_norm.weight": "128e99d206d4d6724758ec97468af767fa0aea592149c324b731659c1e74a1a8",
-  "blk.22.ffn_norm.weight": "e45f498033f0cffa15da0eff2c47b4472e43fcf8921729fc4eeb2e3a6b3c78e2",
-  "blk.23.ffn_gate_inp.weight": "d63e686f5325fbc89fa242c2c52a3b8ff54f867dca914c9ae6eea13e9d6f46e5",
-  "blk.23.attn_k.weight": "f71f5a577f46ea12b1818f3a5ff4b85ddc45f9a2afb0fa2e041d71a3e31c6779",
-  "blk.23.attn_output.weight": "92b13563c1e0eac0d748fb67b235dfd7a64c8f16e2dafb316885744582e23b4b",
-  "blk.23.attn_q.weight": "2f9b9c35dc4f912f3f51c06e2d68f417b51a0de0a84aac530a64f9d3d7b0a2dd",
-  "blk.23.attn_v.weight": "268e40813806e74a5c364b19556d087bf8374e76e7b6fcf55c381eb7da13ccd1",
-  "blk.23.ffn_gate_exps.weight": "12f857e7a7ce228afac34d99b602c8d6fe96984f2a21118f459a58cb767ee65e",
-  "blk.23.ffn_down_exps.weight": "cdb082c16599c3bb36a28066dcc122d9529b54fa91b6cf0153437ec960a5e16d",
-  "blk.23.ffn_up_exps.weight": "f4b99f6f44d7b8b5a305894e88633bf5938fc1f6303a2b2092399da9c8b64d7c",
-  "blk.23.attn_norm.weight": "a691392210383915916b4d3886d5e4d56e7855e27e37e414fbd73bf66b3712e6",
-  "blk.23.ffn_norm.weight": "0c3dc72f667e5ae19b69bfa9f2bd2a01a57681f89ef9527bad4eb0d8c7b70da8",
-  "blk.24.ffn_gate_exps.weight": "86baca2a3157994df7fd8ced5e08436d5c1810dc29c0715637c36de723e0e7d1",
-  "blk.24.ffn_down_exps.weight": "ac5d559562b35c34993e34b071f66d15c65be5907797078c2d2a49aba54e3192",
-  "blk.24.ffn_up_exps.weight": "fce0a099cf09777f44fbab3606ceb75f7fae6f0b80725f9e871654b8cdf9262a",
-  "blk.24.ffn_gate_inp.weight": "e7c6800c0cfc56b565b2d35ad6f1dbfdb70dd0b05b338bc8da2286ffc3678d79",
-  "blk.24.attn_norm.weight": "dc6cc18ec52d102d015153c4a1132f9d7a504e29cbdec81c5edbf3b9e65815e1",
-  "blk.24.ffn_norm.weight": "480d5a1397af5e0e657f1e67d20ec0cdef5724e71246a326843321b87ffabd33",
-  "blk.24.attn_k.weight": "338c0597954a9b95a782545b2fe36469553e73f86ae2d2b5697767b28e1c7daa",
-  "blk.24.attn_output.weight": "a77d23b79933c67e52f1eef7f83a3dff4f767ce0bbcc39572f8cec4acd457643",
-  "blk.24.attn_q.weight": "45c9478593002be1998e96e70668aafa2dd3972380fbc1df12fb05c24ba959e0",
-  "blk.24.attn_v.weight": "515729420885408a6a9614bc27cda393ed907521318d14d21335d39a3eff0b61",
-  "blk.25.ffn_gate_inp.weight": "aae4ac40e9ab3925241f9d784b54b38851d9bc999a6c3bc03fc3f17c9b28a67c",
-  "blk.25.attn_k.weight": "4ab4808d02396c35b00b426f536015673b71c17ae6cd55bbc2e6bfe7a4c59d0c",
-  "blk.25.attn_output.weight": "1990bb982b77e0c947cd1a8ef0b36227ee1259e6dbbc2829e5c136edf88675eb",
-  "blk.25.attn_q.weight": "a1490f3048e8c0ec8784f8550c43adf5cc8d0f2f90131c934713fe4b1b015bd7",
-  "blk.25.attn_v.weight": "f15e53c6d45b3b6f58808fa968425d65e0b26b7f9b268127a77abb1227c67431",
-  "blk.25.ffn_gate_exps.weight": "656662447ff54f56ee80f78a1b9483f7efdc40f7375d0cd8a9c72ccf21f77e7b",
-  "blk.25.ffn_down_exps.weight": "db06f101bccbaef19cced0f6c185166e18202465f4a42cddfd535fbe5cbabb4a",
-  "blk.25.ffn_up_exps.weight": "584a7b02456f27fe1d8d3c7ccd21d426b6ea887795a3ed77f704596a1e3841d7",
-  "blk.25.attn_norm.weight": "8f0f3597982930fd237e9d609776c64f2b909a455b21678f83a7ebd4bbb83e64",
-  "blk.25.ffn_norm.weight": "3e7079c32582afba0c55e032f254adc18d2997705eec860185e9a6dd3d82f07e",
-  "blk.26.ffn_gate_exps.weight": "e70341691b583b86489812b29b77aa41eb658b1865733d6118da54c66e3bfcc6",
-  "blk.26.ffn_down_exps.weight": "5c1b812d11dfb064af816ced5ab6463bf9722eefdfc341b8a93705d5038fd781",
-  "blk.26.ffn_up_exps.weight": "e18118362ae54ef7432781c83884f9fb230a9d934e342aabeda8822ea5f71fb6",
-  "blk.26.ffn_gate_inp.weight": "cd1c5f6710166b9567c6b74c97b2348b191c60aa860958c6bc264ab095261dff",
-  "blk.26.attn_norm.weight": "71d087531af2520bda2e676c489e8529cef5db8aeea1eec0a937a8b4f2fa2e54",
-  "blk.26.ffn_norm.weight": "7f704e936fda28eb5c2cc339f0f6a5f78170b5aa43c01265b21668870d819c82",
-  "blk.26.attn_k.weight": "1cc62a0ce0ae251275d898c52c4a9fba5995fca10955d2011d10dd1a59e1afb8",
-  "blk.26.attn_output.weight": "636e881b1505f9cef656a4be98bec6a4765321d51f9bf1dac8933397cf44b765",
-  "blk.26.attn_q.weight": "89a3c4d202d7d6adebb9e0c1bcfd8b775f6456386f1be25e86e43acc949c1e16",
-  "blk.26.attn_v.weight": "ff2cc963b597cdf1a21703f3e7022af3bb4c65a34a19e19d9309a7c5e198b5bd",
-  "blk.27.ffn_gate_inp.weight": "6150139498fefe380bb99d11e72028da47a15ecb73dfc5b2774f726f4bed8f9e",
-  "blk.27.attn_k.weight": "f286eb9e5c56c7b801a497aedc40158c2a27877d7f9fb59b3fc67834798902d2",
-  "blk.27.attn_output.weight": "5dc3d3a05f9f7729509147fd09c16fb53f85f520cdab5cb69abf4bae3fd460c7",
-  "blk.27.attn_q.weight": "8462e40f86b24251960d6f35a9ea99b8793a01937faf1aec2859f2e5395dbb61",
-  "blk.27.attn_v.weight": "bac1a99e38e25953f8315f7212eb9777dc216cadb09b959977885ae62724ceca",
-  "blk.27.ffn_gate_exps.weight": "6a15eca7f0f6ecfd93db2e55c63875348ec4a78c4ff643ec46df9e958c0101e4",
-  "blk.27.ffn_down_exps.weight": "2e1c91247c4359e2073a8e5f26fd7f6426da7be3ed5bc65dcfff701f0a5022b2",
-  "blk.27.ffn_up_exps.weight": "65d6f5c553c9332085eae4aeadf25090b5d7768212ea7b08ed698102c21b29a1",
-  "blk.27.attn_norm.weight": "7fab8ae63ec8e91ce625cd130ab96d8427dad3a7413bb21b25ec5f408c5b9f5a",
-  "blk.27.ffn_norm.weight": "532720546b0fdcd423a02ca6e3e9d8aacb84b1b3e8269968f88a47fe2a69bab4",
-  "blk.28.ffn_gate_inp.weight": "a305ea58d98962d9dcf0c53ad2389b7acc8936fb35a0e3fc9410e7767cd49dea",
-  "blk.28.attn_k.weight": "8315e8a2e4f78dfdf36d4fc18fffc74bc95fe42c3ae4f9af2b6c874612c0f71b",
-  "blk.28.attn_output.weight": "9b5fdedd32d39ef46a22cca7cd5355d7b93bd07ea305f466a8aad6ca5a4f3778",
-  "blk.28.attn_q.weight": "4e8fb96997c30e231c437130f410d7c91d541a816f6c568b5f3bfdb4b8dece74",
-  "blk.28.attn_v.weight": "1fec739cf3bd7b4913f72ca358d4cf31391c304de44ac0ae31ecb825beaa7cfd",
-  "blk.28.ffn_gate_exps.weight": "9f259789d535e09268266b9a8020f32d6a6779966c909d91d3a10574f06238a2",
-  "blk.28.ffn_down_exps.weight": "516d3f8abaedb01b9916a4b67d4672159769138ef2850158bc1b32c41e31f0e8",
-  "blk.28.ffn_up_exps.weight": "f2f1d88d2c31ed588806fb5ad981d68f5134d7284c4fc022fd018de2eef437fc",
-  "blk.28.attn_norm.weight": "960fd005598deadaebd969996f4367a9dbfad90539a863674fe95730935acc64",
-  "blk.28.ffn_norm.weight": "e1993b37ced93d4049e9af2c47b0d9207d8f7e6f2cc3a52f57bef30bc806d805",
-  "blk.29.ffn_gate_exps.weight": "58927146338f443513337476b3cd30e6341742f096c2beb5890d400f10121298",
-  "blk.29.ffn_down_exps.weight": "03a3386e4f0b75a28c5608e23b2de8f0de25f21954e4aa7fc343431bde9db07e",
-  "blk.29.ffn_up_exps.weight": "6916b7490a7ae7b04a5d81cc1e7ac9b20c483434f3b186b12d87fe176bf1567b",
-  "blk.29.ffn_gate_inp.weight": "98e710e467a3d567abe4ce29d78b8e8dc033148762290c0c5e1ae4d78efd8c78",
-  "blk.29.attn_norm.weight": "4e64cb307d37be20d55f38c94faf7e451d11df5e60df347906cbaf9c5441be71",
-  "blk.29.ffn_norm.weight": "696c23a52f742679bd44440d687a4c44b4302d57f1e9dc5610d23374336187e7",
-  "blk.29.attn_k.weight": "e85253652fd6120c623634ba66b725bf7cd491318b54ccdad2c7df8851d64c0a",
-  "blk.29.attn_output.weight": "4f650a71efb150d1f24cd4d114d4187bf570ac424da3b92ea6455abdf1aea705",
-  "blk.29.attn_q.weight": "69fa7da901026ebcbbbc848455b425458b7e3295007d7fc093acf4b38e2166ea",
-  "blk.29.attn_v.weight": "17e2e7590b317b21f106de546aafd955579703d1e95d6aea044ee72ec3a514c9",
-  "blk.30.ffn_gate_inp.weight": "3a03284b4aa60d59d4a2ec86253469b61fc656372afca427cb77a5332fbcc62c",
-  "blk.30.attn_k.weight": "d518cfd0db9708e769eb1399e87ee49357dc54d5afdbac3d4c0ca46c64e789eb",
-  "blk.30.attn_output.weight": "9b44378714d784c5ef9ab604359091baca4e0ec222afa139b7f840eaefb371fd",
-  "blk.30.attn_q.weight": "cbb95365bbfbcad0c9cd99b4eebb5a5d32de68ce08e4063b5ec3e792b7548044",
-  "blk.30.attn_v.weight": "e7985c04fe1740e35a9598f43b67b0922b4fc2d00b68a92a9f917b82c3248de1",
-  "blk.30.ffn_gate_exps.weight": "8ac4bbd07935d98f895ba94dc174e5ad5046c3c222b53729d60f987c05e7eb70",
-  "blk.30.ffn_down_exps.weight": "dd672cc71e82abf05064a18121b8e55fe1a4f19bc1d7cb9a142f4add54bc336e",
-  "blk.30.ffn_up_exps.weight": "12282f664a2a12aa25e2deac58946108715ebb978bafed5274cef24569107646",
-  "blk.30.attn_norm.weight": "1a33458fee054c6c9c896a4bb0a4e1fbfa0293b2408c7dd2b81d692e966e7273",
-  "blk.30.ffn_norm.weight": "311e33b68051f507f1478ed8f2693fddb846170ddb7285a91be43f795c2ce31e",
-  "blk.31.ffn_gate_exps.weight": "8af43d9867a51cd8392fb48b981b0ceee0ae979c491c07d711b3b56b5162c786",
-  "blk.31.ffn_down_exps.weight": "5579cb7758c1600b19d1f540deffe081b575962e37437b3b2efb2fb0a2924e40",
-  "blk.31.ffn_up_exps.weight": "f2e7c005276b3a001fb40753f027fa10b4d5a346f43cf4b4bbdeec6e74e1cf6a",
-  "blk.31.ffn_gate_inp.weight": "89885dc0e30b6b16a90c0331d7fa3174671e941364e8102d934f02132237e61b",
-  "blk.31.attn_norm.weight": "99e4e9bf86a9edf8c404153a7e8a82324ba79da462622196e2faba161bd95172",
-  "blk.31.ffn_norm.weight": "55335997cf6de781bf332b943de96ff4646966b05d9fee86b76ea897e27b6ca7",
-  "blk.31.attn_k.weight": "cee570762b78da6316b637892cc4b080e40f57af5551ffb1866b9a8e80e96628",
-  "blk.31.attn_output.weight": "fa321ff55ec7819ead7b819fd45215262f39744569765ba2113c989c03588802",
-  "blk.31.attn_q.weight": "9e2c409b878f8a2a1436874abf428fceb1c534b21f9ad4dd6f532b8a469007f0",
-  "blk.31.attn_v.weight": "a845d0be68ba537b4a775bfba4d897faf7c82a811a2612b0b7420cc4f3574cb8",
-  "output.weight": "16101cbb74b54cda9ebc07ca3c762e3263a56efb3cc011156184b95807d7cf13",
-  "output_norm.weight": "d7aa61585baedd60157aafe157930785742c55989c288573566a971b02423564"
-}
--- a/convert/testdata/Phi-3-mini-128k-instruct.json
+++ b/convert/testdata/Phi-3-mini-128k-instruct.json
@ -1,225 +0,0 @@
-{
-  "general.architecture": "phi3",
-  "general.file_type": "1",
-  "general.quantization_version": "2",
-  "phi3.block_count": "32",
-  "phi3.context_length": "131072",
-  "phi3.embedding_length": "3072",
-  "phi3.feed_forward_length": "8192",
-  "phi3.rope.scaling.original_context_length": "4096",
-  "phi3.rope.dimension_count": "96",
-  "phi3.rope.freq_base": "10000",
-  "phi3.rope.scaling.attn_factor": "1.1902381",
-  "phi3.attention.head_count": "32",
-  "phi3.attention.head_count_kv": "32",
-  "phi3.attention.layer_norm_rms_epsilon": "1e-05",
-  "phi3.attention.sliding_window": "262144",
-  "tokenizer.ggml.model": "llama",
-  "tokenizer.ggml.pre": "default",
-  "tokenizer.ggml.add_bos_token": "false",
-  "tokenizer.ggml.add_eos_token": "false",
-  "tokenizer.ggml.bos_token_id": "1",
-  "tokenizer.ggml.eos_token_id": "32000",
-  "tokenizer.ggml.unknown_token_id": "0",
-  "tokenizer.ggml.padding_token_id": "32000",
-  "tokenizer.ggml.scores": "6e37bcde2adc7e350e87c496eddd7a2124329c1dc66c5bf3ad3997253e4f7a62",
-  "tokenizer.ggml.token_type": "b6ecf55ec64ee67d87750bdb8d757a2c58bf78377e9f4219f5689a6c4dea57ce",
-  "tokenizer.ggml.tokens": "d168da3ddd3eee820916945fcb9baf24dd3cde42f606cffa2d19e7c8a8743918",
-  "blk.0.attn_norm.weight": "216aeb2c9e0c271f899e1ef2a63cceeb8f41e97642e84fada54b1d3c1c11cf25",
-  "blk.0.attn_output.weight": "b597d56f7188ffc1fafc273fadc59d41738cffd677ae98c61a62c3285b3a3099",
-  "blk.0.attn_qkv.weight": "d28a6b44e13f59be5483e4be2bedb544e346168d720aca27f47d1a5a722be91e",
-  "blk.0.ffn_down.weight": "4a691370e5a61fcbbf540fbcbf4c0f1d15dec0364528c0e916d0744f6262b63b",
-  "blk.0.ffn_norm.weight": "0c00af2b4a3128bec64a0cbb1084b042fdbe13d9ad0d03bd577f9449dfead338",
-  "blk.0.ffn_up.weight": "b32b52f790c1c083bfb8a3126dc1111cfeeb28dc8c584a930a1e5334cb176bf4",
-  "blk.1.attn_norm.weight": "68748011503c6c029e8e69a84a8e5a89338f378769627b6dbf7f93d715c292e1",
-  "blk.1.attn_output.weight": "2267344add13b048ca59e4377c86dc512be8046a57156901fa32a20fa74e4ee0",
-  "blk.1.attn_qkv.weight": "9109d2e3d7a2eacfda5226587b8be124a3bf44b972da7ebb17aa15795897eacc",
-  "blk.1.ffn_down.weight": "d675df4df4dd039c0c339ad6445d39eddd2004db6bf35bed6314c7497245a633",
-  "blk.1.ffn_norm.weight": "3b5767ae977bc8baaa06b06efdbea193b6b3ba605ce76d77a76ce317e935500c",
-  "blk.1.ffn_up.weight": "80dfd6d9d234b00334c89b8e0a02f81899c2efd377321c34ba5ba51a5f61b5ff",
-  "blk.2.attn_norm.weight": "6a6743b057e5088f145bc179e92c9bfb41163e7295d7b81c62e23dd89d2b59c4",
-  "blk.2.attn_output.weight": "bc5491ea54e0db81462d7d9b7d25cbdda380c2db8de041bd1c4ab7b76a1d19c3",
-  "blk.2.attn_qkv.weight": "a61287a9852e2f5aca9c100b471d98398b2913a3497c743de3c70ec9ddd7087f",
-  "blk.2.ffn_down.weight": "4fddcc382c8dceeab027fe43d8d44e67edb5e8ce4b9a1b7f773c87770380ade1",
-  "blk.2.ffn_norm.weight": "07e05f82b3f63f711db3b684ca79aed25c0657917e66f88af47348a82065c227",
-  "blk.2.ffn_up.weight": "4835a682ef1826c12df01ae7663fc45f9c82bc8e64b665f13fb7da8e201ec0fb",
-  "blk.3.attn_norm.weight": "f22aba7c03999ba7136f39cda747a39715e498699dc1716cd97fc5dfc58d1b1c",
-  "blk.3.attn_output.weight": "53b579855366fd786c5126b2b30aac4d583ca7bda56833c4865f5cadb5c18c6d",
-  "blk.3.attn_qkv.weight": "bb56aba78158123140fcea59c69ac562ca208f6d3086819417cdad8c50f333ad",
-  "blk.3.ffn_down.weight": "97280897a7cd86db2830c004bccc5bc094f50e293baded0189159a2019145a6e",
-  "blk.3.ffn_norm.weight": "10a8c99f8b57a960e8e0a1133c4a26f9148403d1b9bff2eff114917de996f3b5",
-  "blk.3.ffn_up.weight": "7324046c915e75d621b2043597a245a428d8eea31869135e6257a861491d8dcc",
-  "blk.4.attn_norm.weight": "507d8e164de94646edbfe33def8e8fbf7c9a6ee3fbaedb5000f72d9f51ec5e36",
-  "blk.4.attn_output.weight": "bbb3429e6efa98c150e0fdbf48c16180cbf0d0cbc1b3c253c6c319d78f4593a2",
-  "blk.4.attn_qkv.weight": "b95ee5be0786d3901273d806c339fe6c20e6bfffd2a20672a9f56af80921e8ab",
-  "blk.4.ffn_down.weight": "806bbf91df92a5a22bd5aa1ffb7fc2869f7293ffc7704771c290ecc583b27975",
-  "blk.4.ffn_norm.weight": "cfc2930a81df7aee3a5e7f726a15c1182233e868bf0d9d37f6b6ae6d8c15c234",
-  "blk.4.ffn_up.weight": "c3390c69533de2c8424e8069323ccc5d0c4543111535da04cf2c7d26745576aa",
-  "blk.5.attn_norm.weight": "0d71c4fbcefabbd021569442853d2fe90668b19409ae2805a718a829ca60beab",
-  "blk.5.attn_output.weight": "10ebd93629112bf2df5c30dd0953a4a5e9020306768283181ed426934d47e14f",
-  "blk.5.attn_qkv.weight": "5cb05633369f12d4b00e0ff787736bd846856682115720ebc6cce05270c334f6",
-  "blk.5.ffn_down.weight": "e28bcc5094212eafc7476dbc5b7a520d25b79578cbf4229d698e2655956a80ad",
-  "blk.5.ffn_norm.weight": "b6f2c4cf9f34bb4d59989f96165c14a67dc1e266ad0a6d0fcc49f1add929e6ff",
-  "blk.5.ffn_up.weight": "0f9ef99423cc07ebedc0e9cfa95809f2d7108d910bb4ef97ebc0b0309c440750",
-  "blk.6.attn_norm.weight": "b3edcc47a42218234f7564d7470611b49401a41ae8cd42123f86557c69f5d7f2",
-  "blk.6.attn_output.weight": "eb9b7d257b388bb5b8fe0515e5c6873317239cb94cda236e4b6ada2a6c57c65c",
-  "blk.6.attn_qkv.weight": "eb968081f478c52f07bd9c2761741e982dba33cc4eeadeea3557d391b9ac2106",
-  "blk.6.ffn_down.weight": "1b8588bb7463206290322695577dcfced300895d6e6f4b26966c53a9ae2f0f84",
-  "blk.6.ffn_norm.weight": "1219c04b7770983c77814200eefe743f46d15328ea2b12711e44f8103eab08d3",
-  "blk.6.ffn_up.weight": "197ef287239fec47c55677f0fbb66eaf0644f775bc382de843971730721394f6",
-  "blk.7.attn_norm.weight": "b630ad08c80d564ed1c024384818e9fd3f22a36cd7a14aa96e7e2759a8285099",
-  "blk.7.attn_output.weight": "970255aa750828a47d6b9d399f9612b5bf25aefe7dadbcba41fc416d0d4067c1",
-  "blk.7.attn_qkv.weight": "ebb157c880293e6de8d629f263ba8853ed1dbdc02c311d43432bb8cfbb310739",
-  "blk.7.ffn_down.weight": "24bcd4db4cba844c89f878b81843c373dbbc0675e889d32c5b12e63384a7b670",
-  "blk.7.ffn_norm.weight": "b9c6f71001808ee873ce7db8056e4b53fb4cccec8b7f0f312899b575fae39d39",
-  "blk.7.ffn_up.weight": "979f1828d227455c26015a2a11afe9dd05f2bb97a8ba6b38c8dab3f50e627401",
-  "blk.8.attn_norm.weight": "4e8e347e3775010b7112ee630f2f4f2383be7ff64e6ca6154b9b22566552eaa6",
-  "blk.8.attn_output.weight": "65a44babf44a435a1829945211b3168f9ec78ac3cb7a049a733e93d11f0d6659",
-  "blk.8.attn_qkv.weight": "343ed07671da400b040812a4058482fa38284b5d9af9becfed07417fe26ce747",
-  "blk.8.ffn_down.weight": "7fb7e073e3c2c503c4e9d60efa0988fed7398d900cc003695fe3fffd3e188b82",
-  "blk.8.ffn_norm.weight": "b07c1f655d8593e3892a2cf73f8a0c19ce8e5cb613fafbe7cbd430da8ce4c57d",
-  "blk.8.ffn_up.weight": "8b26e14de54b3fdc2e2d3ea41720f9d9c236a93688c3b7fd7bf43f5fbb327c9b",
-  "blk.9.attn_norm.weight": "46394d408a8e316916177e6aa261de32e137a82d729c0b1800b072f0c38c39b6",
-  "blk.9.attn_output.weight": "d57f3d46107947a7073373a0b35d6ecf7759b5df15406f4a3590a60666af6b16",
-  "blk.9.attn_qkv.weight": "14bb8ace8c5453148f4b536e9f4279c813f31136716947256f5cca333448639c",
-  "blk.9.ffn_down.weight": "2b8d98e2b5ed68338f6e4de43bf7de0c4858cc69103cd5177725f7444eec7694",
-  "blk.9.ffn_norm.weight": "41a499dfd418cc4c6b8c12313f673f7e2cd4a3f9c4065eb6c4feb5eed02fb542",
-  "blk.9.ffn_up.weight": "143aab7533a64b17fbe201490a6f674bc7f0bd370c094500b2e100419073d1c2",
-  "blk.10.attn_norm.weight": "ebb670aafd36816a794347287269d8f1a5b19c1e3c0a1e38023bc19fdba9b073",
-  "blk.10.attn_output.weight": "b5d65bbc0ed5e49fdd9d754bc18163cd042a285024d0cf6f954c503bc8c877cb",
-  "blk.10.attn_qkv.weight": "f06b15bac88da798fa34a62b03eaac0dbe8b846020516603c387541f2d8dd672",
-  "blk.10.ffn_down.weight": "fb091fcd1b4de25d1bea94d1755e255cb02914a030d23e3a234e57b8d46bde6e",
-  "blk.10.ffn_norm.weight": "eb347bdf9c40414af87e13a8e72e40b31f004b50f7cb366f1a219ced60a61355",
-  "blk.10.ffn_up.weight": "ed2d52fc881a173f404fe8a1067862c9856d6c3e0d2e90a330a7aa394e3f84d1",
-  "blk.11.attn_norm.weight": "64e252603cf010a0e502ca39fdf8d0a196a79aec67c0d2bb9213fc0cb80c47d4",
-  "blk.11.attn_output.weight": "228e33e21c69f52efc74fdfc831bc9af271e44b2a29a3dced1d64e667ce36eb5",
-  "blk.11.attn_qkv.weight": "ab9ce6d4ef9e42ee0da3f20a7708a3bbc5e79e967b05fa86ba946a05e2eb63eb",
-  "blk.11.ffn_down.weight": "0ca133b7835c98dc77c25d64e4eb7873778bdb5e4d22d8b80f920f46865b43bd",
-  "blk.11.ffn_norm.weight": "02455741a0dfd161c79aa1ecc381901721f229fdcda5615622a629631fb61cfd",
-  "blk.11.ffn_up.weight": "9fecdcc099fbb8e23c6b1ea9294702a027f4a58d265543ec5e7be79b8f63b354",
-  "blk.12.attn_norm.weight": "783bb459911b1b3609a9b2bdfe272f1670add73b5471da738e07ac47e2e07dfd",
-  "blk.12.attn_output.weight": "1e1a914c9e48b857206ac5a1f7cead994bc1ea91d5d4fff8c834d73f2e38ef5d",
-  "blk.12.attn_qkv.weight": "5953e7185ccb87fb4dae8f9426ec86315d4c7794326e8ab59b3a95d4af2189f0",
-  "blk.12.ffn_down.weight": "a3eecf0f394f86e2cfb48a5940a5c50ca86d71883b2f79fcc642a935fabce0d4",
-  "blk.12.ffn_norm.weight": "0a4272e41373c23bd72f10d2d82930aa3a1480aac75832bfbf01cebf0b86b6a4",
-  "blk.12.ffn_up.weight": "06f42776de3a7ceac3025f26a7a8bd20e062233cce2bdaa2183470dc4b30b87d",
-  "blk.13.attn_norm.weight": "5915da60fb03e201fa649faba780e5fdf1c761c262b206e5415cf83181f65780",
-  "blk.13.attn_output.weight": "4dbf6eab074fa3835fd32bd631a8208e511037d5056d2fd3015735cca7674ef7",
-  "blk.13.attn_qkv.weight": "d3d8339a1c4782d9e73d77fdebe154d3c5b83ac40c9175b3e91a4977d08f876b",
-  "blk.13.ffn_down.weight": "de6772b46a55e1fd42b007637dfbf68b6598e5d5b61622da0935002e1e192d3a",
-  "blk.13.ffn_norm.weight": "5a640ea3b8c7be49c95a58a2327e10d8e8d9d142504bde5c8091613e5b961d7a",
-  "blk.13.ffn_up.weight": "f35e3545e4bd3531b2e843b5efd31dee0c13c807ee6386e65473ba67bbec30d0",
-  "blk.14.attn_norm.weight": "9b34986450b7c98b4927e81e61a816f9e84b1addc7c14926402100037aad6678",
-  "blk.14.attn_output.weight": "155d52efb23d366016d861a251d4d1f4a0c13699188c50d50dba016a0d8bfcd9",
-  "blk.14.attn_qkv.weight": "8e1415084e1f33c73a777f19e752489f4dd312cca047733e5ea643cd4a955e04",
-  "blk.14.ffn_down.weight": "a2a142226b94baa01ccb65bdea2b7418e49085c1d9c3c63e544e3112c58a25da",
-  "blk.14.ffn_norm.weight": "8aecfd9b0ae6affaea31a80c5c9a4a14b31deaa0db7bd8f6da2a64d23447921c",
-  "blk.14.ffn_up.weight": "0c1407237b8c1bd02f193346b5681926fe698a5055eac6a7450451b0f991707c",
-  "blk.15.attn_norm.weight": "e037bd19880bfa83d983200fb0c7866f8ad16c3ff5cc4b4f3a37ca7373870ff6",
-  "blk.15.attn_output.weight": "045fe4fc95cc129a1b92771b179c11b12845c4c088786c607f17bd98857e68e1",
-  "blk.15.attn_qkv.weight": "7621b7559705cab1d4dea1c69f76dbf9dc1c8837a203b656f484703b9c1b70ce",
-  "blk.15.ffn_down.weight": "7e5ac20e290bc60761e1cd972354fde225b7fa861048d44d9a0dd9b046d55f58",
-  "blk.15.ffn_norm.weight": "b6d830d88f1db1825687973c8c2b1a24c6fa84f07af8d0e3ef9c86009baca0b2",
-  "blk.15.ffn_up.weight": "dcda0957cd04fc45476774dba2bbf9aa89d6b05d5ca7b10ae6f73ad2c49b1cd3",
-  "blk.16.attn_norm.weight": "4ee9b70ba15cb2a08240f93990e90f5068c48fceb481f8e2186bec8b7214eb3f",
-  "blk.16.attn_output.weight": "315cfe5536658d2498192b2980eade15b2c9a4ff220e4011911457b1727fa103",
-  "blk.16.attn_qkv.weight": "3c8122e3ad637583b9dcde8ff3a323267d3014bb1f0f9771e5322260ca9ecc8d",
-  "blk.16.ffn_down.weight": "3b5fbebd5ee2b86cad96fb8a9b45a8770d08f82c1c8b74d7061e866f7020a18d",
-  "blk.16.ffn_norm.weight": "ffab69f20bda372de6e5878f0539163e2fc6ba113621ded95705fc3b1465c9f0",
-  "blk.16.ffn_up.weight": "0935ea3d258da42d6258406365f39f58ddaabfe97ea5977580db3635188f24a1",
-  "blk.17.attn_norm.weight": "f030441733f3d147b4a06a1eb4aeb8465c7c24d9c53bf4c48fe7e134d3629803",
-  "blk.17.attn_output.weight": "07a955ef09e8dc766ac0df647d0b2c69f23c4c69a7137654b4aad80303ed0eda",
-  "blk.17.attn_qkv.weight": "1c10688061e21e2fe12ad0cb54bf03895c1f83c3b0df743a42f548b52cbca1b2",
-  "blk.17.ffn_down.weight": "ebb9cc9836f41d88fdae2aa9a4355514e4edaec8d1577ffeb947a35204e77f52",
-  "blk.17.ffn_norm.weight": "50aff44f6528b13db5389f2ddcdb7676244947610bd7ffbff3f881c968c2a0d4",
-  "blk.17.ffn_up.weight": "d716537949582be33bde6b02e38f5a70081c9642a9fb05a61312126718b8d148",
-  "blk.18.attn_norm.weight": "0ea695c4e53d637902f46663a6ee42adc493c36794476acc7dbddaa05b13840d",
-  "blk.18.attn_output.weight": "5fd35b500221a612eb4f4bddf0e9b6b7db4d7733032a75f8802fb2d884647c2e",
-  "blk.18.attn_qkv.weight": "b0da37fd030fe69581f990bf23bfd35467a1bbe558af6de7c0924f6b72e92317",
-  "blk.18.ffn_down.weight": "b355c33f44b328f4bb977567de8f7544db4b005d7a8fbded658518ecf3c5a153",
-  "blk.18.ffn_norm.weight": "58b3fe9094079989a86e0387143259e1cc35952d24dc3df290c4ba6df44f5c51",
-  "blk.18.ffn_up.weight": "2ce530954c342c30ed2ead5353f931960bfae1d278868504c0efb973560fabbe",
-  "blk.19.attn_norm.weight": "533e9aed66feea8f0392aa81f9e293240e1f009a5334253915fb60c2749b615d",
-  "blk.19.attn_output.weight": "84f2d00f98a4113a779d3b5d1c3e7c914eb47784d3ab13b290367c124c2994aa",
-  "blk.19.attn_qkv.weight": "fbe6b9f53b07fa7537d3b3d452d20a9bc666f9fd41ec2091dd28bc2f70fc668f",
-  "blk.19.ffn_down.weight": "b30199e098c8bb3f890183d8b18471e80b62b604729b277ad62488dd71e1206b",
-  "blk.19.ffn_norm.weight": "c81373e41cd340b7badb19f9517c77c4250b4eb9a02dc758b8b49b652487d7ff",
-  "blk.19.ffn_up.weight": "5a5cb083ca7725720e3a890f7fa46354760e8007a8188849a092e305694a75e3",
-  "blk.20.attn_norm.weight": "4953091b4477e354357a8e743ba0a1900633e52f1599ee082a0c9b0b2b5cd978",
-  "blk.20.attn_output.weight": "62d54f7749cd6856097b2632066a322b0296df915fe66f382c5b5981be0d4f23",
-  "blk.20.attn_qkv.weight": "406de9e35b0729ebe902d7a47905cc7fb29a921431ed35dbef0c03e5690a1329",
-  "blk.20.ffn_down.weight": "62fb678b0d1261e19a4903a2b347d67afcc8acff01feb33a687a35a2d1e6f9a5",
-  "blk.20.ffn_norm.weight": "cd9d36b7e71e55c8925b97bb09c28219f182626bcff094878ae39c3db887a14b",
-  "blk.20.ffn_up.weight": "b9276771d79d3e932e73ccc520c3f8476342b9ef312ed2ee1e0da822e6e3ad18",
-  "blk.21.attn_norm.weight": "66d8c8a35e13ce9c2a0e75b670150e2c31484a55c2316df46075312196178ed3",
-  "blk.21.attn_output.weight": "12ab46c9382648f9b3350fdd92a6be6352743d62d6b520d7e2024e0c838588f5",
-  "blk.21.attn_qkv.weight": "a7909676ee1675ca23cd29a5fdd226df8dd9d68f94c6c9bbb51dd9fd38504008",
-  "blk.21.ffn_down.weight": "6fb317279c6542e82f97d5a12a60fac1bd0fa0405154f9fbe265e2fe39bd49cc",
-  "blk.21.ffn_norm.weight": "c0f703eb3ff161b5ba4490d87d8684b8a6c47a8f433e12f418333b9db439010a",
-  "blk.21.ffn_up.weight": "6dbdb80ef0c35e364bbce12d40d5e74c7963c7b55d58d9579567a07ffce7b863",
-  "blk.22.attn_norm.weight": "f94237433bf03d675cb2f655b81ca91a1ce2447bc6b00b13d6b0ccfe2d411eff",
-  "blk.22.attn_output.weight": "e821f95995ce497c01e63ca64f737713b1b65f11df1903e51d444aa516f33f71",
-  "blk.22.attn_qkv.weight": "1b0f717c73afb5eb4c82a1708c4e85c969e8a2a8770d9ddb78b1870a2d8a781e",
-  "blk.22.ffn_down.weight": "0f33f7a3cdc685484be99aa0c03642b0b20850a27d1fddbe054b13a9382f3ccb",
-  "blk.22.ffn_norm.weight": "9df285cf211ddd7df2b36a50489af574755c7d4d98b29a05cd04566ae613c8dc",
-  "blk.22.ffn_up.weight": "63ac300e1efb34041dd0136cf43ea622fac6f0caccce1cd9262f5e08d2cf179c",
-  "blk.23.attn_norm.weight": "5f72d9e88689b4027b28f5f8f26cd3abb03635ceea7ec98a4c91a9fc691f6707",
-  "blk.23.attn_output.weight": "6ecf04ff61125c5fc768f8656497152149373daf321ee9c957e8f7245a1184d1",
-  "blk.23.attn_qkv.weight": "a9d9978806724c2959f2cf386c233831f08e1e933dbf2b32665e788d9d512ea4",
-  "blk.23.ffn_down.weight": "72c7d17886a3da17fa0daa456aa5e877b2ef5b8b403182b870d9ca5ca9c70347",
-  "blk.23.ffn_norm.weight": "971e4b712e3025a13419b5b57d674b5e4ab7f18f74b57b9afc4671623da90c4b",
-  "blk.23.ffn_up.weight": "df2b5c7dbd5834545b815073af0c7355b065124e6d6f0fee78d8fa5b2076dc3e",
-  "blk.24.attn_norm.weight": "c41957c4a79ad3b16f6e11daec1c7f530b9f3f4b618e1e4367c3b67787ac4ab6",
-  "blk.24.attn_output.weight": "ef7d61f5fc88ac6f31bf60cb5f4d2d6b8df42d38825807112361a7224b0dee3b",
-  "blk.24.attn_qkv.weight": "3e6a58fe7d49c90bb6971efbad3371c32256881173ea5aee4b0c296cb206490f",
-  "blk.24.ffn_down.weight": "f43619144047de42fed81dfa495f1815d3cb771330e574043e2b67620819292c",
-  "blk.24.ffn_norm.weight": "5501d4a2a98c8ca6b42e77b53b221dbc08f530f6a067256d787534ec6fe028bd",
-  "blk.24.ffn_up.weight": "d64c8b0e509e2b1118f6000176f8956cacecdbb200c7e95ed93fb78b6e26c84a",
-  "blk.25.attn_norm.weight": "502fa3c302d371f61c5791f4615b73018ffb1daa09b6499b227116581244c5d4",
-  "blk.25.attn_output.weight": "ad8391d4e9c980856f2547aa945b2b6a407a6382158dc1ddd4f08d94ecc24be6",
-  "blk.25.attn_qkv.weight": "42e8983780d4a01a02c54ad23d4df21eea437f119a10af5a9c12a76a42d308c1",
-  "blk.25.ffn_down.weight": "302dd010d4e0ab4eeaee89090409ea0dddeeeed3236415eb8f97c942497eea91",
-  "blk.25.ffn_norm.weight": "fb34c1ee5bca96986c08834df0a0c047ba041c1123ac1f563e9d64312bf82d6a",
-  "blk.25.ffn_up.weight": "10739a8de156816d93c92b935386540bfa976bdbef204f0312960f6fc657582f",
-  "blk.26.attn_norm.weight": "7036c711609128c4e55968ff3681d3043338879a5737efd6c2ac9e1a2a61f1a0",
-  "blk.26.attn_output.weight": "db5db45dead5cb911fa01da59832f121b7c18b2d167bf53741c40819f24d346c",
-  "blk.26.attn_qkv.weight": "cae34c6b7f82ed14348d5ed30a79919c383737c1694a9cb9c0de609d3b0c1d0a",
-  "blk.26.ffn_down.weight": "491ec3a4da9b4f49f8ebc6be658ce397a9b801ae9fb35e82177e47808c65e5d0",
-  "blk.26.ffn_norm.weight": "fd7059d75d7f0e5288511ddeeb0f772eb3cae3ccfe4226b877015834edc3c386",
-  "blk.26.ffn_up.weight": "ea1ee1274c56458ce056d2205e5bb6e5422ce4cb0ad58006b8141749b97a0c39",
-  "blk.27.attn_norm.weight": "cc362c9a937609265052cd38544af17a1a7448cea086d4c801139e1fc865832d",
-  "blk.27.attn_output.weight": "ba757a81dabde9cb1b069d1bb616fe79649a1724f756567ec61caed1304fe6cf",
-  "blk.27.attn_qkv.weight": "1ab8d7d02d87756c12c2275636823aa5ede3d683178225c4cac4bd892c319bd4",
-  "blk.27.ffn_down.weight": "deb1c711c8a66acf4dcd2d088e1548f8e08f296f755e4067d6557fa55afde88c",
-  "blk.27.ffn_norm.weight": "fc6242d8cb8a4a37a8ddb7e41e7e60a63d4a89edf36acb35df052f10b9c91ece",
-  "blk.27.ffn_up.weight": "8df39b09c4801f343aca78f2918a1f6db78c8c55e591eda4c69eadb74c26e180",
-  "blk.28.attn_norm.weight": "75b539308f77e3cefdc6d98484d8b5cbf0538f0c2869a77b7373a145a18bc850",
-  "blk.28.attn_output.weight": "ae128940eb60a6d2e121762ef4b3e9dcf9eb3e105b249507fa7f12de0e19822c",
-  "blk.28.attn_qkv.weight": "bdda781c288e9326c240e33905f8e621b6a2ad902e620739d34f93fcd6f933de",
-  "blk.28.ffn_down.weight": "f1d6e6d1c286b1138bfd7e53fe477f399ae93bc2c04e35416f84218ed7247965",
-  "blk.28.ffn_norm.weight": "3f837ce82c8b9bde0d61d08b6f5fe5574886ea5328dbdc53f2929f18da8b4087",
-  "blk.28.ffn_up.weight": "2af027002e31d1b6cfedbdb30a2b9d7213f3aa691167c353913adfd48fda31e4",
-  "blk.29.attn_norm.weight": "61e8003b5329462ffe0fe172f2b160260de006aed858332d49d75504b6b6aa7a",
-  "blk.29.attn_output.weight": "ca44542a72a37476dc73dbdcc01f5b7497cb3ebc4ea230a55c9634ccd8e56ad4",
-  "blk.29.attn_qkv.weight": "abb3d9d6abe57872ae3daa51935d43264093ded5ce63b49d1e280ee5758be0e4",
-  "blk.29.ffn_down.weight": "6764b895fce881df097489c263446f0106de36217997660c15984b3ee22a5a06",
-  "blk.29.ffn_norm.weight": "89e03e9a33fc0e6e31ba9f0c2bd7c5734a118c5602bb90148793e08a80e8d0ae",
-  "blk.29.ffn_up.weight": "fa7ad57a84954f4121653152efed1a871d8adb20a1ea9086e3e849ce359d7d2e",
-  "blk.30.attn_norm.weight": "91a697aca1e42af54f806a20211031c3369e8d0bd58df1b0147fe24954e1f5a4",
-  "blk.30.attn_output.weight": "36063fcf766c89ac75be56f688cc63cefe5f2c733fbf4378ea9956ad386fa148",
-  "blk.30.attn_qkv.weight": "2cacd1161f1121a2c0b979930134f4666f73fb8d7237b3b0659ae091b15955a6",
-  "blk.30.ffn_down.weight": "9f3fcb6217100595850c05dc98f9ab2a263afdb6ab28df2fcb08aeff512057d7",
-  "blk.30.ffn_norm.weight": "6c600bc1fc7de39d4f8917b81fc7d1d5ed2a9b56492234c13a4bd6028c30d880",
-  "blk.30.ffn_up.weight": "73cabd1bb011956b2689ea3338bb76642ef3a57c197377d666d2ab5f56317668",
-  "blk.31.attn_norm.weight": "72d3e1cc771380645fa75a899858c95f39857a4f3f1ed60fe1578df383b8bc53",
-  "blk.31.attn_output.weight": "40089cdd29994dc19a1d89fa15902a89cfeca3540f12dc9bf4d00ef82506e456",
-  "blk.31.attn_qkv.weight": "1d0bb40e9258071ae14290a53c619a8e331dda07354d2a02ef45766c029ae5e4",
-  "blk.31.ffn_down.weight": "8defa0e06335b793fa8be03883f0a322d6c5b33f52c69c943c35c60d16e42c0a",
-  "blk.31.ffn_norm.weight": "33c55d9d0c496ccfb130361fe131649346e098abaaac39c0519507e5d846721d",
-  "blk.31.ffn_up.weight": "599f6503f61c692c1f82001973d35119f9688db5e6be9d9c298411491c93f09b",
-  "output.weight": "14b8dc662bfa3308ebb2e102c562d8e52c15670e538f20f3216a9c310ca9dd41",
-  "output_norm.weight": "7f2294ba94ce65681df6c7ddd8698799199b9d77dc83c10bdad5c3999f0fdb82",
-  "rope_factors_long.weight": "e34d378664e354652c38f47d10dafb0498ccc2fb042d39ff7fef768146fff22b",
-  "rope_factors_short.weight": "9379146a4988f373d362fe47b06c75e7fe7c54aa4dc9558758df79b7a87471fd",
-  "token_embd.weight": "19a03c1fb5ac0baee93b0a7d8b0f26e9a9b011e229b694afc50ebfc13d84f8bf"
-}
--- a/convert/testdata/all-MiniLM-L6-v2.json
+++ b/convert/testdata/all-MiniLM-L6-v2.json
@ -1,124 +0,0 @@
-{
-  "general.architecture": "bert",
-  "general.file_type": "1",
-  "general.quantization_version": "2",
-  "bert.attention.causal": "false",
-  "bert.attention.head_count": "12",
-  "bert.attention.layer_norm_epsilon": "1e-12",
-  "bert.block_count": "6",
-  "bert.context_length": "512",
-  "bert.embedding_length": "384",
-  "bert.feed_forward_length": "1536",
-  "bert.pooling_type": "1",
-  "tokenizer.ggml.model": "bert",
-  "tokenizer.ggml.padding_token_id": "0",
-  "tokenizer.ggml.unknown_token_id": "100",
-  "tokenizer.ggml.cls_token_id": "101",
-  "tokenizer.ggml.seperator_token_id": "102",
-  "tokenizer.ggml.mask_token_id": "103",
-  "tokenizer.ggml.token_type_count": "2",
-  "tokenizer.ggml.scores": "6db964fe67338aca57790481a390121ff3dd643eebe49f7dd308029ad99abb6f",
-  "tokenizer.ggml.token_type": "98d247c5404b6b18f05f133b92dd56edf6efefefac326794b00d7b351f6c5aa1",
-  "tokenizer.ggml.tokens": "9efe405e229a45ff9916f54c475d151d2200cd2ab0006f347abfb069cf096c86",
-  "token_embd.weight": "8c1ee80a9ea4f65aa385ba30112010068af3d209bebc6e149d3d4589c2cd0a5a",
-  "position_embd.weight": "6c516f0b1c4e2388ab90394dd80ad69e4e4509b890982fc3408108ae66210eb6",
-  "token_types.weight": "f879f8e422ed211948f28b560d3c5e17aae7993f063b51196a28cf5c0fb3da21",
-  "token_embd_norm.weight": "75076e095d717aab96f8b6beeee503c27940d9a76f2b891a0e3de72f8a6043e4",
-  "token_embd_norm.bias": "298735285ffe944e1bf03e5d35c7280326b85cf121bde9874f1af5dc51ab939d",
-  "blk.0.attn_q.weight": "ab0923ce4c1549175112dcdfcc860fe30137f991e03ea6857fb5993670adaf6c",
-  "blk.0.attn_q.bias": "a3ec29551dabf976e1d34256b8ab5ab7b758f3ed9742c3cafdbd984d5441df62",
-  "blk.0.attn_k.weight": "4c1038a6d035c3e9ffed7fa672b614627814752503755fbad0cfb76a41ad71ba",
-  "blk.0.attn_k.bias": "e0363930eb588d91816aa3d230bb03b6e2551c165117b80b8d60397413819ef9",
-  "blk.0.attn_v.weight": "425e2e53e3f00ce98d29c3e6a161eb55d3e6ae0d96fdb9f6242d1c4fd6eef4b3",
-  "blk.0.attn_v.bias": "6579173a1e65ee124fbd0bd53cbdca4225515b4f2c5f18fb1bfd000f5978f9bb",
-  "blk.0.attn_output.weight": "a6d70a08cd7164de5d12af65d86d657c3db35aaecde778b2b3fda9193c4c9802",
-  "blk.0.attn_output.bias": "2b8d12c4f9a9c5bfaa29c597839568f6e0525cb41eeaf64ddeb6bd84dfeb9701",
-  "blk.0.attn_output_norm.weight": "bbe6e502a473228b525aeed26cc31b7db123ad63bdc5a6eebac6ea70b8b51d62",
-  "blk.0.attn_output_norm.bias": "36eaacaf0007c5c62daea97aab0115390c0682914f78482e37eb76885f4b7a50",
-  "blk.0.ffn_up.weight": "24654561c76ce387d125759ba843f06b904ef721fcceaeff6ccc62180a48e874",
-  "blk.0.ffn_up.bias": "fd3f0126aa1d95768fa60eb6f4ab8a2763cfcb7e5405f35b92353031d86f4d34",
-  "blk.0.ffn_down.weight": "97a829763a6a5bf3329ceb4d39c424ba4787d61653a5b0bbd1f84782e4d4e0ca",
-  "blk.0.ffn_down.bias": "7aa980c30ae8b4ee7f69df28808dbf5c431f56ccc4a80340f644a0419f16c054",
-  "blk.0.layer_output_norm.weight": "ef30dad4c2a083ae1ff5039a2a6cda60ecc89bf1e486a6f8c0d15f50589603f8",
-  "blk.0.layer_output_norm.bias": "8b1b77e67568b1bce43fc476de1b177c53ff688d66beb66995e8eb3dc290da8a",
-  "blk.1.attn_q.weight": "284331622a1f6f9b87ccee4f652bd66a394ca493c4d93be4d1844e4f6159ad10",
-  "blk.1.attn_q.bias": "e24ebd4860330e08f6bfdd077a82db0bee33f4c8846cf1db26327a34754c7069",
-  "blk.1.attn_k.weight": "729dd0d555544b5bd0f7580b3c8b384256b974605f0e7487b95f295aa032997d",
-  "blk.1.attn_k.bias": "2aa51a828a858f35473f54477583fea54ce2ccc34ea60fbd1d228fbe9bca827f",
-  "blk.1.attn_v.weight": "6be304671cc311d5ca5c103f2b51467ee800c589bc5b8101e09ff5aed1f68c21",
-  "blk.1.attn_v.bias": "43bcbab78a8819e07f723bc9e5b737b71e87a7594f15234e882b63e327a64199",
-  "blk.1.attn_output.weight": "15ec8a1a12b26c9976445308a09f748ab0e4bef0f583d13ab08c3129f8738d73",
-  "blk.1.attn_output.bias": "dac2146f4baa6ed16f6c0dc7443831fb7ec79bedcceafd80d1a4b628a1bb072d",
-  "blk.1.attn_output_norm.weight": "d2151eb33bffac536787a4c9a5d2b31c7a80b17c4611877842a3cce2cd6e98d8",
-  "blk.1.attn_output_norm.bias": "31e1b779716dafb855d2cf5631ee168a0ccf372eb9c6ea6091f66fa97a9b9d2d",
-  "blk.1.ffn_up.weight": "a57547fc3fc3b77406f5cdcb0c87af9bc184701f175c39c1f35297826fce3cc7",
-  "blk.1.ffn_up.bias": "123be6d541d086202913c75d878c54d59a749f3af7b58f7ef9eb9e7c62a24c9a",
-  "blk.1.ffn_down.weight": "cfdb79788377e5cbded8790cd41b9e66c397ecab75474071fcd7cf32d30f9613",
-  "blk.1.ffn_down.bias": "bcb58315519a573097960891c9ae41cf4c685ab78c3e0e77471471758a7eae88",
-  "blk.1.layer_output_norm.weight": "819b554271452bfb1d84c2603b90377b2e41a0ac1e3aa8b417ccf9dce63375bd",
-  "blk.1.layer_output_norm.bias": "47a3433ac27f5ce8947fb38dd491f3706df4ef6adb0ddf74612bf0f54b19e164",
-  "blk.2.attn_q.weight": "1557a9ea852b1880551f7290e00aded4f35e6c4180fdcbed1b0039bf805f639e",
-  "blk.2.attn_q.bias": "c3bfe5f3066f655fd36b055530997b59ff33ef013563aaeb3cb8ff07dabd59a9",
-  "blk.2.attn_k.weight": "cfd08eb69c61ae2f9f14f9b7ff5c5394ca264b1a9f3d48156677f90dd1766289",
-  "blk.2.attn_k.bias": "9b839bc0e79974a0b3f5d1895972bc6f5c9a1bc16052e1af786e6a530758152d",
-  "blk.2.attn_v.weight": "02b26b1208480eaeeb00e7b4cf8b690006ca14759357fc44ed4a2a8924ead993",
-  "blk.2.attn_v.bias": "e7e6f0089fded1659a867ab736c220d9653ea7da6b1b94baf5c8d30a748b63ab",
-  "blk.2.attn_output.weight": "a1db121c7d33806b349cadd050300a57db49fdc91224fd07c9ac43bf4299dc79",
-  "blk.2.attn_output.bias": "7675128b6a92555cd955c820311e91e9417d31f48848f45d047b4100c62148b3",
-  "blk.2.attn_output_norm.weight": "5b4595e0fbcba67a700c4331adf746d2fba3546364a4db5607ae241947bb1a21",
-  "blk.2.attn_output_norm.bias": "7b8e16826ea30e5a2ba0b02e0095a901775981a296e98819625320e983060d08",
-  "blk.2.ffn_up.weight": "a0d815d946ac07a65095c4ae4df77b818845e6d97795c7d82f55e689d944db59",
-  "blk.2.ffn_up.bias": "ce37c0a4174d6bf773ded7bd016ede627ad3bdb8bc99b9992a18dc8e8898f252",
-  "blk.2.ffn_down.weight": "f6231d2a25426fbd45b9f1160aa484220eb227ceef0348c4a6a6de890606e5ef",
-  "blk.2.ffn_down.bias": "429e00556e8dc63a785238b309b9d83738500c1ef6d736fe6526ad88ea496d27",
-  "blk.2.layer_output_norm.weight": "651457a573adf3f7dd9ee5dfe1c8e89389e94443993aab77ec6a0b05aa621e35",
-  "blk.2.layer_output_norm.bias": "41fbbeda7fd89b0cef5f945ae44011c316982390401d6f75ba8c6d365e185247",
-  "blk.3.attn_q.weight": "95a43f32949d2cb8d22815bb27a44abfc6665ba96221af817dfe058cb6ca72c6",
-  "blk.3.attn_q.bias": "f4e34385e75d8108b6b3bd336106e2133a8c9be0cc343dfe5dc48c32a823c7cb",
-  "blk.3.attn_k.weight": "6b892da6a17d4d3265265a15f695864a31813ee8c8e710ae9bc9e1adbc6c9a18",
-  "blk.3.attn_k.bias": "40b8067b641a56014cee42548240aa8930820958b1933004892b5f04fbaef39e",
-  "blk.3.attn_v.weight": "9fcd5922319dd2a461082a5ce040c1dfe65d87d70ca6547dd0b46eeecc3eeb2b",
-  "blk.3.attn_v.bias": "b528c56212e66931fdbe267ac327a9c2f87cd03baff3ea719e30afe681da15f1",
-  "blk.3.attn_output.weight": "e3b178c1b03981e75510e0d277af23ea59cc404b5394e61bd32291825719b502",
-  "blk.3.attn_output.bias": "712c84d39a6a5a9c06a09da8fd9939ba0d5525524a4bba61ea4de09b48f45cae",
-  "blk.3.attn_output_norm.weight": "d1ffac88e675592ff72f8a617be32b4a381d443b2f8f2645dbe44a1e5745aac0",
-  "blk.3.attn_output_norm.bias": "ea31a1c73146234c50e0e43f485c458413714867b8e2703af66482f7db2d6c40",
-  "blk.3.ffn_up.weight": "4ef4f3b9a1ea6ab2ef2eb6e8b008e06a44790d099d97482a05a51e39a29afac0",
-  "blk.3.ffn_up.bias": "06a4296dda16f452675c51f108079fe7722552d6521c737d97734943818b9a2b",
-  "blk.3.ffn_down.weight": "f114b2bebe392c7d80433bb880c6730293aa4561b0b0370dcdaf7472daebd847",
-  "blk.3.ffn_down.bias": "2c8e67831d28a3bf613fc7912ae3259b63d72abcaf4d30efd8800758400158de",
-  "blk.3.layer_output_norm.weight": "a1dfeb7b5a51dd56447312ca41e2ad2f361a3ea12ddc355127f5f4219fb0a482",
-  "blk.3.layer_output_norm.bias": "1ed630021b25c6c6fc93fd32988b9907df966d4982a93081f639aac3044618ab",
-  "blk.4.attn_q.weight": "b5fae4c1f9a5f33a2a2e816ac0c01c25f422e4efdd59ef1ed93da2610e5370fc",
-  "blk.4.attn_q.bias": "c2e376524ea98ac3b10d9eee19ecb1b1e261fa5149efe0232844c923dfb428fb",
-  "blk.4.attn_k.weight": "a4632f5ebf9321d9d08f9112a4e5dda2efe5671df4a4e67fee24845f5b14af16",
-  "blk.4.attn_k.bias": "a9a02ffb8b8b4f6dfe487a7e0341f1d5318c9d2b793a688f34cb1b22fc66ef60",
-  "blk.4.attn_v.weight": "10ad8deb81d9fa093b1e5c0f24ea82aa7df43e6aca49e260fcbea56eab8cc86a",
-  "blk.4.attn_v.bias": "7326813e181e021130bd33ac136293fcffccce2d1d8cb59041e5b13a8cceacf6",
-  "blk.4.attn_output.weight": "c92573088c7437c2b3cda51490e152c27fb19e5468df591eabba5a49d5398d44",
-  "blk.4.attn_output.bias": "14e10b419e5859af1eb685af5c330aee67048cd704dcead9217840c6f5393222",
-  "blk.4.attn_output_norm.weight": "02b6831c0e0fb0edbc579a92812a1dd972cb15d14fcd382d4427c5a7b300ac44",
-  "blk.4.attn_output_norm.bias": "7eed5cd503bb6bb6ceb1bc8b07cc077903a4f14fb8b9d6cdf39644815ecf1374",
-  "blk.4.ffn_up.weight": "8d0c91d62e74d6431321116a37cf3339e630bd50ba164d3304fc4fe8dd831223",
-  "blk.4.ffn_up.bias": "d325f07f73c005a273c484c7be8e7abb4d6e8a5c4fd093f5869133b97629d017",
-  "blk.4.ffn_down.weight": "7ba7bd81143f40537b84f938e403e19f30e4928625eb371de052b9025beb4d21",
-  "blk.4.ffn_down.bias": "2853d9c2a75288214a4bf4907dc19d04d01926f4913d302b1aa7bdbfcce0f7a1",
-  "blk.4.layer_output_norm.weight": "a4ed1885fa77b90fed5300c355ef0aa0c876a8c747151d9d790939d464d57d4f",
-  "blk.4.layer_output_norm.bias": "62142a81e813a9e636333b2b805d6bc3b17c5e7cd4b15adce1ada6bc9a32563c",
-  "blk.5.attn_q.weight": "afc1dff080a72c3daad01384b1448d476aaf789871017c8ff8e144788887995d",
-  "blk.5.attn_q.bias": "748a820371c1d4f872c84545b36358d239c35bf6c99e2812c237d88c3292763b",
-  "blk.5.attn_k.weight": "59e30c1ed8acd2cbb01de5f62e7804015b9ecf98ba157d98cab016344639eda5",
-  "blk.5.attn_k.bias": "f839520078f9e589496e982e86d0126c7aa14196047339abffcf49a696229f77",
-  "blk.5.attn_v.weight": "3e21fb874e21b90308e1f46af034a3c32d3eba1628d62ae5f2246d6af5818923",
-  "blk.5.attn_v.bias": "5cd4852bf95c1444d10d756750f6bf49f842c0b39e9953c7f408bb67c325ac8c",
-  "blk.5.attn_output.weight": "636ce6a7752895f204b9d01ba0aedd9a294f908b42f372c22a16d9dd590d7471",
-  "blk.5.attn_output.bias": "82d924d4b0d2b94f2bbff91619216d6967a3541ce9b1531a6a60457a67b5d219",
-  "blk.5.attn_output_norm.weight": "5e7bd0a8d3396080f3360d7c4700bf094a06216431bd014c4479eef72ecf4271",
-  "blk.5.attn_output_norm.bias": "66c6de5edda5466d029c6753780be81ccd4218bf8bc00680000e0f06856ab712",
-  "blk.5.ffn_up.weight": "5bbf6e7ea380e216e33f8bee06d25f2265359d3876a300e92bc6e41d48e33430",
-  "blk.5.ffn_up.bias": "9d795388bb36fb33ad3a37fea3ccb4937838e02800a608fb47d363cd06b47370",
-  "blk.5.ffn_down.weight": "2fd628974e7f075479dd227b46fbd48ae8d3ca34d735b36f391ac06410730368",
-  "blk.5.ffn_down.bias": "cd213ba9eaa75fa541648097fbe9c96e58077e6c3ad6ad2fb1f21f8350f44291",
-  "blk.5.layer_output_norm.weight": "159a9df41d15b7022d136f86a2a2631c4635f9816e957472217077b522bcf52a",
-  "blk.5.layer_output_norm.bias": "24c1f27ffd1eb4e5be7e3a2909943e6f0980635d761fa1efdd0c19645da23766"
-}
--- a/convert/testdata/gemma-2-2b-it.json
+++ b/convert/testdata/gemma-2-2b-it.json
@ -1,312 +0,0 @@
-{
-  "general.architecture": "gemma2",
-  "general.file_type": "1",
-  "general.quantization_version": "2",
-  "gemma2.block_count": "26",
-  "gemma2.context_length": "8192",
-  "gemma2.embedding_length": "2304",
-  "gemma2.feed_forward_length": "9216",
-  "gemma2.attention.head_count": "8",
-  "gemma2.attention.head_count_kv": "4",
-  "gemma2.attention.key_length": "256",
-  "gemma2.attention.value_length": "256",
-  "gemma2.attention.layer_norm_rms_epsilon": "1e-06",
-  "tokenizer.ggml.model": "llama",
-  "tokenizer.ggml.add_bos_token": "true",
-  "tokenizer.ggml.add_eos_token": "false",
-  "tokenizer.ggml.bos_token_id": "2",
-  "tokenizer.ggml.eos_token_id": "1",
-  "tokenizer.ggml.padding_token_id": "0",
-  "tokenizer.ggml.unknown_token_id": "3",
-  "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
-  "tokenizer.ggml.token_type": "8d40143b3477df77beea4139420335ede458bf5e14102f01b0170197b55da8d8",
-  "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
-  "token_embd.weight": "64a9d30707e659e2e673656d71f5aef7a9fb9fd83bb9a77558dfc5abbe218a05",
-  "blk.0.attn_k.weight": "d8b4437c5edb3cddf6af9987038e1bb2b191c4f0fce0e160d2abace717f5d5d7",
-  "blk.0.attn_norm.weight": "1eb73e3f7aa8e502f6ca31cd19efbb8e4fd9a89692e13e48ac8205545a7fa7e8",
-  "blk.0.attn_output.weight": "39e7b78e57d356a22dd89ce1c4d7163b970712ba756545e1703f97866cd2192e",
-  "blk.0.attn_q.weight": "795058e23b6109febd9d55c89e1eebe6af0714ec8c56fd86a160876a6135ffe8",
-  "blk.0.attn_v.weight": "0cd6e583d1887c020472e961bbb113fe5a0d23ae2f1c2c876fc366cdb7692b52",
-  "blk.0.ffn_down.weight": "51eb4d962189e945a84e94e0dc1aad3f8f90cc1a11e18029670afcd0ea0acb1b",
-  "blk.0.ffn_gate.weight": "9811a29b8ad48432925897ab21dfcb13c5cbd372aeccbbefca9b7866883b4ce3",
-  "blk.0.ffn_norm.weight": "92cbf4652ef503c1de5b10f2be00b3fcf00100980cb3baa8f3013a8d8bf3d851",
-  "blk.0.ffn_up.weight": "af87de21746879483ed1b374cdd76b19ba11ca2b6dbb1beba98efdf3be3e8077",
-  "blk.0.post_attention_norm.weight": "32e135f1f258ffe407018899e39af1725d59d66d60022b9a21575ba160e0357a",
-  "blk.0.post_ffw_norm.weight": "ba286f5ac11b07fbc986173708c66f1920427be5a6d108af38fa0a837c1c8eb6",
-  "blk.1.attn_k.weight": "51584435552051f7fade76beca582b3f7190cf7fc07adcf527c2774d4b1c3901",
-  "blk.1.attn_norm.weight": "6833104c7fbf35a7e799ae56c262b97fffa14789642aee14381b25acd21ed80a",
-  "blk.1.attn_output.weight": "14c39481369087bf292ac9a3ab2ef166f9fe376a9f90c246653213ef264febdc",
-  "blk.1.attn_q.weight": "443f64ae2229f857c69d6bebb7800b685786cb77884c3ae19d4286aeed081325",
-  "blk.1.attn_v.weight": "0df482de2038f1e4c8a7733ac0ddb69ad90759dab5968b942af0155588de4c4a",
-  "blk.1.ffn_down.weight": "66f30763a8bbbcaea609a0087ed75fadb5e771c06378dd2cea94cf17e492e8cf",
-  "blk.1.ffn_gate.weight": "a7151bff00a545fa18b2c92dcd2a14572ccf9beb957a6c494f1374e8ebe174c9",
-  "blk.1.ffn_norm.weight": "e197d71ea11b5276bc0167d2663b88089b3ff42b47ba91e85f6c5d95f6306435",
-  "blk.1.ffn_up.weight": "57c182e0b14cccd1350d388f0c616991702e74281db54637451b70f4ccc24f9b",
-  "blk.1.post_attention_norm.weight": "3c56f837168d784c2d8bac247c130bdca6610c095c8da4558c536ccad7605609",
-  "blk.1.post_ffw_norm.weight": "d2a51d320fd01069dd7ccaa7082f16a7faeb671885607d7900b10a89c354d0fa",
-  "blk.2.attn_k.weight": "bc103c818192de7ce36caaf89dc117be4df13fb902e0bd9a23c64edace5df9b6",
-  "blk.2.attn_norm.weight": "0f2503aa126083a5d6ac72481be1ef66c6014705b573682b35bd864e4749a3d5",
-  "blk.2.attn_output.weight": "05fcd4a1226e482f91803a266f72caca887a93e63c2d2ba5611ab3c68d38743a",
-  "blk.2.attn_q.weight": "6a10b5c2fd423d1e4c4fd60fa8c154a0159b6b2501ea79cae2ef19f45a674e5e",
-  "blk.2.attn_v.weight": "3cf891945a1f8ae7cc908a5c6b729ff5b70f4436c5ffdbf245cc0ed4cc19cd1b",
-  "blk.2.ffn_down.weight": "ea204fd04e0d2fc728a9861a459216bbfec629c152004ba625f52cd8837bd51e",
-  "blk.2.ffn_gate.weight": "3a3518729f1b8b64a82b8792f33987db5418fdb094be0263c68f146a5c38de54",
-  "blk.2.ffn_norm.weight": "754ede678b725de41a34b82f0edf7688b5c065be7c0d46df6f7ad9430d986884",
-  "blk.2.ffn_up.weight": "ffdcb88439f5828ffbd9fc844b03ff91637b790b9838097258cc3ae75935720c",
-  "blk.2.post_attention_norm.weight": "4b3f53b7ba26e8c36b2dfda3b7e5fc4b1065257cefdea235fc7df9af130ac2fd",
-  "blk.2.post_ffw_norm.weight": "e550369e26b8485e2b54ad34b34bc98af5494287dcc513c2c39cf1eaa5b89d07",
-  "blk.3.attn_k.weight": "89f24ea450e37d9e95757651a83205c085d81b354ee9489dd6310a391d8409f3",
-  "blk.3.attn_norm.weight": "24e2ea662b7cb822b4ca5cd61bc17f2709f406d990ec3b4a0dac1cc112db45cf",
-  "blk.3.attn_output.weight": "ac4dad69473c6e3fac56669212cadd8c34ecc5973d945972e974d94805334967",
-  "blk.3.attn_q.weight": "b6a9c9a7d4722b9096631c65de62228dfddca6e26edfe6af7fce01e116ef0f4c",
-  "blk.3.attn_v.weight": "f272a960a40093942309bc342a379984cbacec2d7bc64428db3f64e6b1887ed4",
-  "blk.3.ffn_down.weight": "c0188ba50d8228805982029c277fc0e87aa57473b8363037c648f6d006ff828a",
-  "blk.3.ffn_gate.weight": "a04aec1561ee6c0fbb18c3db49dc62fb533619cf697fd548cbf2279761aaec3b",
-  "blk.3.ffn_norm.weight": "bc053837d44087ec05eb5d9458357b2a5be787789b19cdbbdc694b57697f99a6",
-  "blk.3.ffn_up.weight": "b3ce8b274f20796d3b1a7c08ba27a919066f9de89a782faa544c4a8d6bea1382",
-  "blk.3.post_attention_norm.weight": "9c922dee7a7df5667289e2788e60170238239cee2dfdbbd9e435763f9f416718",
-  "blk.3.post_ffw_norm.weight": "b682544ac953ad2e0b49027ed8916f2e9d1aba5d1587bb4127ac703570c7a03a",
-  "blk.4.attn_k.weight": "143b0cbb4b787b95c2b6212374410e32173ccef2adb914908a2f89a7916de512",
-  "blk.4.attn_norm.weight": "5668f60491b780273745192662d02c9a92a4f692b29d16aa0bbc7413fec4f85b",
-  "blk.4.attn_output.weight": "b9f2bdb68be1e0cf66dd19f8fa2afb105910ad2ef394864cb32cea8f8944e0d5",
-  "blk.4.attn_q.weight": "ddcf1343dafbc2dfcd0b8741225af22fe4b54b2becce29240bd01c34265d126c",
-  "blk.4.attn_v.weight": "6dc7074366e7ed52d9f48c594dcc85bef738e096276cb99d28228c89eecc5b9c",
-  "blk.4.ffn_down.weight": "30334ffc59ce343cf2a1b973174acb7722823463adc07e19a99bd0f404bc9906",
-  "blk.4.ffn_gate.weight": "890f7c8af208d63b28db52c4b8c16c2288a382d87ff5a6a6d6b0a5b3bf27e6cd",
-  "blk.4.ffn_norm.weight": "ff0316cc7847221eb86a90c1ab441d4ee61553d410c66414a7755021b3b12448",
-  "blk.4.ffn_up.weight": "6af97d113f91564c636734f215e25ee602d48eb045458f300b3ec7582be0f41d",
-  "blk.4.post_attention_norm.weight": "69438f231e105e68216b078bdeb35a7cdc8b12c4e2845e18ecf4c8d361d6a321",
-  "blk.4.post_ffw_norm.weight": "0fd535da78bcf2b32c95b05b2b83dc49817393765be90d8cc1ed3d56f47b68ec",
-  "blk.5.attn_k.weight": "0166eb3c6d20dcf3d3c169e94caa8dee057535bb525e29f698fb6f8844f18a6c",
-  "blk.5.attn_norm.weight": "a7808f27f164023d5cde2be00fc23cac6c71aa0ddeb60bc23e12411b80087672",
-  "blk.5.attn_output.weight": "8b65b2027a0842b68c5308f91d6a31de9599d794157d77df8418b19f9e0d9334",
-  "blk.5.attn_q.weight": "966bc626ef2c2394d872087a41c126bb1b67d1d5f6de920204ef5e5b16c34003",
-  "blk.5.attn_v.weight": "9a362aef3f4437fbf0ef6e1ba785f3329c3db2960f93fe36547d2795e9c254ea",
-  "blk.5.ffn_down.weight": "63e53541d34197720c06f297aa8142ac6b6eec002c7987b296f26e8b1400f931",
-  "blk.5.ffn_gate.weight": "d9591fdd32f783e0fc26e20d5d587ee8971ac8ae2e4c818c6eac1c125c7c7f37",
-  "blk.5.ffn_norm.weight": "677334cc60ecce3a7f4ab3acda15d359353d7358872f614ad8914e3780e9fc6e",
-  "blk.5.ffn_up.weight": "a63764110e1c655ffbd55af0669b2dfe4cc29d0e198d33a8e5426461b08a85f7",
-  "blk.5.post_attention_norm.weight": "c55499f859b2c0a7f5cabceaae47309a5ad38bc29d0f4a8db81f1357023162a9",
-  "blk.5.post_ffw_norm.weight": "82752754665f842418f3e302cb5f43d1e0504dcd124c4b8ddb77018b2c793837",
-  "blk.6.attn_k.weight": "e20a5f0d6c807273c8d491439566b428497ac02097cf0aa55e33748c28e14be6",
-  "blk.6.attn_norm.weight": "2c6ba42fd3c73d72073ced03a32dd28d70a89ed9bbbc8fea1ba03a7ade951e6c",
-  "blk.6.attn_output.weight": "4de7c5c2f4a133a266e17ed8c14c52959466b54cc7ab9e19f789a33b4850f284",
-  "blk.6.attn_q.weight": "56462d921800e6b8cd2213fef04c4ff16d728905cb2f4c58e966d0a053a3b0ae",
-  "blk.6.attn_v.weight": "b758dcbff769d6240c2245ede1dbc62c4170a67c77458e866312589220fe29af",
-  "blk.6.ffn_down.weight": "582247fb3c2bf687cbe9413fe18d18ad47bef4b65df7d78905e10335c6134764",
-  "blk.6.ffn_gate.weight": "3035444d5286aefb7a6d04e55bc27e1fac7cf895cd5be02319a431b8e047b4ae",
-  "blk.6.ffn_norm.weight": "e582d24c66e01b96faa20ce6adfda3d8583b11e809bff89969927398175e369a",
-  "blk.6.ffn_up.weight": "6f4b7bbfedeacf61a4866ae0616c4ba6c9e856662e8f00ae6aaec7f52c53e7b4",
-  "blk.6.post_attention_norm.weight": "8fe51b50bd677d21586aecab0b565c4bf9fa68ad50bfe366f45e8fea3c657ca8",
-  "blk.6.post_ffw_norm.weight": "81ba3cb4c2bf5c546b86855b7a885d3fafededc67eb3a35cd3598b03c9e26e65",
-  "blk.7.attn_k.weight": "2e044179cdcae0946708c86bfea7aa0391e1f7e2a09b33fca035d384cc3ca758",
-  "blk.7.attn_norm.weight": "94b48c546b046803c60e75a3acb17a356b710735989938021b565f68df9b4985",
-  "blk.7.attn_output.weight": "65709b4ad7a581f4d75793d39d4032a359f6bcc0c3835205242a0b99e5b66824",
-  "blk.7.attn_q.weight": "8ded993c95d1f7caf201ceb6fa035cd6ed6d351b50b999fa9355dfee9486cb5b",
-  "blk.7.attn_v.weight": "c92d5e2d2d48397542bc03bea25bf39154075e66c5bb1ead85188505aa04ae91",
-  "blk.7.ffn_down.weight": "e8ba8fb57208805ef1dc23cd7c86e9a2d1fb7c52c3940d292cd5bb2eb24b3fac",
-  "blk.7.ffn_gate.weight": "f0f06d6a2e06c5ac252083bc61d05c814e6289d3f4e4a87d2f06918254c02c36",
-  "blk.7.ffn_norm.weight": "ebf8ef775f72624148e09d68a4332187a7a5020c521fe0623da1cd3485ad33e0",
-  "blk.7.ffn_up.weight": "a554adc4fc7122c247c77670e169916ba1794c787b5be30a2b36705138f1f746",
-  "blk.7.post_attention_norm.weight": "3aa6bc21d85c3a0c12b964e82b12feaedfdd13130c3cd2229228e24e0967ebdf",
-  "blk.7.post_ffw_norm.weight": "508bc7b19ee8ff08f0007c890133a462fc57c7e72b16ee8f6dd64def264ef876",
-  "blk.8.attn_k.weight": "363c8e74056642fe9e7c2f3f9769d57319cd3fa0a6022810189ab8d894322885",
-  "blk.8.attn_norm.weight": "685b49a1f1acb169f4df0bdd8e3de6943f3033cebad14b898a72000595610d92",
-  "blk.8.attn_output.weight": "7bde571e4efef1c6a6143f0526721dfb59e0a0ea0e1a3616a322b2eb937efa48",
-  "blk.8.attn_q.weight": "fc993dbc1074c28a0e1d85e5ab2f4ea6a9c6c1affe7ee56027000a275daed9b6",
-  "blk.8.attn_v.weight": "281e8791d3aef9b3864f1cb054da0ae0c2fef4ce0a58b1bad8bc136b2fa0f62b",
-  "blk.8.ffn_down.weight": "b1164a2578a7f87ed99c2bbc76c5dfbbbc6a1a803605391acc3f320fc989ffd7",
-  "blk.8.ffn_gate.weight": "6b39a3b3aaaa79aee61416b54d62160b9258042650e61c6b47bc77c2dd17daf3",
-  "blk.8.ffn_norm.weight": "17ea1362c72da27f12bc936500492035bdef3fd8f940cb12b57f37d42ba8ecb1",
-  "blk.8.ffn_up.weight": "bc3a7c47afc440d2bdf8fbe9ddf2c9220467472c60c8b4ded8c0f181470ec96c",
-  "blk.8.post_attention_norm.weight": "5c506204e00411ef9c8b4134d40eedcc19fffe68dd0af7d7cc49dcabf2dfac7e",
-  "blk.8.post_ffw_norm.weight": "002faec235c3678864e2901eed275ce4e9dc229164a91c9cd4c965142ba62305",
-  "blk.9.attn_k.weight": "0bab39d8c237f1b6d0010db40467142625a9e6f2e0e4c49a56c12b41e4e0b1fa",
-  "blk.9.attn_norm.weight": "de5f38e873b17f07aa7598831b89cc1cae2c9bc3eb2e042ee9af059d2563e84e",
-  "blk.9.attn_output.weight": "8a8184702c25a62df9ff309c0c7badc8587208523b2be3e8fa90ce7080573e6f",
-  "blk.9.attn_q.weight": "7c961b2431b09ddf95377acd07201cb91bf13d9cd3ae0f2c25c7d6a0358d9f50",
-  "blk.9.attn_v.weight": "e22d240cb4743067033e659cbf210ebe2ebbab3e1dea6ccbe5eaa982382ca038",
-  "blk.9.ffn_down.weight": "a426f81210f03d6ad53277416e1fdcdf37d8065e4817613edaf6c67a343426be",
-  "blk.9.ffn_gate.weight": "a82eba825cb77b8e64f85ff99ede2fc71bc9b01751eeb17e9e6c246ee12ea62e",
-  "blk.9.ffn_norm.weight": "1a97f9b1302a3a326d534c5c3fed2db6db0ae45fd0edd381a3e4fc1c75d81030",
-  "blk.9.ffn_up.weight": "5f20bac2bbf03bb42adb92fbf99561651e1edda57e0b61935ac7f6c08c0ed7cb",
-  "blk.9.post_attention_norm.weight": "9f9866d13988e1946b1e1c80d9374a92a6e3be33748f8eaed3e126d1e1a4c796",
-  "blk.9.post_ffw_norm.weight": "a6896dbf698db4dbbe5dbf12417d4fd80e9cad0c539c858892ec0aa5b046bb58",
-  "blk.10.attn_k.weight": "ca8446e5d21ecd4e6a70dca8d321be480be4fba94d70cba065205436feb44270",
-  "blk.10.attn_norm.weight": "4f41fe290e8f21f63b82151b6cce94bf7318d121468816b0c58af0ff7c1658ab",
-  "blk.10.attn_output.weight": "c626d2e9681c5c941bbde43dddfae1a8d4986bf2be4470857bc8e8bd7f869044",
-  "blk.10.attn_q.weight": "1e61b210a13a429977325cf15d781ab77d604cfa862f4270329cbd94237d5835",
-  "blk.10.attn_v.weight": "8ff8d3e3f058ec3b35ada1057f2ed59c06494d0e0be6a8dc3ff9edf9f0e1a115",
-  "blk.10.ffn_down.weight": "bcebc04219f8081a5f483e58103c0ddbbbc631a0a54fd6dd9d55778e041f70ee",
-  "blk.10.ffn_gate.weight": "7a23a1e620ef871384ddf9611ccdcfb893fbf013cc203ac8e72f745420f1eea0",
-  "blk.10.ffn_norm.weight": "e3a375e43c349a1c6c66c22328e513cc1af3137fe839e43dc8e9be2f65914fd7",
-  "blk.10.ffn_up.weight": "5d182e7c94369194fca5f19cbbe668a999911e57f3d363bc7fb6088428700cb9",
-  "blk.10.post_attention_norm.weight": "b841c6308296e8984f3c5f549c6e3a242f4b3e19141e1f54cc08de9c46759c09",
-  "blk.10.post_ffw_norm.weight": "9d66fa05b5c940208f634f5053d809094c99a2a10a1d1e8847c8281fbd99fb49",
-  "blk.11.attn_k.weight": "14adf24ebb2bb17b336ca81cec3e690fd854782f4440ca6c66cc1d7e7bf1c850",
-  "blk.11.attn_norm.weight": "2d2213f311f50414702b5b34f22aafb9d9a0b6787243e7578562583dc40ad195",
-  "blk.11.attn_output.weight": "de1f14cc2a7fff00cf11b229f0576999205f17b9536e97abc9d6de3cc79a7884",
-  "blk.11.attn_q.weight": "2bcc5c147524003109ece0be08b89ac8b25baa71416ffa76573c6c052ffc6eea",
-  "blk.11.attn_v.weight": "2e6ab8573070c22dc1e0d7aebe4d52123226dacf7822dcce06fadbb38fb036a4",
-  "blk.11.ffn_down.weight": "1b86902f4e36868421e5228b9445051f8290b292df22a6d1af836dcecc1f25c3",
-  "blk.11.ffn_gate.weight": "e756e8081bd0a16aea4a9ef5076ad102113524f7a3d50a3a77aaa7f7938b63e8",
-  "blk.11.ffn_norm.weight": "6913887267be227cf9d1991a3dd8db2e7e74bb9b5fbdfcb9ac954fd7d7b95b3b",
-  "blk.11.ffn_up.weight": "619a3ac0609ebdf42c3fb2b6e4b1db48df79e6dd8418d7ab8f1bbff13d8a6a50",
-  "blk.11.post_attention_norm.weight": "e4b4ba92cef7b6a78407e8ab1b0307d47dac6c3df7b6817e28038317ff662d7e",
-  "blk.11.post_ffw_norm.weight": "40aceeec58cb855f0c158c9cc217168fcd5d0e735567d587217b1d78df17bc5f",
-  "blk.12.attn_k.weight": "c54c5a4d4892522022d1aa2204cfc624f0b4042caa536e678967316293fe5cb1",
-  "blk.12.attn_norm.weight": "7cd2ef58298569ffdf244d9b390f3917245276c8206e5780af5f96d8c0bbb446",
-  "blk.12.attn_output.weight": "85495ef9cc8b3deb21f741bde463ff6493acae2be51f02ecdeef952cbdec3375",
-  "blk.12.attn_q.weight": "d19383f83fd119bfb8c0280c9515705c11d8e7d502019fcf8f49efeef0d106d0",
-  "blk.12.attn_v.weight": "869ac669ba49531d9128892a0e27cef15de508ff40cdf80cc1681dde50d09204",
-  "blk.12.ffn_down.weight": "578f39f8f9fc2f09138afc884a952d7cc3a9a31de4216acd10e88e19e0b75f8c",
-  "blk.12.ffn_gate.weight": "e29a0186bc6c4a0720246306e922d3a83f777dadcf4ac80bad468287031cc8b5",
-  "blk.12.ffn_norm.weight": "e1ee95c6584b5cb57fcf1db8ce2bcc03aff91eb389238c094a61c00dde93d1f2",
-  "blk.12.ffn_up.weight": "2a826f06d7cdfb3edc6ae250ff44363ef77a2a9cdf96313e23a331b99ebfa17d",
-  "blk.12.post_attention_norm.weight": "4bafc7699b948d5cbc0d3e09b418b06c6abc4651a61ada9609d9a2f21c7e5607",
-  "blk.12.post_ffw_norm.weight": "bbb8c34a7176bb1a49f9fe2bacca0bd26b673d52c0835b2e90fa11f2962f077f",
-  "blk.13.attn_k.weight": "ffeefccfe8255d1b694382012ff4134eee5fec9d9491c8d0ff0a13832d1a37e8",
-  "blk.13.attn_norm.weight": "35713726529e3887c4135a88e86e8a4d7270ba5b9f2d1ab462622fbf40a7cdce",
-  "blk.13.attn_output.weight": "0d60b7c5cd71190a9ef4b873b0f516be15447c32d83914db2794b14592b0b460",
-  "blk.13.attn_q.weight": "8296069e65bef794cefc61257fc65789b3cb22955e30f3df129205e5041b2222",
-  "blk.13.attn_v.weight": "ca0f4ab9d16a748fc643a5c0c7a19826a811bf2a4e7316a8c935d4bf0ce8abc6",
-  "blk.13.ffn_down.weight": "d5514e0c8e7b3ed1cbcc1605eb5be1733b6ab3514cf8a0508fc72f7d05ed8bcb",
-  "blk.13.ffn_gate.weight": "8108e517a82e08a3aefbbd267bfa50a1668f92a76273280ce8a6bc1f6dd61521",
-  "blk.13.ffn_norm.weight": "5fcb6132d2134bf1f835b904a99820fa501dbc57d2224129f7098bf3cabc1d36",
-  "blk.13.ffn_up.weight": "6d744b7cd390a3cae3aa350dd379b81246acd056a2259996b6aaadece8465ccc",
-  "blk.13.post_attention_norm.weight": "e08b14698912509790e9575b8676971fbb0a4d82d719367e3756c0d0c4ab8cc0",
-  "blk.13.post_ffw_norm.weight": "2b196e4450fc5f1e7367b2cf7fe33a15fe919fbcdd861d11002346f16e980535",
-  "blk.14.attn_k.weight": "120e5f48d7268dfd9ab5f4bc9cc57a7cec63ea9635f56b80d435eb22936e9483",
-  "blk.14.attn_norm.weight": "146367bcce4db72cc894419a2e0145a6f533507dd68e4739c10ee480308c401f",
-  "blk.14.attn_output.weight": "720fa0165e756876c5cb6ad9e2780dd910390933f3f8849e5add5da04266650b",
-  "blk.14.attn_q.weight": "f5183466f56219ca1aca52d8b82c2d966a4198fea40fdd6b39f4d8b06ca2a6dd",
-  "blk.14.attn_v.weight": "24f8ea3d5512cd37c43c8329cb0da0c90d1895aef763ac2dcee3fe5157ec50a2",
-  "blk.14.ffn_down.weight": "e29960965b384ae5ab3d898a4dbaa8fddd28fa0e477ac28bcac49dec12a5ac67",
-  "blk.14.ffn_gate.weight": "6d0d6a74bfe9692e8f8eedff0fc34fc4fa1c8687794f35f2e2b033ab2d7510b8",
-  "blk.14.ffn_norm.weight": "f7036c1a9a71e046c9d2af16e9218fda5dbb0f7241ab44747abed1f0f9d602ca",
-  "blk.14.ffn_up.weight": "7d69ea1424007ffc9c12247dd0308c616e93ac02a59ec341cfa48f92d6ce3b10",
-  "blk.14.post_attention_norm.weight": "65b9712834d9445d4236bec362f3fb795c20d60c541b3dc6dbb7914d9b493e41",
-  "blk.14.post_ffw_norm.weight": "9c6a8da2e4e437d5cfdf3b9097e9f8b64bf07946a048badec20f4d374613f38f",
-  "blk.15.attn_k.weight": "864bc618303a0e4ee67fb1d5e751de61e936cd51e96669dd86f8cd08f2305045",
-  "blk.15.attn_norm.weight": "f9f4187da6eeadc2fc5921d8fe669741697d16c13d71e4aaeb73b82f50dc577e",
-  "blk.15.attn_output.weight": "ce2419a0b097036b2a31f2f4ad731d5814bcc2ef4c511786e24471e5eefd273b",
-  "blk.15.attn_q.weight": "9539db5a970d11ebe99722d1e13fcd635e250033630811efe583d2f97778e4a9",
-  "blk.15.attn_v.weight": "1c834b48ccd88adaeabb7d8bcb6be0bcd6d5ac1354ce88fc28f19a1a96b81ab3",
-  "blk.15.ffn_down.weight": "bc1f97a65dde6fa2c1e5397afb612266944b343f2eaa868b635ddd25829f8a42",
-  "blk.15.ffn_gate.weight": "1b14529d57056b79037f6cb5008132e62cc35992353b38dda59572274623103b",
-  "blk.15.ffn_norm.weight": "9af77458de9ee55c66f93865759f9c2c398557f94f3fa8fa6af30543d7339cde",
-  "blk.15.ffn_up.weight": "41d524a26b61a9595816b4fd53cf57ef50a702e4ef32933ff6136dca9136a267",
-  "blk.15.post_attention_norm.weight": "c60a03cd0e63a7db5c80015e58e9b97ba2208caa19f66a6fef5c4447eca900ce",
-  "blk.15.post_ffw_norm.weight": "34f7f9f96769215bbc3d17084df091864aef96a6645b7d0b3b7d9bd92f1a4b0b",
-  "blk.16.attn_k.weight": "7e27240d9f3a8c6cf0f4a980113d43234f514eadc3e3e1792b86efb29ffb1a6d",
-  "blk.16.attn_norm.weight": "af798acc0899282a30448edec48223b3e8efda177090273e612d8eca5e377301",
-  "blk.16.attn_output.weight": "79df39a3709d3d53e84146291e0944a7a653d06705293d9ccb5648dceadb432c",
-  "blk.16.attn_q.weight": "db58a1c3b83ad294804e5fd7321005719e200659173466df5a52a182b80b7165",
-  "blk.16.attn_v.weight": "2af6d48cbaeb225b5c1a704f76abd89c8ab1521417695b112b4dcc2cbd39b74d",
-  "blk.16.ffn_down.weight": "fc1c813eb5e7da3d6194569d6cb21602fc6eff2dc8e1b0eb753f2d5df148189c",
-  "blk.16.ffn_gate.weight": "7a80bcbc42464bd55df4814a6edbd7b5c153e0428323bbe49de55e2d2add33e7",
-  "blk.16.ffn_norm.weight": "2041685ee926d30f3f2ae4ec35b5688f1cd834167a6359a7d4057eac804c58b2",
-  "blk.16.ffn_up.weight": "8da4b718973ac1d43b928829bc45e062fd101984d6c98dd825bd7c5d08ebfbe3",
-  "blk.16.post_attention_norm.weight": "975c48fe680a6167438a106140a8872eee7765191f152d80e3b8ddf47693e095",
-  "blk.16.post_ffw_norm.weight": "4de2d4d483acfe4fc77860ea929025df2f4e15c10729413f36a18c94eaa6d689",
-  "blk.17.attn_k.weight": "f937e61f0af8c4cd98ee742648eb60e02e579683e21d421071295a3b70aebaad",
-  "blk.17.attn_norm.weight": "c3270583ed28b7e423f5b170c59113234f258169b93a867d9274f4c10b7cb115",
-  "blk.17.attn_output.weight": "b8c1150e81e685e539a5dcf2c19047a24eba2b281fabe166674b1d71ef4612ea",
-  "blk.17.attn_q.weight": "c255100ae2011e7dc7e3bf3bc3ccd96d859fbb98581cae993d7b82c1ba8e8b39",
-  "blk.17.attn_v.weight": "5830bb0a555984c6485348067f70b5d22ae337c011aa9248dac2ff4c95944551",
-  "blk.17.ffn_down.weight": "8ff9a7cccaa3776434a9d895aae4fb5c36c736bf2ec98784226b4c234940fbb0",
-  "blk.17.ffn_gate.weight": "1b52876739712831c272911533da206f407b46034a1a4ae8a88c1f96b6bd5747",
-  "blk.17.ffn_norm.weight": "d0e16ba5e87c91b545334e022058c7d03849665c3b1a6298771b656531366b66",
-  "blk.17.ffn_up.weight": "4dd6211d01dbebbe21052708eddc242b082a58b5f18ed16479e17987c1d3432e",
-  "blk.17.post_attention_norm.weight": "6f49c775c7417dade77ba8268a0f8441c1e5ec28b5d7e4dc5ed07a04d04600c8",
-  "blk.17.post_ffw_norm.weight": "b91a0bb2e6679e9c9be06ad323adae441d00a3d673efb19d7c4954be2aa84b27",
-  "blk.18.attn_k.weight": "22b565ace1b4da8b33865a58625be1d90beea9891f29686a69fa9cf7c93217db",
-  "blk.18.attn_norm.weight": "3e0160d7063c8753de65d2356a66648e47d921efdc5c917efb8209892120f8db",
-  "blk.18.attn_output.weight": "e3180f0bb4ca90b31e9b08158db38e332de62dfbaefe34aa94cc316409331e09",
-  "blk.18.attn_q.weight": "f3a5a83614c3ba7ea41cdd5b1b0819a241ee2a951a381ce4a9e001d3f700ed8f",
-  "blk.18.attn_v.weight": "f3350a5984fb951fc738adcf78147e6d812ff1c576670c460cafc99c253c1654",
-  "blk.18.ffn_down.weight": "9e9d09b13a33525e14bdaee6efc65c551ac7cf7680e534b940ab122a3a7c1ac9",
-  "blk.18.ffn_gate.weight": "ebaec8b4b578a2e8d815baac12f1675c208f80c68074d5a18288a2e1a60680ee",
-  "blk.18.ffn_norm.weight": "33e7687c53a242f2f8dc7093a491c97b18d4a5a8c14d183f02bd586a770f05aa",
-  "blk.18.ffn_up.weight": "78a1816662378ce56cc870e705174492781897b3afd2d4d97a51f10f2f2987c1",
-  "blk.18.post_attention_norm.weight": "a58dde3f12df3e94cbc27d87c8ea86f89af8a388a506446ff6758f05399b05fc",
-  "blk.18.post_ffw_norm.weight": "cebf90cc143577d483cca27b032dfd82031ee59bdf17c0e2cf60a0a3ad5bf996",
-  "blk.19.attn_k.weight": "4683375d0599ac9e2232196aae1e90af13a14cae26e865465de5c8e257bb2055",
-  "blk.19.attn_norm.weight": "f3eba936bfb1814bbcb0a1d62739eb66daac839df8c9c836fe0e94860df88525",
-  "blk.19.attn_output.weight": "51c0f01d38a9dcfe9bdbc4643576fab164c1d9e4b7168b7695c0ee55e6965667",
-  "blk.19.attn_q.weight": "28d15b69b8416f2e7ddc88fe381cb1e2ef2ad705fb1c268139ba96498cc74848",
-  "blk.19.attn_v.weight": "6860f1cd720638e63a981fa2c0b4db900129826bcb9823c9ddf9fb8b1b9f3383",
-  "blk.19.ffn_down.weight": "bc7f2d7827ee01c2dd41401c7b3b1700ad3a4ff620e8bb734f92630d342dcc7f",
-  "blk.19.ffn_gate.weight": "54d03ef69ba373fc410fbca8f1e34a565d58e4296d9a035ff7e48340b9c848e7",
-  "blk.19.ffn_norm.weight": "9178fc796a340ee6e8128ca74c0cb6203d1adbed6927af4e5ac7863da57affc7",
-  "blk.19.ffn_up.weight": "a77bd708026c6e83ad5c79c223278e74621bcf74a9641c7818d96b595daaad20",
-  "blk.19.post_attention_norm.weight": "ae94aa26f4c411bf9496a6fd4a6df64ee589ee1ae9a04b531d45acc95721e582",
-  "blk.19.post_ffw_norm.weight": "9ad210700edeef12133bdcff04bf1c7f62b49f6f4a9ba483c7cdc59857c24a5c",
-  "blk.20.attn_k.weight": "e35bce1e9f4a7a09ef34721f57ea38cfca68c272f52d923fe50af8308f66cfaa",
-  "blk.20.attn_norm.weight": "644800f6926fd34f233795c4dec1151a295d2138ca8cac33e3e48167d26f8b41",
-  "blk.20.attn_output.weight": "8d3758cd236471741e1ad66c0710cb79077dc8c7a3a292d35bc551c0c5abe627",
-  "blk.20.attn_q.weight": "c333b1f0f6f956b5d73891df10b1a0321e55fc31c40d623a24e1f52caa6a998b",
-  "blk.20.attn_v.weight": "8562b418d0c4868a050fb19fa3fcaf50a8cf1c669f537d666c80c7b3a04714e1",
-  "blk.20.ffn_down.weight": "97efb608ac44cc804198faec3ee66eafe56ced6b7ca5359700c6f1df75b7205e",
-  "blk.20.ffn_gate.weight": "5c61151d86f28415c73c73d90ec088c646cbe5c1640197caf58eb501ba7db293",
-  "blk.20.ffn_norm.weight": "24bbe0a701afd4bbeea65b3edde712b3cbb2281043bbc43dbf250582453116ed",
-  "blk.20.ffn_up.weight": "e170cf68e249566aa99eb6f6b265679bf9a5a6b76830ba24e7e130c2515910c4",
-  "blk.20.post_attention_norm.weight": "e092d751cfe20dbf2d348358f3b38397bd83e4ed94d6bbaa6bbaddcd902b2ac4",
-  "blk.20.post_ffw_norm.weight": "219a18a47dcba76e669e4322223a5a9227bd3db1de3fbd3d3cfb22e54a783c5a",
-  "blk.21.attn_k.weight": "c3a095ebddb42c63824f1c98da65263dc88e4d790a26aa1632840b44f5cc7cb1",
-  "blk.21.attn_norm.weight": "ef8bbaded5fbc45ad9cf3985ae02174524e7090fe6362811124f942ef643bec7",
-  "blk.21.attn_output.weight": "668f018aba72baac6252aa3ad58569ddd55ab751a0dd8d7bcc9fb9b6efb4bf53",
-  "blk.21.attn_q.weight": "e759c65663089f3bbbd51847934c185e680c82f1249065d5d487da638e519e6d",
-  "blk.21.attn_v.weight": "2ff57762686cf9ba1f5a6be76503454b97556ce67f4ac98254bd0562231197ba",
-  "blk.21.ffn_down.weight": "3fd106556fb721b1c28ae3f4026bc83eb1b08ed910f2ba5f466c6b5f327d91cb",
-  "blk.21.ffn_gate.weight": "338022d882f4b6619e8054a6fb909696fa3eef3013cf69b65c3cacdfc5b9e42c",
-  "blk.21.ffn_norm.weight": "1e77660c23a3f9653ee721a863d1960f773d87437cabc4dc0a6e17ee3d4e5e44",
-  "blk.21.ffn_up.weight": "7d31b20fbc2e6eba8f350f170069dc36f0cb12f68fbc4206ec5022a74085ebcb",
-  "blk.21.post_attention_norm.weight": "9638bae8d8bdcd7ed68da282979cd84a07c41ff9cabcaea94ebc846a1803db23",
-  "blk.21.post_ffw_norm.weight": "d622ef11115fe0cbe04b727d5a3b6371e7f39bf08c8d5eb9bc6da52e3f3cfb9d",
-  "blk.22.attn_k.weight": "5c321cb29deffbe57de200dd206a62005f1e80acb86c4fd2349dd44c8d3594fd",
-  "blk.22.attn_norm.weight": "198d949705d7170a331d75889d8c7500c3635254dac2cc6aa4dc35d556584536",
-  "blk.22.attn_output.weight": "19805cd5d7025b457e5d41d70db8b3fd63c2dd0e4a94d3ef1704d50ef4e749e8",
-  "blk.22.attn_q.weight": "177836cd583fc87405975ddc21ebfebdaa090a0363799664c72caa3da851ae2c",
-  "blk.22.attn_v.weight": "fea255692483e30d0108f9e4e250eb3ed7dbda8d83f499b06519b8c223ae6096",
-  "blk.22.ffn_down.weight": "00cb8939f03e5817d6d412de8cf2c923c9568d5493e382cec7faf5718fb034eb",
-  "blk.22.ffn_gate.weight": "b0591065b91281b2fbd8a9567f3568d40479f680e1f0a29e27ae213f37642489",
-  "blk.22.ffn_norm.weight": "96b5c5d0737c2ceb8fc869f54adb9e5f46e28cb7b177c40f49fa926b923c00f8",
-  "blk.22.ffn_up.weight": "81f472185b24344ab0594ea8246cc6e200e0dc1cab4943e74fbe4ca19d5a9701",
-  "blk.22.post_attention_norm.weight": "27fa9aa6260aa3071e0391e1a1d49322dcb6e8072315b8a9b7064087108dbd06",
-  "blk.22.post_ffw_norm.weight": "f37e1dcd7f643d9545675ffe9dc527a11eba86eb204989c2f44f636b266d896a",
-  "blk.23.attn_k.weight": "5d82f36658a56c3f94d0bb2d61f65509c966fa6568f81812e0d3e338b380ef8c",
-  "blk.23.attn_norm.weight": "b7983f88d9cad88bc88a528923e6da592ad20e699965b223ebc10840fe1f4fec",
-  "blk.23.attn_output.weight": "59f97f80f430d71606aab0158a195aed29ccd3405e6c0a5c41c809be8eb01898",
-  "blk.23.attn_q.weight": "53ac4789fe958919cc02ea4222bcd64c0ea1b4baa54304bff46635bdf42f7490",
-  "blk.23.attn_v.weight": "ec8abe09b9e84dbb52c7a068094657c6d3c62fe551ba8d7c3a3f23da622e9756",
-  "blk.23.ffn_down.weight": "3cf547eccb1b82aa64f208cee9682d7f558ca84e0aead7d9d3d1420d90f3d992",
-  "blk.23.ffn_gate.weight": "366aa2486d911ba81eb519119e13807deacf7e9908bc1975a2a63e00d6b10124",
-  "blk.23.ffn_norm.weight": "6d1d4a4af34bb7dc090ac87d6457d398c3e0fb68bd2e2b60b099dc318b6cfac3",
-  "blk.23.ffn_up.weight": "53f76692e253f5d2420b3f200c731b9f3b7a83e379920b4a067c729b4674aa4d",
-  "blk.23.post_attention_norm.weight": "7c952fa0efa76b3f048c8c4c9e8dcb5e3724d231327eda6423a34d3f3d3367de",
-  "blk.23.post_ffw_norm.weight": "7ab188cfe61f0a91b40309a0ab6bfa99f19d0ff2a37b6ac10e5f0c7f44eb5270",
-  "blk.24.attn_k.weight": "225798792f9bfdd10eff0505ebe61e0aad0209c17b431f6044ee7968ffe8c198",
-  "blk.24.attn_norm.weight": "635e3c1ebf5219bbebfc40ef164bc32d2b726ef595a94da64ac524ae878e2915",
-  "blk.24.attn_output.weight": "482f5bb2db8d9ed22b253d9a3296333b239efe698e5992e5d77e7e12dc2a5cf5",
-  "blk.24.attn_q.weight": "43805bbccddb65d58fffc4be9b5c374d4e1df1395ec1e1ffb4bcff03e98d5adb",
-  "blk.24.attn_v.weight": "fa741af54b4a3b1775d32f59134756090c5df2e7345a12a2d8db94fe289667a7",
-  "blk.24.ffn_down.weight": "83c6351e3162626b276f524a57836144625c2556dbe321b57cbd8fd486a68fab",
-  "blk.24.ffn_gate.weight": "fbe66be0d84d12cea5176cc7eaef64382ffc7324cd9d6266a3342dc43442f2ac",
-  "blk.24.ffn_norm.weight": "77c1445a8639ad24938bdf0280233eea2362d47391421833dfa72ec756dfc1e8",
-  "blk.24.ffn_up.weight": "78235ac729ee23c1cf1ae543751e3af32776d8808cee6e529c2a625a1f027654",
-  "blk.24.post_attention_norm.weight": "161f71b6d07628d43e4ae51a4c9088ec6ca2db123a17986a14505d83fdd04dad",
-  "blk.24.post_ffw_norm.weight": "cf1ba692aa683368b02ac413e69b2521b98c69a5274eacbb54165b53bf38a8b2",
-  "blk.25.attn_k.weight": "057a56bd8c8d2b41608d1f71faa3052902152ddf85e47669ad950c1c3e77c33f",
-  "blk.25.attn_norm.weight": "b7179fe02c334da556ddcf6c1b502245639a728c4cbba8b552d8e1df4565ee9d",
-  "blk.25.attn_output.weight": "4fed8b05b08a0ff75ffd022701bbeb52f17b23d09332a1ddcba737244bd0d3b0",
-  "blk.25.attn_q.weight": "c52e99f5d38bf7538d6106a0bbf38ac6dc6296bca9a3f849afa384ea67b4af01",
-  "blk.25.attn_v.weight": "c49c23d8e1cfa6a8eb971eb69942204890c6d7d830dc8774c84b108a80598912",
-  "blk.25.ffn_down.weight": "c08d4dc8412b19fdc870c164b83c341b236ec6fe7bb4a9bcfe0dc100faa20286",
-  "blk.25.ffn_gate.weight": "1a4cb3f36735d59181721471452807903006539e5e1b5ceb4f72d1d7ae134127",
-  "blk.25.ffn_norm.weight": "8fd6bd0dcec5198761525a36992a57c9ec5e9da60a22092839a84ae8c4e87f26",
-  "blk.25.ffn_up.weight": "3a00f39bdd5f31dc5e3b281d2002e1ac4f2475d49a0ac1d7720a25b377dcd04a",
-  "blk.25.post_attention_norm.weight": "e5f31a648612c859b6d21c9ee426e87a86cb1973dfdd86276c767371d9cef5ad",
-  "blk.25.post_ffw_norm.weight": "553c3bd774922c99c2384380a142d019881d30dbf0fe3bf9430dabfb3f6cbd33",
-  "output_norm.weight": "49445c4585ab0a8135717a0bdb1cda4a062a030177d0119561d91542aec5744b"
-}
--- a/convert/testdata/gemma-2-9b-it.json
+++ b/convert/testdata/gemma-2-9b-it.json
@ -1,6 +0,0 @@
-{
-  "general.architecture": "gemma2",
-  "gemma2.attention.sliding_window": "4096",
-  "gemma2.attn_logit_softcapping": "50",
-  "gemma2.final_logit_softcapping": "30"
-}
--- a/convert/testdata/gemma-2b-it.json
+++ b/convert/testdata/gemma-2b-it.json
@ -1,188 +0,0 @@
-{
-  "general.architecture": "gemma",
-  "general.file_type": "1",
-  "general.quantization_version": "2",
-  "gemma.block_count": "18",
-  "gemma.context_length": "8192",
-  "gemma.embedding_length": "2048",
-  "gemma.feed_forward_length": "16384",
-  "gemma.attention.head_count": "8",
-  "gemma.attention.head_count_kv": "1",
-  "gemma.attention.key_length": "256",
-  "gemma.attention.value_length": "256",
-  "gemma.attention.layer_norm_rms_epsilon": "1e-06",
-  "tokenizer.ggml.model": "llama",
-  "tokenizer.ggml.add_bos_token": "true",
-  "tokenizer.ggml.add_eos_token": "false",
-  "tokenizer.ggml.bos_token_id": "2",
-  "tokenizer.ggml.eos_token_id": "1",
-  "tokenizer.ggml.padding_token_id": "0",
-  "tokenizer.ggml.unknown_token_id": "3",
-  "tokenizer.ggml.scores": "0872465d173867d755d3ee728f882b9dc2057a0bfd596fe1e3d131522f1250d8",
-  "tokenizer.ggml.token_type": "485e40bf3d715a4764818fc097d6a2a41db872d82ee714bc500872a3437ff48d",
-  "tokenizer.ggml.tokens": "c6e66de1841f04de8b8d236d461ab720a4c9b9b5414dc293a09c6e10eab45fda",
-  "token_embd.weight": "17b87ab2c01c80657855a5413d0457b4a041afaeda0cc785080e44e2f04acf07",
-  "blk.0.attn_k.weight": "28ac0da05754ad2714ae95da28a5ad191192140b30b8fd22d108d4700c9d989f",
-  "blk.0.attn_norm.weight": "3f9d5675d1ab0eb8a816719dac9fab81f2e95c52be02c34263339acbc087febb",
-  "blk.0.attn_output.weight": "703295c2c63990ff896778685c678f145298886f680f3ed5dc2a7ad54c293265",
-  "blk.0.attn_q.weight": "69c2d0e4870e9d722a190d356203c9605575a16863466c3d1747966ef1cf5791",
-  "blk.0.attn_v.weight": "95219c9c07b5ffe9a9a01e456d845eef2b11f4fc12c93dbbba479db395444c13",
-  "blk.0.ffn_down.weight": "a2feb5eb3d572c57c5bafbf0ab506862df1160fe40965dcfe4b9fd855c08bed7",
-  "blk.0.ffn_gate.weight": "fcca072c445c31f4dc4d5dfaa785b1bdf7271342442099b74fd17268b5829fbf",
-  "blk.0.ffn_norm.weight": "7621f95dbd245cade6fffd6b08797d69d8e3954e960f0b5551b90d967ab95448",
-  "blk.0.ffn_up.weight": "14a9bcdd451403c67136391e1b6e53b3b1830f00199bd911dbcc56d8749c14f4",
-  "blk.1.attn_k.weight": "c70f73c5df20579cb44d971164b48b5f0d8d5abdb38b381e7a8b880ba12aa406",
-  "blk.1.attn_norm.weight": "88b6b91f93a1ef83425a7c7dc2a2fbd3b22704a04c64a80061df376ac8c33626",
-  "blk.1.attn_output.weight": "f031a537490c452be3b3bb51e6b7949a636405756e160976a1c070a792ea00ee",
-  "blk.1.attn_q.weight": "bdb23214b1cf9cfd30f863a0a5868e52c6809d93b7e8f44df096a94204d9896a",
-  "blk.1.attn_v.weight": "e9bbc0b05f2c872fb1403f8f938cd1612b502229ee401f12593b1164c61acc00",
-  "blk.1.ffn_down.weight": "5ff53811038b661a7b8f2bfdf213bebfb185ec1a6060b662f063714f33584d79",
-  "blk.1.ffn_gate.weight": "205085c8c951a5c7543b1495183cd96028fb49f67464b3e9862a2693a6077a33",
-  "blk.1.ffn_norm.weight": "798f354fc85afce9625f5d10093a585a966831698a0560e6c9b97ce659eb4b22",
-  "blk.1.ffn_up.weight": "db92dc5684cb6e90940e13f4d1da555ed20ba4f8cab1e990ddfd7553e2e91315",
-  "blk.2.attn_k.weight": "ef5ce360c4eed6d00d03ca4761e0f8e4b0af4509978468314be14f3d46621044",
-  "blk.2.attn_norm.weight": "6dadbc05dbd0d3fabb4216affa60a3de1378a82d2859dc90b338cbe70f50d455",
-  "blk.2.attn_output.weight": "6bbf87a966f691bbfd7c8d25629aa4e6710107bd431a667434861febb391edc5",
-  "blk.2.attn_q.weight": "4e575c09ae2de417ce9057ce8b073680e860a24aae13a472b68f101b760752e5",
-  "blk.2.attn_v.weight": "cd33f7f01141e9439afdaf2ea1aaced9feaa335e32a58daa136ebd555d4d96f4",
-  "blk.2.ffn_down.weight": "b970ff1b0b6494165defe2fbfa1d31425766ed71e64de9ec4e66ac3955c8bc5f",
-  "blk.2.ffn_gate.weight": "dbb3e1360402e0e369b101995bb686b73f95d4a7673f061be85d64d15dfb0061",
-  "blk.2.ffn_norm.weight": "bfb7980105d8ac9647710454f57a5cdac50598a0f6f4884e16f1d94b00844687",
-  "blk.2.ffn_up.weight": "50ef89339b275a438b664686f6227dd9b6e43853ed6856ec9e33ef4bbd90bda1",
-  "blk.3.attn_k.weight": "be942ea98151434eebcd2c1da4b00e0146152fe524a530689b1fd491cb833d21",
-  "blk.3.attn_norm.weight": "0df2f218daf609c289fb7c60c5f375fa99c0d4e04381ad5a494a19144edd8e20",
-  "blk.3.attn_output.weight": "c2184aaf86aa2cb8f47be49f60b165834e97205f39c6ee1dfd19fd4411a156ce",
-  "blk.3.attn_q.weight": "4f86e2a0a4221c1c84ff9c409ac89893cb95d7208cf65bf1e98e24e01125f991",
-  "blk.3.attn_v.weight": "abfdb8a60c349dadde641d1afc9542025e24fbf41a3238bfa9675e0b1f1e4b68",
-  "blk.3.ffn_down.weight": "58821a8d87008d47d122427911c6fad5272aca70c448bbae223256a74bacd07e",
-  "blk.3.ffn_gate.weight": "776e051f1a0ddd5c4934e69186683a75ca9a3c8c0f61911bba321fed1dd287d2",
-  "blk.3.ffn_norm.weight": "7f380f29335e28be90bfcfae6f6d69fdf5751211b36d2dd62aa5541ed113e4f2",
-  "blk.3.ffn_up.weight": "fc5ae8d488894cbd4951059675468d227da27871d26e925c9941863841c097ee",
-  "blk.4.attn_k.weight": "14833b078cc4c5137bdd5fdc0538047974ca147a99b0282e1b144440c78bc1db",
-  "blk.4.attn_norm.weight": "0a69957d4a15599fb80ad4753558020804925221457d9a5052926754d3768065",
-  "blk.4.attn_output.weight": "887a49b6130fb6297cf10767207c3dd97191b2cf63723449af9c27bca8dbeda0",
-  "blk.4.attn_q.weight": "51fd577b76764824dd6f0d4891c137ebe4736f591b5ca2793c5fff2be49abbde",
-  "blk.4.attn_v.weight": "1a623c43cf9c509d1b7ea0d1a5c04d0af4809665f9f9e93b7d6dba8c5df178fa",
-  "blk.4.ffn_down.weight": "5d61e8856d8941d2b1fd138116d015f63840d0fa1e31e20e20a5ceca1536ceec",
-  "blk.4.ffn_gate.weight": "06640f7273764f8ca5df7e386547417916b6cd7d565a8343153113239a94b0a1",
-  "blk.4.ffn_norm.weight": "91a6c6c41b894228e361435ecbc5058dca34d4911a23da5b56de219299c964d3",
-  "blk.4.ffn_up.weight": "d016dac1055e36d6a10b6317e57f98a904709ea892ef3194342f4d2f6326561e",
-  "blk.5.attn_k.weight": "987146afe124131500808cc0da33c06d207433656d41df6e6d8c99118a83bac5",
-  "blk.5.attn_norm.weight": "6b354938966f2608a2fb8d0f5b363ed0d8b0967c2ec8d0abd5c625b413042ded",
-  "blk.5.attn_output.weight": "cdcbfe02c6ff79d5326882b017a02099f5af71beedf6b1b3eb4de01e3a844536",
-  "blk.5.attn_q.weight": "b910d0cff781d3efb42eab0a302f46f286b2de717079175680d5b42bf8c309c8",
-  "blk.5.attn_v.weight": "66d3a279f747412f9f4b0e8abad44540c122ab2e811a7ee74c1f33bc36caade9",
-  "blk.5.ffn_down.weight": "c9b0efd2212981f16d956d8571f054b68780ad01f4917033647e359b557a4653",
-  "blk.5.ffn_gate.weight": "fe96b94109ca141c01f6a04788e20783019ca6ec334aa1f3134810bdb499e557",
-  "blk.5.ffn_norm.weight": "aa7b016e832e7055a36c6e20de58ea1936f995f390401fff1c5fc65906064e49",
-  "blk.5.ffn_up.weight": "555ce27c4873d3375394f38ad3b45e3d8848f9d5642dc1602383d0f0a33c2a14",
-  "blk.6.attn_k.weight": "88280d461db324c4f36475ce396793063e61a27283ec64511b0480890fb5b3b4",
-  "blk.6.attn_norm.weight": "af8f460c411f660d33196286d208f1845fd5a2b45f7b56549a4df31e7515447a",
-  "blk.6.attn_output.weight": "dd9996fb0a256e8375ad3917705258a33fce006bcea0f536caae420a77974d8b",
-  "blk.6.attn_q.weight": "7a4841541191e037cfb9b07930c4d8cab451809658b182f0ada6ccde9615c003",
-  "blk.6.attn_v.weight": "ae81e6a592b64d701a9d40233e986039a56cba8d8d24f61aea93c6393cf3078a",
-  "blk.6.ffn_down.weight": "622dd1ce1706355cbc659a8ab2c4509678ffe0f3ad34258e5e25ed2a5d951bcd",
-  "blk.6.ffn_gate.weight": "8389a735c0bd5591010f8ced9805a2a12c749f6df0d3c18ad4d05c2a302e7168",
-  "blk.6.ffn_norm.weight": "621f5346400382474d61358397bd58fb1459b07c53e376e4bca15e08b3f9b3fb",
-  "blk.6.ffn_up.weight": "8d834e4c42f13c251dfee36cf89e12f1bd400680d00d5c2e6cac0459e9ce2f7f",
-  "blk.7.attn_k.weight": "8bd0412de65a3e64901ef8fe6a28c95e116bf39dc9aa22f0126b9d36688e5ea7",
-  "blk.7.attn_norm.weight": "056d8e56be4e87d6dc6f900762f0dc6fde07bfdc50dd85bfc510415e2bba3f3d",
-  "blk.7.attn_output.weight": "27972eda51da53d416ff95aed78149a2c5a287b47d2cd46f2f544ca692ecb3bb",
-  "blk.7.attn_q.weight": "41eca977b9371f7932800c11a9c45b931310196919e2a0651b847703b180fc7f",
-  "blk.7.attn_v.weight": "13c74fd7e07f08883a09fb070a1fe5bbdd2341b4cb8d1cac07c4b637049b5774",
-  "blk.7.ffn_down.weight": "9e75db42468800849a9a7da603d0072c5e86c8ed2b4d8b20a312a51fb86a7a10",
-  "blk.7.ffn_gate.weight": "db6bdc3117f910088aaf7db51f2da63ea5bd933de36af5599c215bfb26f7db2b",
-  "blk.7.ffn_norm.weight": "48bb82b49bfc8679a1e77f282ee182d952db7a3c11be7ef9a102ee2ddd8011e2",
-  "blk.7.ffn_up.weight": "feebea87175817a0f3585ec0af09dc873d94c203581ae97a712eb356d3b49efe",
-  "blk.8.attn_k.weight": "d5640ad71b6af68d88e17bf8e7fc26c907d2262605457a84247dd9afc2884d69",
-  "blk.8.attn_norm.weight": "75b850c481a69083ae09d0207ba7317b37c735a39fcf5fef5400e6c84fb1257f",
-  "blk.8.attn_output.weight": "cbd669dbdea2bdd90f9f0cc97566b3dffff3c56cecb4f47290ceef30da83b2d6",
-  "blk.8.attn_q.weight": "9edcb63087a431bac361822497e6ecdaa06d9ea4a1a754e36da7ba9f8db81c7c",
-  "blk.8.attn_v.weight": "3fb72c2c4f95a83626aa3e30062f9450b09ab37c7871e229f18bbc5cf744633c",
-  "blk.8.ffn_down.weight": "bd69d2c9172974fff154441b237b4787fb53b2d185325442d5048130ef5bc4ef",
-  "blk.8.ffn_gate.weight": "d04689c80553edd011d1cbaa5d570fffa7fa91e88b66cf1352d89ab60b72f908",
-  "blk.8.ffn_norm.weight": "e49984183b735b7f2c4e4730c289eed9394056d2e283a00fd83ea0915df31a73",
-  "blk.8.ffn_up.weight": "8fe62a1ce8e847e567add6c6f6bf2922bc467495b5eb4c116b3cb85b85b3b211",
-  "blk.9.attn_k.weight": "d90904959e5004cf0d6e729c6bff18cc33c094798b802473c1ec55ab8d276183",
-  "blk.9.attn_norm.weight": "79277f290cc07411115d8fa138045edf4a17b3416ab2145409cbe8ab829fd4ee",
-  "blk.9.attn_output.weight": "5a21bf2e1f09a81405025f96d4153ffb630158e17269cff8ffff935c38ceb1a7",
-  "blk.9.attn_q.weight": "51b1d0febc3b350945be4504f55afa4347517bde0f710e1a4b88e6b17e71e7c7",
-  "blk.9.attn_v.weight": "aab7e1db0a8b50a03036356791ffce736ab010d15674c96eaef8049d80076054",
-  "blk.9.ffn_down.weight": "cbf43ec84becb40c9359a181ab0e641fd7faae7d34b549501f7cfb7afdc3d764",
-  "blk.9.ffn_gate.weight": "dce0e8661c778327bed7f03b6790d26710764188aed9dc746e6e05863891fa57",
-  "blk.9.ffn_norm.weight": "6d41642104f995c77bf31122b13237caebda3e7fcccb1367ce91db36b015e923",
-  "blk.9.ffn_up.weight": "82fe4c67bf24e7b2d6f6e05f7b1234c2bf90c3932951091a9066211b8e15ecbb",
-  "blk.10.attn_k.weight": "f6a9ed8fd8d3229b5d03175c413ffc56a07f2ce7236271986361dd3d8993f9aa",
-  "blk.10.attn_norm.weight": "cebbef89f0326ca8e02df3867a571e4d61c20c2a12f295f98ae590d62bc86010",
-  "blk.10.attn_output.weight": "34f5efb86accb4f06347d83a32558ea8eab3039d128969161a741ebacbb656ff",
-  "blk.10.attn_q.weight": "1e0efe27df2d5d50f7157253ba2cfd436d6781c3dc78ca176d0c16a210b5b763",
-  "blk.10.attn_v.weight": "8f085bf50a2b0f83cd6cdda3c8ef5a9e204a36348ed95871aac725d1f68640cf",
-  "blk.10.ffn_down.weight": "bf3b3cb4cace435809ac7b4cc933f20853af12f1f272d3dcefe7f19c0f203b8b",
-  "blk.10.ffn_gate.weight": "d3df7a1413b1c5adf1a1dcda9e5225a15c89874bae53bb6137ad1ea42fca2d34",
-  "blk.10.ffn_norm.weight": "a1da603b0480471b5ed8e862148cecd5fed918f8304d6933ab0bdb25b8d2fb8f",
-  "blk.10.ffn_up.weight": "bffbba605922e972dc47dda88a0b4659aa52236c76e5fe861a949e6d9a367492",
-  "blk.11.attn_k.weight": "9f31c63d66cd32c29b1eb8bb829d0c8525ce2ae936e0eefdaab6335a2d12a3df",
-  "blk.11.attn_norm.weight": "0bde1a266d8b2e8f202bb7e2e88b19147ca83021901f6d3cae77a4df5548c754",
-  "blk.11.attn_output.weight": "e10725c7cf746ed4a7e472cf7aea6cb564e5db6a1d5197adc980d650a387ccea",
-  "blk.11.attn_q.weight": "05ee758a7d065802630f8c65dca424364c1c8825e389aa33f9405c45e8a50cce",
-  "blk.11.attn_v.weight": "0c3ae7090f11775d24c51120db6e305db6aff706493e7ee123dcab74485ba789",
-  "blk.11.ffn_down.weight": "7ba40b8e12c09c5fb2006b77a771cb01ce894e88a3b3e1877f927a5b89c91709",
-  "blk.11.ffn_gate.weight": "db76388a023b98097972d354ba1c6a5e26efdeb1c596b9c28bf2cd8f6596975e",
-  "blk.11.ffn_norm.weight": "a38c3ae1b89a68ddc7b72c99c5b28be7fe3787c4fad9904d0c43d64eaf00c474",
-  "blk.11.ffn_up.weight": "13c8142f9cf1eddc658babf978daf3515c4ccc45f849f3e7e3930aa18a8480a0",
-  "blk.12.attn_k.weight": "f03241c36ac87cb57429a2ef22186b8d7d0b590a8b173beb01fa13d93772f3b1",
-  "blk.12.attn_norm.weight": "4568f654e6d65104d586e7c16ba960c83428698ce103022b7e0be15e2884e13b",
-  "blk.12.attn_output.weight": "04867603f82f91e41306e09b33ecda0104b3ee4834061f2c0bbdc8da33c72509",
-  "blk.12.attn_q.weight": "70fe04b9a8e08b6100cc8d6b58bf4cbbad15ca1de82d63baca5d352ba6c4cbae",
-  "blk.12.attn_v.weight": "15cb28db61a86c98687991d7e611bc92a1fcc6007f3432149cfb5fe518a4f65e",
-  "blk.12.ffn_down.weight": "6d10c790a4e3dc44c2dc36d96251ae97cdf30a4fa04d4c43e31bfbd038e6a7b7",
-  "blk.12.ffn_gate.weight": "3462a2d8f6b4743b25e24da51b90018ac2858d05ac7e582bcb69063cfdac1104",
-  "blk.12.ffn_norm.weight": "1f96392c1faa34e34ae5dea55a6a86c5aa4c79758952075d53d28de89dd88456",
-  "blk.12.ffn_up.weight": "d22eacc612a7411953d948483c5fb201e11722955ee0754da866e7bec578ac6d",
-  "blk.13.attn_k.weight": "5864977e6b733ea942647d6feed5c76156c48c200649c22e4e11b9e5860e57f3",
-  "blk.13.attn_norm.weight": "87e053535144723db4145aa5402acc54331b7696752d852bb9fc542ff33f0fb5",
-  "blk.13.attn_output.weight": "078145f5ad83f8b14f97a869346f7fd1583b24d1e3edadaa95d3da4242973f8f",
-  "blk.13.attn_q.weight": "3b8caf35504cbc4d1a7dd6e011a95760703b7f71e2218b030b1254f811362dd7",
-  "blk.13.attn_v.weight": "4fdf8365a603e043e5b40c4a21c84ac167f9be62794178f9d8a608dfe5653bf9",
-  "blk.13.ffn_down.weight": "a07d3abbfcacf48ba028df2cab895be32cc15022d23389a745286e79c1b1d1fd",
-  "blk.13.ffn_gate.weight": "1d2ab39666aa2909acc96787432a3ed13b19d25170f74665fadff9b17bbaffb1",
-  "blk.13.ffn_norm.weight": "4f2e809fda5f3eadf52578ee50e0ba36e53be91e55dce418c12dfe595f5f18e7",
-  "blk.13.ffn_up.weight": "8783d2720c2c37ca176a5801e0b3ef1f9cc9cf3ef1cd37af423aaf6b2a27e2bd",
-  "blk.14.attn_k.weight": "ce9428e2b55d43ae0c6690dbd56182f99adc427694ba8236b405cc8ea5035e86",
-  "blk.14.attn_norm.weight": "6abb35f9db8251d6ae954bda147c6ada2371b0574d11702e828f3c6ac99b7cc0",
-  "blk.14.attn_output.weight": "fe3880916d0ceb5bff672c88bbefb7060a545be609bf049beb2024b38221836d",
-  "blk.14.attn_q.weight": "7c8ad81be6f4a350931fd108b5f7c9e366e8c26ef62d1d85ffef5dca8fd893f8",
-  "blk.14.attn_v.weight": "e4bdedffacbebe38567a0734dfd67db90e911d9a9669fcde9a7c4ad8a0066c52",
-  "blk.14.ffn_down.weight": "ef6694dff1e05820aac0cd2b22f39ac7788b4967afc9250775575554c66aab2c",
-  "blk.14.ffn_gate.weight": "db63c4179e2db704bc505e2b4696e055b593e295a1b7c4c586fc793bdd5aab19",
-  "blk.14.ffn_norm.weight": "2796a62d832a9710148f95d533320492a33e712b2e5218659c548705bd11684d",
-  "blk.14.ffn_up.weight": "3f78c78d8c2d54df45f799d4ff902316628af296834afe4ceed63d4a324ff03e",
-  "blk.15.attn_k.weight": "6e810ee3859e07695645ee0c9a5efc7962668984a5f0a9325f47e462743b447c",
-  "blk.15.attn_norm.weight": "0956b576ae96db0b28cb09f761f801cfd9281432284664f0fe181c8d9c55d1ec",
-  "blk.15.attn_output.weight": "03a17f7e94208177aace5cc41b7f54670ba57873b7274ff6e23caf58cce110ca",
-  "blk.15.attn_q.weight": "b8edafe7d2216a6f8b4ae4905a906475490e6ea418f6e1d3cec563dbdc6fab91",
-  "blk.15.attn_v.weight": "f8ae8cae0f4cfa34a459824eba57350c3c248104ba5607e7d9dc7d7c39aaf4a6",
-  "blk.15.ffn_down.weight": "8d02eb439da852246d2ca67e9b7b6de0b090b80744355e64728a23e41926505b",
-  "blk.15.ffn_gate.weight": "ed5bf361c67db8731f186b775826f21c33bdb521111fd2d922539719a770239f",
-  "blk.15.ffn_norm.weight": "5942ca3c73209ac9a0c8bfd9b4aab7f7be7aee9aa12d9c35833493b44af76767",
-  "blk.15.ffn_up.weight": "f4bebf4ad99ec5f911327dec347be6c595814885309c7bc5647ce28c7f4d1cf5",
-  "blk.16.attn_k.weight": "756a534c19364448e0958b8948fe33891c6ccda0fbb4dfa2024e1f532a87804b",
-  "blk.16.attn_norm.weight": "386b7b9e4e6509f6af9c022d942b6c6c6cc136aeed8751ecb037c74d7c4bfb93",
-  "blk.16.attn_output.weight": "3ba1a766a25830b84d7c22178203635f9c5624caad290bc5e5d73da5d5e7a2ec",
-  "blk.16.attn_q.weight": "d39b0c91e1fda7685d50a0f7cc8d18c44b5bdc90a142c7fda0bc329cca1afa74",
-  "blk.16.attn_v.weight": "98b33fcb0ee3483cff1b06ecb44d7b7ffb4d34c268248e4d73dfdf82b2065b2f",
-  "blk.16.ffn_down.weight": "14006f5e4acb2f9416271ae562e299359cd2585739c7fc77ccbca54495563948",
-  "blk.16.ffn_gate.weight": "12f8abae2d301d8f88bedb6af98b1daecc7b0b8d05148594f931f30958d77aca",
-  "blk.16.ffn_norm.weight": "129a15a046ee96d06de288bd43c80f77a6b0fb3a159c7367154c6e4aaf362672",
-  "blk.16.ffn_up.weight": "b4a5911a45f3871ef1d4efb7dc7108645a564b70f818eccf45beebef2e844ee9",
-  "blk.17.attn_k.weight": "5e1bfcff0146ebdde3817b656952892eb671e14e75afc92fa53f84f8eecbec4c",
-  "blk.17.attn_norm.weight": "60bc988fab7c4b29ee9de599df41a8de00caa94fcd74677da011fac82f60f465",
-  "blk.17.attn_output.weight": "ba49b40d6a0b5685f749c24b0edbed3adc44dbe13b5d5e5fa1e56169fc746555",
-  "blk.17.attn_q.weight": "82bb415d24efcd14d03ace03f907bb70db6a204c76a0bdd1892e0fba165db87d",
-  "blk.17.attn_v.weight": "73dbe54beb91a899884e275ea81ffc5187a20cb7d5b68d5c299b783096999d94",
-  "blk.17.ffn_down.weight": "7c086166241e0664f8963fd1ca4ed74c737abfb2525ec20f8435821ff50158f3",
-  "blk.17.ffn_gate.weight": "51a32f78244d42a539f619c5ce661db9e6cf41636280a826d439b5444edcd28c",
-  "blk.17.ffn_norm.weight": "c4bb247fccd1ecc84875028af63dd20aaf5cbd17eb94a9bc36679c09285dccab",
-  "blk.17.ffn_up.weight": "b5886182790bc6fbadd63de9bc4ffee416f3b69a66280d197ab8c18edf769abf",
-  "output_norm.weight": "481f3097d0a20412e35b3a739b1b958487bcd41ff67744baa3c9acbddd2ee4d4"
-}
--- a/convert/tokenizer.go
+++ b/convert/tokenizer.go
@ -1,12 +1,10 @@
 package convert

 import (
+	"cmp"
 	"crypto/sha256"
-	"encoding/hex"
 	"encoding/json"
-	"errors"
 	"fmt"
-	"io/fs"
 	"log/slog"
 	"os"
 	"slices"
@ -14,152 +12,10 @@ import (
 	"golang.org/x/exp/maps"
 )

-const (
-	_ int32 = iota
-	tokenTypeNormal
-	tokenTypeUnknown
-	tokenTypeControl
-	tokenTypeUserDefined
-	tokenTypeUnused
-	tokenTypeByte
-)
-
 type Tokenizer struct {
-	*Vocabulary
-	SpecialVocabulary []*SpecialVocabulary
-	Merges            []string
-
-	Pre      string
-	Template string
-}
-
-func parseTokenizer(fsys fs.FS, specialTokenTypes []string) (*Tokenizer, error) {
-	v, err := parseVocabulary(fsys)
-	if err != nil {
-		return nil, err
-	}
-
-	t := &Tokenizer{
-		Vocabulary: v,
-		Pre:        "default",
-	}
-
-	addedTokens := make(map[string]token)
-	if f, err := fsys.Open("tokenizer.json"); errors.Is(err, os.ErrNotExist) {
-	} else if err != nil {
-		return nil, err
-	} else {
-		defer f.Close()
-
-		var tt tokenizer
-		if err := json.NewDecoder(f).Decode(&tt); err != nil {
-			return nil, err
-		}
-
-		for _, t := range tt.AddedTokens {
-			addedTokens[t.Content] = t
-		}
-
-		t.Merges = tt.Model.Merges
-
-		sha256sum := sha256.New()
-		for _, pt := range tt.PreTokenizer.PreTokenizers {
-			switch pt.Type {
-			case "Split":
-				if pt.Pattern.Regex != "" {
-					// create a checksum of all Split pretokenizers which should be sufficient
-					// to identify the pretokenizer
-					sha256sum.Write([]byte(pt.Pattern.Regex))
-				}
-			}
-		}
-
-		switch digest := hex.EncodeToString(sha256sum.Sum(nil)); digest {
-		case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
-			t.Pre = "llama-bpe"
-		case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
-			t.Pre = "deepseek-llm"
-		case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
-			t.Pre = "deepseek-coder"
-		case "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855":
-			// noop, empty pretokenizer
-		default:
-			slog.Warn("unknown pretokenizer, using default", "digest", digest)
-		}
-	}
-
-	if f, err := fsys.Open("tokenizer_config.json"); errors.Is(err, os.ErrNotExist) {
-	} else if err != nil {
-		return nil, err
-	} else {
-		defer f.Close()
-
-		var p map[string]json.RawMessage
-		if err := json.NewDecoder(f).Decode(&p); err != nil {
-			return nil, err
-		}
-
-		if template, ok := p["chat_template"]; ok {
-			var s []struct {
-				Name     string `json:"name"`
-				Template string `json:"template"`
-			}
-			if err := json.Unmarshal(template, &t.Template); err == nil {
-				// noop
-			} else if err := json.Unmarshal(template, &s); err == nil {
-				for _, e := range s {
-					if e.Name == "default" {
-						t.Template = e.Template
-						break
-					}
-				}
-			} else {
-				return nil, fmt.Errorf("invalid chat_template: %w", err)
-			}
-		}
-
-		for _, st := range specialTokenTypes {
-			sv := SpecialVocabulary{Type: st}
-			if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
-				if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
-					return nil, err
-				}
-			}
-
-			if bts, ok := p[fmt.Sprintf("%s_token", st)]; ok {
-				var content string
-				if err := json.Unmarshal(bts, &content); err != nil {
-					var mm map[string]any
-					if err := json.Unmarshal(bts, &mm); err != nil {
-						continue
-					}
-
-					content, ok = mm["content"].(string)
-					if !ok {
-						continue
-					}
-				}
-
-				sv.Content = content
-			}
-
-			if id, ok := addedTokens[sv.Content]; ok {
-				sv.ID = id.ID
-				t.SpecialVocabulary = append(t.SpecialVocabulary, &sv)
-			}
-		}
-	}
-
-	return t, nil
-}
-
-type tokenizer struct {
-	AddedTokens []token `json:"added_tokens"`
-	Model       struct {
-		Type   string         `json:"type"`
-		Vocab  map[string]int `json:"vocab"`
-		Merges []string       `json:"merges"`
-	} `json:"model"`
+	Version     string         `json:"version"`
+	AddedTokens []Token        `json:"added_tokens"`
+	Model       TokenizerModel `json:"model"`

 	PreTokenizer struct {
 		PreTokenizers []struct {
@ -171,108 +27,83 @@ type tokenizer struct {
 	} `json:"pre_tokenizer"`
 }

-type token struct {
+type TokenizerModel struct {
+	Type   string         `json:"type"`
+	Vocab  map[string]int `json:"vocab"`
+	Merges []string       `json:"merges"`
+	Tokens []Token
+}
+
+type Token struct {
 	ID          int    `json:"id"`
 	Content     string `json:"content"`
 	Special     bool   `json:"special"`
 	UserDefined bool
 }

-type Vocabulary struct {
-	Model  string
-	Tokens []string
-	Scores []float32
-	Types  []int32
+func (t *Token) Type() int32 {
+	switch {
+	case t.Special:
+		return tokenTypeControl
+	case t.UserDefined:
+		return tokenTypeUserDefined
+	default:
+		return tokenTypeNormal
+	}
 }

-func parseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) {
-	f, err := fsys.Open("tokenizer.json")
+func (t *Tokenizer) maxID() int {
+	return max(
+		slices.Max(maps.Values(t.Model.Vocab)),
+		slices.MaxFunc(t.AddedTokens, func(a, b Token) int {
+			return cmp.Compare(a.ID, b.ID)
+		}).ID,
+	)
+}
+
+func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) {
+	f, err := os.Open(dirpath)
 	if err != nil {
-		return nil, err
+		panic(err)
 	}
 	defer f.Close()

-	var t tokenizer
+	var t Tokenizer
 	if err := json.NewDecoder(f).Decode(&t); err != nil {
-		return nil, err
+		return "", nil, nil, err
 	}

-	tokens := make(map[int]token, len(t.Model.Vocab))
+	tokens = make([]Token, t.maxID()+1)
 	for k, v := range t.Model.Vocab {
-		tokens[v] = token{
-			ID:      v,
-			Content: k,
+		tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
+	}
+
+	for _, v := range t.AddedTokens {
+		v.UserDefined = true
+		tokens[v.ID] = v
+	}
+
+	sha256sum := sha256.New()
+	for _, pt := range t.PreTokenizer.PreTokenizers {
+		switch pt.Type {
+		case "Split":
+			if pt.Pattern.Regex != "" {
+				sha256sum.Write([]byte(pt.Pattern.Regex))
+			}
 		}
 	}

-	for _, token := range t.AddedTokens {
-		token.UserDefined = true
-		tokens[token.ID] = token
+	switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
+	case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
+		pre = "llama-bpe"
+	case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
+		pre = "deepseek-llm"
+	case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
+		pre = "deepseek-coder"
+	default:
+		slog.Warn("unknown pretokenizer, using default", "digest", digest)
+		pre = "default"
 	}

-	keys := maps.Keys(tokens)
-	slices.Sort(keys)
-
-	v := Vocabulary{Model: "gpt2"}
-	for _, k := range keys {
-		token := tokens[k]
-		v.Tokens = append(v.Tokens, token.Content)
-		v.Scores = append(v.Scores, float32(token.ID))
-
-		switch {
-		case token.Special:
-			v.Types = append(v.Types, tokenTypeControl)
-		case token.UserDefined:
-			v.Types = append(v.Types, tokenTypeUserDefined)
-		default:
-			v.Types = append(v.Types, tokenTypeNormal)
-		}
-	}
-
-	return &v, nil
-}
-
-func parseVocabulary(fsys fs.FS) (*Vocabulary, error) {
-	patterns := []struct {
-		Pattern string
-		Func    func(fs.FS) (*Vocabulary, error)
-	}{
-		{"tokenizer.model", parseSentencePiece},
-		{"tokenizer.json", parseVocabularyFromTokenizer},
-	}
-
-	for _, pattern := range patterns {
-		if _, err := fs.Stat(fsys, pattern.Pattern); errors.Is(err, os.ErrNotExist) {
-			continue
-		} else if err != nil {
-			return nil, err
-		}
-
-		return pattern.Func(fsys)
-	}
-
-	return nil, errors.New("unknown tokenizer format")
-}
-
-type SpecialVocabulary struct {
-	Type     string
-	ID       int
-	Content  string
-	AddToken bool
-}
-
-func (sv SpecialVocabulary) Key() string {
-	switch t := sv.Type; t {
-	case "bos", "eos", "cls", "mask":
-		return t
-	case "unk":
-		return "unknown"
-	case "sep":
-		//nolint:misspell // this is an upstream typo
-		return "seperator"
-	case "pad":
-		return "padding"
-	}
-
-	panic("unknown special vocabulary type")
+	return pre, tokens, t.Model.Merges, nil
 }
--- a/convert/tokenizer_spm.go
+++ b/convert/tokenizer_spm.go
@ -1,113 +0,0 @@
-package convert
-
-import (
-	"cmp"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io/fs"
-	"os"
-	"slices"
-
-	"google.golang.org/protobuf/proto"
-
-	"github.com/ollama/ollama/convert/sentencepiece"
-)
-
-func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
-	ast, err := parseAdditionalSpecialTokens(fsys)
-	if err != nil {
-		return nil, err
-	}
-
-	bts, err := fs.ReadFile(fsys, "tokenizer.model")
-	if err != nil {
-		return nil, err
-	}
-
-	var spm sentencepiece.ModelProto
-	if err := proto.Unmarshal(bts, &spm); err != nil {
-		return nil, err
-	}
-
-	v := Vocabulary{Model: "llama"}
-	for _, piece := range spm.GetPieces() {
-		v.Tokens = append(v.Tokens, piece.GetPiece())
-		v.Scores = append(v.Scores, piece.GetScore())
-
-		switch t := piece.GetType(); t {
-		case sentencepiece.ModelProto_SentencePiece_UNKNOWN,
-			sentencepiece.ModelProto_SentencePiece_CONTROL,
-			sentencepiece.ModelProto_SentencePiece_UNUSED,
-			sentencepiece.ModelProto_SentencePiece_BYTE:
-			v.Types = append(v.Types, int32(t))
-		default:
-			tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
-			if slices.Contains(ast, piece.GetPiece()) {
-				tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
-			}
-
-			v.Types = append(v.Types, tt)
-		}
-	}
-
-	f, err := fsys.Open("added_tokens.json")
-	if errors.Is(err, os.ErrNotExist) {
-		return &v, nil
-	} else if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	var atm map[string]int
-	if err := json.NewDecoder(f).Decode(&atm); err != nil {
-		return nil, err
-	}
-
-	type t struct {
-		id      int
-		content string
-	}
-
-	var ts []t
-	for content, id := range atm {
-		ts = append(ts, t{id, content})
-	}
-
-	slices.SortFunc(ts, func(i, j t) int {
-		return cmp.Compare(i.id, j.id)
-	})
-
-	n := len(v.Tokens)
-	for i, t := range ts {
-		if t.id != i+n {
-			return nil, fmt.Errorf("invalid token id: %d", t.id)
-		}
-
-		v.Tokens = append(v.Tokens, t.content)
-		v.Scores = append(v.Scores, -1000.0)
-		v.Types = append(v.Types, tokenTypeUserDefined)
-	}
-
-	return &v, nil
-}
-
-func parseAdditionalSpecialTokens(fsys fs.FS) ([]string, error) {
-	f, err := fsys.Open("special_tokens_map.json")
-	if errors.Is(err, os.ErrNotExist) {
-		return nil, nil
-	} else if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	var m struct {
-		AdditionalSpecialTokens []string `json:"additional_special_tokens"`
-	}
-
-	if err := json.NewDecoder(f).Decode(&m); err != nil {
-		return nil, err
-	}
-
-	return m.AdditionalSpecialTokens, nil
-}
--- a/convert/tokenizer_test.go
+++ b/convert/tokenizer_test.go
@ -1,208 +0,0 @@
-package convert
-
-import (
-	"io"
-	"io/fs"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-
-	"github.com/google/go-cmp/cmp"
-)
-
-func createTokenizerFS(t *testing.T, dir string, files map[string]io.Reader) fs.FS {
-	t.Helper()
-
-	for k, v := range files {
-		if err := func() error {
-			f, err := os.Create(filepath.Join(dir, k))
-			if err != nil {
-				return err
-			}
-			defer f.Close()
-
-			if _, err := io.Copy(f, v); err != nil {
-				return err
-			}
-
-			return nil
-		}(); err != nil {
-			t.Fatalf("unexpected error: %v", err)
-		}
-	}
-
-	return os.DirFS(dir)
-}
-
-func TestParseTokenizer(t *testing.T) {
-	cases := []struct {
-		name              string
-		fsys              fs.FS
-		specialTokenTypes []string
-		want              *Tokenizer
-	}{
-		{
-			name: "string chat template",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{}`),
-				"tokenizer_config.json": strings.NewReader(`{
-					"chat_template": "<default template>"
-				}`),
-			}),
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{Model: "gpt2"},
-				Pre:        "default",
-				Template:   "<default template>",
-			},
-		},
-		{
-			name: "list chat template",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{}`),
-				"tokenizer_config.json": strings.NewReader(`{
-					"chat_template": [
-						{
-							"name": "default",
-							"template": "<default template>"
-						},
-						{
-							"name": "tools",
-							"template": "<tools template>"
-						}
-					]
-				}`),
-			}),
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{Model: "gpt2"},
-				Pre:        "default",
-				Template:   "<default template>",
-			},
-		},
-		{
-			name: "added tokens",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{
-					"added_tokens": [
-						{
-							"id": 999,
-							"content": "<unused999>",
-							"special": false
-						}
-					]
-				}`),
-			}),
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{
-					Model:  "gpt2",
-					Tokens: []string{"<unused999>"},
-					Scores: []float32{999},
-					Types:  []int32{4},
-				},
-				Pre: "default",
-			},
-		},
-		{
-			name: "added tokens overlap vocab",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{
-					"added_tokens": [
-						{
-							"id": 0,
-							"content": "<pad>",
-							"special": true
-						}
-					],
-					"model": {
-						"vocab": {
-							"<pad>": 0
-						}
-					}
-				}`),
-			}),
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{
-					Model:  "gpt2",
-					Tokens: []string{"<pad>"},
-					Scores: []float32{0},
-					Types:  []int32{3},
-				},
-				Pre: "default",
-			},
-		},
-		{
-			name: "special token types",
-			fsys: createTokenizerFS(t, t.TempDir(), map[string]io.Reader{
-				"tokenizer.json": strings.NewReader(`{
-					"added_tokens": [
-						{
-							"id": 0,
-							"content": "<pad>",
-							"special": true
-						},
-						{
-							"id": 1,
-							"content": "<eos>",
-							"special": true
-						},
-						{
-							"id": 2,
-							"content": "<bos>",
-							"special": true
-						},
-						{
-							"id": 3,
-							"content": "<unk>",
-							"special": true
-						}
-					],
-					"model": {
-						"vocab": {
-							"<pad>": 0,
-							"<eos>": 1,
-							"<bos>": 2,
-							"<unk>": 3
-						}
-					}
-				}`),
-				"tokenizer_config.json": strings.NewReader(`{
-					"add_bos_token": true,
-					"add_eos_token": false,
-					"bos_token": "<bos>",
-					"eos_token": "<eos>",
-					"pad_token": "<pad>",
-					"unk_token": "<unk>"
-				}`),
-			}),
-			specialTokenTypes: []string{"pad", "eos", "bos", "unk"},
-			want: &Tokenizer{
-				Vocabulary: &Vocabulary{
-					Model:  "gpt2",
-					Tokens: []string{"<pad>", "<eos>", "<bos>", "<unk>"},
-					Scores: []float32{0, 1, 2, 3},
-					Types:  []int32{3, 3, 3, 3},
-				},
-				SpecialVocabulary: []*SpecialVocabulary{
-					{Type: "pad", Content: "<pad>", ID: 0, AddToken: false},
-					{Type: "eos", Content: "<eos>", ID: 1, AddToken: false},
-					{Type: "bos", Content: "<bos>", ID: 2, AddToken: true},
-					{Type: "unk", Content: "<unk>", ID: 3, AddToken: false},
-				},
-				Pre: "default",
-			},
-		},
-	}
-
-	for _, tt := range cases {
-		t.Run(tt.name, func(t *testing.T) {
-			tokenizer, err := parseTokenizer(tt.fsys, tt.specialTokenTypes)
-			if err != nil {
-				t.Fatalf("unexpected error: %v", err)
-			}
-
-			if diff := cmp.Diff(tt.want, tokenizer); diff != "" {
-				t.Errorf("unexpected tokenizer (-want +got):\n%s", diff)
-			}
-		})
-	}
-}
--- a/convert/torch.go
+++ b/convert/torch.go
@ -0,0 +1,288 @@
+package convert
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+
+	"github.com/nlpodyssey/gopickle/pytorch"
+	"github.com/nlpodyssey/gopickle/types"
+	"github.com/x448/float16"
+
+	"github.com/ollama/ollama/llm"
+)
+
+type torchWriterTo struct {
+	t *llm.Tensor
+
+	params *Params
+	bo     ByteOrder
+
+	storage  pytorch.StorageInterface
+	repacker func(string, []float32, []uint64) ([]float32, error)
+}
+
+type TorchFormat struct{}
+
+func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
+	slog.Debug("getting torch tensors")
+
+	var files []string
+	if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
+		files = append(files, pt...)
+	} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
+		files = append(files, pt...)
+	}
+
+	var offset uint64
+	var tensors []llm.Tensor
+	for _, fn := range files {
+		m, err := pytorch.Load(fn)
+		if err != nil {
+			slog.Error(fmt.Sprintf("error unpickling: %q", err))
+			return []llm.Tensor{}, err
+		}
+
+		for _, k := range m.(*types.Dict).Keys() {
+			if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") {
+				continue
+			}
+
+			t, _ := m.(*types.Dict).Get(k)
+			tshape := t.(*pytorch.Tensor).Size
+
+			var size uint64
+			var kind uint32
+			switch len(tshape) {
+			case 0:
+				continue
+			case 1:
+				// convert to float32
+				kind = 0
+				size = uint64(tshape[0] * 4)
+			case 2:
+				// convert to float16
+				kind = 1
+				size = uint64(tshape[0] * tshape[1] * 2)
+			}
+
+			ggufName, err := tf.GetLayerName(k.(string))
+			if err != nil {
+				slog.Error(err.Error())
+				return nil, err
+			}
+			slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape))
+
+			shape := []uint64{0, 0, 0, 0}
+			for i := range tshape {
+				shape[i] = uint64(tshape[i])
+			}
+
+			tensor := llm.Tensor{
+				Name:   ggufName,
+				Kind:   kind,
+				Offset: offset, // calculate the offset
+				Shape:  shape[:],
+			}
+
+			tensor.WriterTo = torchWriterTo{
+				t:       &tensor,
+				params:  params,
+				bo:      params.ByteOrder,
+				storage: t.(*pytorch.Tensor).Source,
+			}
+
+			tensors = append(tensors, tensor)
+			offset += size
+		}
+	}
+
+	return tensors, nil
+
+}
+
+func getAltParams(dirpath string) (*Params, error) {
+	f, err := os.Open(filepath.Join(dirpath, "params.json"))
+	if err != nil {
+		slog.Error("no params.json")
+		return nil, err
+	}
+	defer f.Close()
+
+	type TorchParams struct {
+		HiddenSize     int     `json:"dim"`
+		AttentionHeads int     `json:"n_heads"`
+		KeyValHeads    int     `json:"n_kv_heads"`
+		HiddenLayers   int     `json:"n_layers"`
+		RopeTheta      float64 `json:"rope_theta"`
+		NormEPS        float64 `json:"norm_eps"`
+	}
+
+	var tparams TorchParams
+
+	d := json.NewDecoder(f)
+	err = d.Decode(&tparams)
+	if err != nil {
+		return nil, err
+	}
+
+	params := &Params{
+		Architectures:  []string{"LlamaForCausalLM"},
+		HiddenSize:     tparams.HiddenSize,
+		AttentionHeads: tparams.AttentionHeads,
+		KeyValHeads:    tparams.KeyValHeads,
+		HiddenLayers:   tparams.HiddenLayers,
+		NormEPS:        tparams.NormEPS,
+	}
+
+	switch {
+	case tparams.RopeTheta == 1000000:
+		// Codellama
+		params.ContextSize = 16384
+	case tparams.NormEPS == 1e-06:
+		// llama2
+		slog.Debug("Found llama2 - setting context size to 4096")
+		params.ContextSize = 4096
+	default:
+		params.ContextSize = 2048
+	}
+
+	params.ByteOrder = binary.LittleEndian
+	return params, nil
+}
+
+func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
+	f, err := os.Open(filepath.Join(dirpath, "config.json"))
+	if err != nil {
+		if os.IsNotExist(err) {
+			// try params.json instead
+			return getAltParams(dirpath)
+		} else {
+			return nil, err
+		}
+	}
+
+	var params Params
+	d := json.NewDecoder(f)
+	err = d.Decode(&params)
+	if err != nil {
+		return nil, err
+	}
+
+	params.ByteOrder = binary.LittleEndian
+	return &params, nil
+}
+
+func (m *TorchFormat) GetLayerName(n string) (string, error) {
+	directMap := map[string]string{
+		"tok_embeddings.weight":     "token_embd.weight",
+		"output.weight":             "output.weight",
+		"norm.weight":               "output_norm.weight",
+		"rope.freqs":                "rope_freqs.weight",
+		"model.embed_tokens.weight": "token_embd.weight",
+		"lm_head.weight":            "output.weight",
+		"model.norm.weight":         "output_norm.weight",
+	}
+
+	lMap := map[string]string{
+		"layers.(\\d+).attention_norm.weight":                 "blk.$1.attn_norm.weight",
+		"layers.(\\d+).attention_output_norm.weight":          "blk.$1.attn_norm.weight",
+		"layers.(\\d+).feed_forward.w2.weight":                "blk.$1.ffn_down.weight",
+		"layers.(\\d+).feed_forward.w1.weight":                "blk.$1.ffn_gate.weight",
+		"layers.(\\d+).feed_forward.w3.weight":                "blk.$1.ffn_up.weight",
+		"layers.(\\d+).ffn_norm.weight":                       "blk.$1.ffn_norm.weight",
+		"layers.(\\d+).attention.wk.weight":                   "blk.$1.attn_k.weight",
+		"layers.(\\d+).attention.wo.weight":                   "blk.$1.attn_output.weight",
+		"layers.(\\d+).attention.wq.weight":                   "blk.$1.attn_q.weight",
+		"layers.(\\d+).attention.wv.weight":                   "blk.$1.attn_v.weight",
+		"model.layers.(\\d+).input_layernorm.weight":          "blk.$1.attn_norm.weight",
+		"model.layers.(\\d+).mlp.down_proj.weight":            "blk.$1.ffn_down.weight",
+		"model.layers.(\\d+).mlp.gate_proj.weight":            "blk.$1.ffn_gate.weight",
+		"model.layers.(\\d+).mlp.up_proj.weight":              "blk.$1.ffn_up.weight",
+		"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
+		"model.layers.(\\d+).self_attn.k_proj.weight":         "blk.$1.attn_k.weight",
+		"model.layers.(\\d+).self_attn.o_proj.weight":         "blk.$1.attn_output.weight",
+		"model.layers.(\\d+).self_attn.q_proj.weight":         "blk.$1.attn_q.weight",
+		"model.layers.(\\d+).self_attn.v_proj.weight":         "blk.$1.attn_v.weight",
+	}
+
+	v, ok := directMap[n]
+	if ok {
+		return v, nil
+	}
+
+	// quick hack to rename the layers to gguf format
+	for k, v := range lMap {
+		re := regexp.MustCompile(k)
+		newName := re.ReplaceAllString(n, v)
+		if newName != n {
+			return newName, nil
+		}
+	}
+
+	return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
+}
+
+func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
+	var f32s []float32
+	switch s := r.storage.(type) {
+	case *pytorch.FloatStorage:
+		f32s = s.Data
+	case *pytorch.HalfStorage:
+		f32s = s.Data
+	case *pytorch.BFloat16Storage:
+		f32s = s.Data
+	default:
+		return 0, fmt.Errorf("unknown data type: %T", s)
+	}
+
+	if r.repacker != nil {
+		f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
+		if err != nil {
+			return 0, err
+		}
+	}
+
+	switch r.t.Kind {
+	case 0:
+		return 0, binary.Write(w, r.bo, f32s)
+	case 1:
+		f16s := make([]uint16, len(f32s))
+		for i := range f32s {
+			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
+		}
+
+		return 0, binary.Write(w, r.bo, f16s)
+	default:
+		return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
+	}
+}
+
+func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
+	switch len(params.Architectures) {
+	case 0:
+		return nil, fmt.Errorf("No architecture specified to convert")
+	case 1:
+		switch params.Architectures[0] {
+		case "LlamaForCausalLM":
+			return &LlamaModel{
+				ModelData{
+					Name:   name,
+					Path:   dirPath,
+					Params: params,
+					Format: m,
+				},
+			}, nil
+		default:
+			return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
+		}
+	}
+
+	return nil, fmt.Errorf("Unknown error")
+}
--- a/discover/amd_linux.go
+++ b/discover/amd_linux.go
@ -1,535 +0,0 @@
-package discover
-
-import (
-	"bufio"
-	"errors"
-	"fmt"
-	"io"
-	"io/fs"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"regexp"
-	"slices"
-	"sort"
-	"strconv"
-	"strings"
-
-	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/format"
-)
-
-// Discovery logic for AMD/ROCm GPUs
-
-const (
-	DriverVersionFile     = "/sys/module/amdgpu/version"
-	AMDNodesSysfsDir      = "/sys/class/kfd/kfd/topology/nodes/"
-	GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties"
-
-	// Prefix with the node dir
-	GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
-
-	// Direct Rendering Manager sysfs location
-	DRMDeviceDirGlob   = "/sys/class/drm/card*/device"
-	DRMTotalMemoryFile = "mem_info_vram_total"
-	DRMUsedMemoryFile  = "mem_info_vram_used"
-
-	// In hex; properties file is in decimal
-	DRMUniqueIDFile = "unique_id"
-	DRMVendorFile   = "vendor"
-	DRMDeviceFile   = "device"
-)
-
-var (
-	// Used to validate if the given ROCm lib is usable
-	ROCmLibGlobs          = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
-	RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
-)
-
-// Gather GPU information from the amdgpu driver if any supported GPUs are detected
-// Only called once during bootstrap
-func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
-	resp := []RocmGPUInfo{}
-	if !AMDDetected() {
-		return resp, fmt.Errorf("AMD GPUs not detected")
-	}
-
-	// Opportunistic logging of driver version to aid in troubleshooting
-	driverMajor, driverMinor, err := AMDDriverVersion()
-	if err != nil {
-		// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
-		slog.Warn("ollama recommends running the https://www.amd.com/en/support/linux-drivers", "error", err)
-	}
-
-	// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
-	var visibleDevices []string
-	hipVD := envconfig.HipVisibleDevices()   // zero based index only
-	rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID
-	gpuDO := envconfig.GpuDeviceOrdinal()    // zero based index
-	switch {
-	case rocrVD != "":
-		visibleDevices = strings.Split(rocrVD, ",")
-	case hipVD != "":
-		visibleDevices = strings.Split(hipVD, ",")
-	case gpuDO != "":
-		visibleDevices = strings.Split(gpuDO, ",")
-	}
-
-	gfxOverride := envconfig.HsaOverrideGfxVersion()
-	var supported []string
-	libDir := ""
-
-	// The amdgpu driver always exposes the host CPU(s) first, but we have to skip them and subtract
-	// from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
-	matches, _ := filepath.Glob(GPUPropertiesFileGlob)
-	sort.Slice(matches, func(i, j int) bool {
-		// /sys/class/kfd/kfd/topology/nodes/<number>/properties
-		a, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[i])), 10, 64)
-		if err != nil {
-			slog.Debug("parse err", "error", err, "match", matches[i])
-			return false
-		}
-		b, err := strconv.ParseInt(filepath.Base(filepath.Dir(matches[j])), 10, 64)
-		if err != nil {
-			slog.Debug("parse err", "error", err, "match", matches[i])
-			return false
-		}
-		return a < b
-	})
-	gpuCount := 0
-	for _, match := range matches {
-		slog.Debug("evaluating amdgpu node " + match)
-		fp, err := os.Open(match)
-		if err != nil {
-			slog.Debug("failed to open sysfs node", "file", match, "error", err)
-			continue
-		}
-		defer fp.Close()
-
-		scanner := bufio.NewScanner(fp)
-		isCPU := false
-		var major, minor, patch uint64
-		var vendor, device, uniqueID uint64
-		for scanner.Scan() {
-			line := strings.TrimSpace(scanner.Text())
-			// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
-			if strings.HasPrefix(line, "gfx_target_version") {
-				ver := strings.Fields(line)
-
-				// Detect CPUs
-				if len(ver) == 2 && ver[1] == "0" {
-					slog.Debug("detected CPU " + match)
-					isCPU = true
-					break
-				}
-
-				if len(ver) != 2 || len(ver[1]) < 5 {
-					slog.Warn("malformed "+match, "gfx_target_version", line)
-					// If this winds up being a CPU, our offsets may be wrong
-					continue
-				}
-				l := len(ver[1])
-				var err1, err2, err3 error
-				patch, err1 = strconv.ParseUint(ver[1][l-2:l], 10, 32)
-				minor, err2 = strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
-				major, err3 = strconv.ParseUint(ver[1][:l-4], 10, 32)
-				if err1 != nil || err2 != nil || err3 != nil {
-					slog.Debug("malformed int " + line)
-					continue
-				}
-			} else if strings.HasPrefix(line, "vendor_id") {
-				ver := strings.Fields(line)
-				if len(ver) != 2 {
-					slog.Debug("malformed", "vendor_id", line)
-					continue
-				}
-				vendor, err = strconv.ParseUint(ver[1], 10, 64)
-				if err != nil {
-					slog.Debug("malformed", "vendor_id", line, "error", err)
-				}
-			} else if strings.HasPrefix(line, "device_id") {
-				ver := strings.Fields(line)
-				if len(ver) != 2 {
-					slog.Debug("malformed", "device_id", line)
-					continue
-				}
-				device, err = strconv.ParseUint(ver[1], 10, 64)
-				if err != nil {
-					slog.Debug("malformed", "device_id", line, "error", err)
-				}
-			} else if strings.HasPrefix(line, "unique_id") {
-				ver := strings.Fields(line)
-				if len(ver) != 2 {
-					slog.Debug("malformed", "unique_id", line)
-					continue
-				}
-				uniqueID, err = strconv.ParseUint(ver[1], 10, 64)
-				if err != nil {
-					slog.Debug("malformed", "unique_id", line, "error", err)
-				}
-			}
-			// TODO - any other properties we want to extract and record?
-			// vendor_id + device_id -> pci lookup for "Name"
-			// Other metrics that may help us understand relative performance between multiple GPUs
-		}
-
-		// Note: while ./mem_banks/*/used_memory exists, it doesn't appear to take other VRAM consumers
-		// into consideration, so we instead map the device over to the DRM driver sysfs nodes which
-		// do reliably report VRAM usage.
-
-		if isCPU {
-			continue
-		}
-
-		// Skip over any GPUs that are masked
-		if major == 0 && minor == 0 && patch == 0 {
-			slog.Debug("skipping gpu with gfx000")
-			continue
-		}
-
-		// Keep track of numeric IDs based on valid GPUs
-		gpuID := gpuCount
-		gpuCount += 1
-
-		// Look up the memory for the current node
-		totalMemory := uint64(0)
-		usedMemory := uint64(0)
-		var usedFile string
-		mapping := []struct {
-			id       uint64
-			filename string
-		}{
-			{vendor, DRMVendorFile},
-			{device, DRMDeviceFile},
-			{uniqueID, DRMUniqueIDFile}, // Not all devices will report this
-		}
-		slog.Debug("mapping amdgpu to drm sysfs nodes", "amdgpu", match, "vendor", vendor, "device", device, "unique_id", uniqueID)
-		// Map over to DRM location to find the total/free memory
-		drmMatches, _ := filepath.Glob(DRMDeviceDirGlob)
-		for _, devDir := range drmMatches {
-			matched := true
-			for _, m := range mapping {
-				if m.id == 0 {
-					// Null ID means it didn't populate, so we can't use it to match
-					continue
-				}
-				filename := filepath.Join(devDir, m.filename)
-				buf, err := os.ReadFile(filename)
-				if err != nil {
-					slog.Debug("failed to read sysfs node", "file", filename, "error", err)
-					matched = false
-					break
-				}
-				// values here are in hex, strip off the lead 0x and parse so we can compare the numeric (decimal) values in amdgpu
-				cmp, err := strconv.ParseUint(strings.TrimPrefix(strings.TrimSpace(string(buf)), "0x"), 16, 64)
-				if err != nil {
-					slog.Debug("failed to parse sysfs node", "file", filename, "error", err)
-					matched = false
-					break
-				}
-				if cmp != m.id {
-					matched = false
-					break
-				}
-			}
-			if !matched {
-				continue
-			}
-
-			// Found the matching DRM directory
-			slog.Debug("matched", "amdgpu", match, "drm", devDir)
-			totalFile := filepath.Join(devDir, DRMTotalMemoryFile)
-			buf, err := os.ReadFile(totalFile)
-			if err != nil {
-				slog.Debug("failed to read sysfs node", "file", totalFile, "error", err)
-				break
-			}
-			totalMemory, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
-			if err != nil {
-				slog.Debug("failed to parse sysfs node", "file", totalFile, "error", err)
-				break
-			}
-
-			usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
-			usedMemory, err = getFreeMemory(usedFile)
-			if err != nil {
-				slog.Debug("failed to update used memory", "error", err)
-			}
-			break
-		}
-
-		var name string
-		// TODO - PCI ID lookup
-		if vendor > 0 && device > 0 {
-			name = fmt.Sprintf("%04x:%04x", vendor, device)
-		}
-
-		// Favor UUIDs if available to reduce possibility of getting the numeric IDs wrong
-		var ID string
-		if uniqueID != 0 {
-			ID = fmt.Sprintf("GPU-%016x", uniqueID)
-		} else {
-			ID = strconv.Itoa(gpuID)
-		}
-
-		gpuInfo := RocmGPUInfo{
-			GpuInfo: GpuInfo{
-				Library: "rocm",
-				memInfo: memInfo{
-					TotalMemory: totalMemory,
-					FreeMemory:  (totalMemory - usedMemory),
-				},
-				ID:            ID,
-				Name:          name,
-				Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
-				MinimumMemory: rocmMinimumMemory,
-				DriverMajor:   driverMajor,
-				DriverMinor:   driverMinor,
-			},
-			usedFilepath: usedFile,
-			index:        gpuID,
-		}
-
-		// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
-		if totalMemory < IGPUMemLimit {
-			reason := "unsupported Radeon iGPU detected skipping"
-			slog.Info(reason, "id", gpuID, "total", format.HumanBytes2(totalMemory))
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  reason,
-			})
-			continue
-		}
-
-		if int(major) < RocmComputeMin {
-			reason := fmt.Sprintf("amdgpu too old gfx%d%x%x", major, minor, patch)
-			slog.Warn(reason, "gpu", gpuID)
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  reason,
-			})
-
-			continue
-		}
-
-		slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
-		slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
-
-		// If the user wants to filter to a subset of devices, filter out if we aren't a match
-		if len(visibleDevices) > 0 {
-			include := false
-			for _, visible := range visibleDevices {
-				if visible == gpuInfo.ID || visible == strconv.Itoa(gpuInfo.index) {
-					include = true
-					break
-				}
-			}
-			if !include {
-				reason := "filtering out device per user request"
-				slog.Info(reason, "id", gpuInfo.ID, "visible_devices", visibleDevices)
-				unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-					GpuInfo: gpuInfo.GpuInfo,
-					Reason:  reason,
-				})
-
-				continue
-			}
-		}
-
-		// Final validation is gfx compatibility - load the library if we haven't already loaded it
-		// even if the user overrides, we still need to validate the library
-		if libDir == "" {
-			libDir, err = AMDValidateLibDir()
-			if err != nil {
-				err = fmt.Errorf("unable to verify rocm library: %w", err)
-				slog.Warn(err.Error())
-				unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-					GpuInfo: gpuInfo.GpuInfo,
-					Reason:  err.Error(),
-				})
-				return nil, err
-			}
-		}
-		gpuInfo.DependencyPath = []string{libDir}
-
-		if gfxOverride == "" {
-			// Only load supported list once
-			if len(supported) == 0 {
-				supported, err = GetSupportedGFX(libDir)
-				if err != nil {
-					err = fmt.Errorf("failed to lookup supported GFX types: %w", err)
-					slog.Warn(err.Error())
-					unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-						GpuInfo: gpuInfo.GpuInfo,
-						Reason:  err.Error(),
-					})
-					return nil, err
-				}
-				slog.Debug("rocm supported GPUs", "types", supported)
-			}
-			gfx := gpuInfo.Compute
-			if !slices.Contains[[]string, string](supported, gfx) {
-				reason := fmt.Sprintf("amdgpu is not supported (supported types:%s)", supported)
-				slog.Warn(reason, "gpu_type", gfx, "gpu", gpuInfo.ID, "library", libDir)
-				unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-					GpuInfo: gpuInfo.GpuInfo,
-					Reason:  reason,
-				})
-
-				// TODO - consider discrete markdown just for ROCM troubleshooting?
-				slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/gpu.md#overrides for HSA_OVERRIDE_GFX_VERSION usage")
-				continue
-			} else {
-				slog.Info("amdgpu is supported", "gpu", gpuInfo.ID, "gpu_type", gfx)
-			}
-		} else {
-			slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
-		}
-
-		// Check for env var workarounds
-		if name == "1002:687f" { // Vega RX 56
-			gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, [2]string{"HSA_ENABLE_SDMA", "0"})
-		}
-
-		// The GPU has passed all the verification steps and is supported
-		resp = append(resp, gpuInfo)
-	}
-	if len(resp) == 0 {
-		err := fmt.Errorf("no compatible amdgpu devices detected")
-		slog.Info(err.Error())
-		return nil, err
-	}
-	if err := verifyKFDDriverAccess(); err != nil {
-		err = fmt.Errorf("amdgpu devices detected but permission problems block access: %w", err)
-		slog.Error(err.Error())
-		return nil, err
-	}
-	return resp, nil
-}
-
-// Quick check for AMD driver so we can skip amdgpu discovery if not present
-func AMDDetected() bool {
-	// Some driver versions (older?) don't have a version file, so just lookup the parent dir
-	sysfsDir := filepath.Dir(DriverVersionFile)
-	_, err := os.Stat(sysfsDir)
-	if errors.Is(err, os.ErrNotExist) {
-		slog.Debug("amdgpu driver not detected " + sysfsDir)
-		return false
-	} else if err != nil {
-		slog.Debug("error looking up amd driver", "path", sysfsDir, "error", err)
-		return false
-	}
-	return true
-}
-
-// Prefer to use host installed ROCm, as long as it meets our minimum requirements
-// failing that, tell the user how to download it on their own
-func AMDValidateLibDir() (string, error) {
-	libDir, err := commonAMDValidateLibDir()
-	if err == nil {
-		return libDir, nil
-	}
-
-	// Well known ollama installer path
-	installedRocmDir := "/usr/share/ollama/lib/rocm"
-	if rocmLibUsable(installedRocmDir) {
-		return installedRocmDir, nil
-	}
-
-	// If we still haven't found a usable rocm, the user will have to install it on their own
-	slog.Warn("amdgpu detected, but no compatible rocm library found.  Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
-	return "", errors.New("no suitable rocm found, falling back to CPU")
-}
-
-func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
-	_, err = os.Stat(DriverVersionFile)
-	if err != nil {
-		return 0, 0, fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err)
-	}
-	fp, err := os.Open(DriverVersionFile)
-	if err != nil {
-		return 0, 0, err
-	}
-	defer fp.Close()
-	verString, err := io.ReadAll(fp)
-	if err != nil {
-		return 0, 0, err
-	}
-
-	pattern := `\A(\d+)\.(\d+).*`
-	regex := regexp.MustCompile(pattern)
-	match := regex.FindStringSubmatch(string(verString))
-	if len(match) < 2 {
-		return 0, 0, fmt.Errorf("malformed version string %s", string(verString))
-	}
-	driverMajor, err = strconv.Atoi(match[1])
-	if err != nil {
-		return 0, 0, err
-	}
-	driverMinor, err = strconv.Atoi(match[2])
-	if err != nil {
-		return 0, 0, err
-	}
-	return driverMajor, driverMinor, nil
-}
-
-func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
-	if len(gpus) == 0 {
-		return nil
-	}
-	for i := range gpus {
-		usedMemory, err := getFreeMemory(gpus[i].usedFilepath)
-		if err != nil {
-			return err
-		}
-		slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(gpus[i].TotalMemory-usedMemory))
-		gpus[i].FreeMemory = gpus[i].TotalMemory - usedMemory
-	}
-	return nil
-}
-
-func getFreeMemory(usedFile string) (uint64, error) {
-	buf, err := os.ReadFile(usedFile)
-	if err != nil {
-		return 0, fmt.Errorf("failed to read sysfs node %s %w", usedFile, err)
-	}
-	usedMemory, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
-	if err != nil {
-		slog.Debug("failed to parse sysfs node", "file", usedFile, "error", err)
-		return 0, fmt.Errorf("failed to parse sysfs node %s %w", usedFile, err)
-	}
-	return usedMemory, nil
-}
-
-func verifyKFDDriverAccess() error {
-	// Verify we have permissions - either running as root, or we have group access to the driver
-	fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0o666)
-	if err != nil {
-		if errors.Is(err, fs.ErrPermission) {
-			return fmt.Errorf("permissions not set up properly.  Either run ollama as root, or add you user account to the render group. %w", err)
-		} else if errors.Is(err, fs.ErrNotExist) {
-			// Container runtime failure?
-			return fmt.Errorf("kfd driver not loaded.  If running in a container, remember to include '--device /dev/kfd --device /dev/dri'")
-		}
-		return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
-	}
-	fd.Close()
-	return nil
-}
-
-func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
-	ids := []string{}
-	for _, info := range gpuInfo {
-		if info.Library != "rocm" {
-			// TODO shouldn't happen if things are wired correctly...
-			slog.Debug("rocmGetVisibleDevicesEnv skipping over non-rocm device", "library", info.Library)
-			continue
-		}
-		ids = append(ids, info.ID)
-	}
-	// There are 3 potential env vars to use to select GPUs.
-	// ROCR_VISIBLE_DEVICES supports UUID or numeric so is our preferred on linux
-	// GPU_DEVICE_ORDINAL supports numeric IDs only
-	// HIP_VISIBLE_DEVICES supports numeric IDs only
-	return "ROCR_VISIBLE_DEVICES", strings.Join(ids, ",")
-}
--- a/discover/amd_windows.go
+++ b/discover/amd_windows.go
@ -1,220 +0,0 @@
-package discover
-
-import (
-	"bytes"
-	"errors"
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"slices"
-	"strconv"
-	"strings"
-
-	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/format"
-)
-
-const (
-
-	// TODO  We're lookinng for this exact name to detect iGPUs since hipGetDeviceProperties never reports integrated==true
-	iGPUName = "AMD Radeon(TM) Graphics"
-)
-
-var (
-	// Used to validate if the given ROCm lib is usable
-	ROCmLibGlobs          = []string{"hipblas.dll", "rocblas"}                 // This is not sufficient to discern v5 vs v6
-	RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\6.1\\bin"} // TODO glob?
-)
-
-// Only called once during bootstrap
-func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
-	resp := []RocmGPUInfo{}
-	hl, err := NewHipLib()
-	if err != nil {
-		slog.Debug(err.Error())
-		return nil, err
-	}
-	defer hl.Release()
-
-	driverMajor, driverMinor, err := hl.AMDDriverVersion()
-	if err != nil {
-		// For now this is benign, but we may eventually need to fail compatibility checks
-		slog.Debug("error looking up amd driver version", "error", err)
-	}
-
-	// Note: the HIP library automatically handles subsetting to any *_VISIBLE_DEVICES the user specified
-	count := hl.HipGetDeviceCount()
-	if count == 0 {
-		err := fmt.Errorf("no compatible amdgpu devices detected")
-		slog.Info(err.Error())
-		return nil, err
-	}
-	libDir, err := AMDValidateLibDir()
-	if err != nil {
-		err = fmt.Errorf("unable to verify rocm library: %w", err)
-		slog.Warn(err.Error())
-		return nil, err
-	}
-
-	var supported []string
-	gfxOverride := envconfig.HsaOverrideGfxVersion()
-	if gfxOverride == "" {
-		supported, err = GetSupportedGFX(libDir)
-		if err != nil {
-			err = fmt.Errorf("failed to lookup supported GFX types: %w", err)
-			slog.Warn(err.Error())
-			return nil, err
-		}
-	} else {
-		slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
-	}
-
-	slog.Debug("detected hip devices", "count", count)
-	// TODO how to determine the underlying device ID when visible devices is causing this to subset?
-	for i := range count {
-		err = hl.HipSetDevice(i)
-		if err != nil {
-			slog.Warn("set device", "id", i, "error", err)
-			continue
-		}
-
-		props, err := hl.HipGetDeviceProperties(i)
-		if err != nil {
-			slog.Warn("get properties", "id", i, "error", err)
-			continue
-		}
-		n := bytes.IndexByte(props.Name[:], 0)
-		name := string(props.Name[:n])
-		// TODO is UUID actually populated on windows?
-		// Can luid be used on windows for setting visible devices (and is it actually set?)
-		n = bytes.IndexByte(props.GcnArchName[:], 0)
-		gfx := string(props.GcnArchName[:n])
-		slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
-		// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY!  Always 0
-		// TODO  Why isn't props.iGPU accurate!?
-
-		freeMemory, totalMemory, err := hl.HipMemGetInfo()
-		if err != nil {
-			slog.Warn("get mem info", "id", i, "error", err)
-			continue
-		}
-
-		gpuInfo := RocmGPUInfo{
-			GpuInfo: GpuInfo{
-				Library: "rocm",
-				memInfo: memInfo{
-					TotalMemory: totalMemory,
-					FreeMemory:  freeMemory,
-				},
-				// Free memory reporting on Windows is not reliable until we bump to ROCm v6.2
-				UnreliableFreeMemory: true,
-
-				ID:             strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices
-				DependencyPath: []string{libDir},
-				MinimumMemory:  rocmMinimumMemory,
-				Name:           name,
-				Compute:        gfx,
-				DriverMajor:    driverMajor,
-				DriverMinor:    driverMinor,
-			},
-			index: i,
-		}
-
-		// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
-		if strings.EqualFold(name, iGPUName) || totalMemory < IGPUMemLimit {
-			reason := "unsupported Radeon iGPU detected skipping"
-			slog.Info(reason, "id", gpuInfo.ID, "total", format.HumanBytes2(totalMemory))
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  reason,
-			})
-			continue
-		}
-
-		// Strip off Target Features when comparing
-		if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) {
-			reason := fmt.Sprintf("amdgpu is not supported (supported types:%s)", supported)
-			slog.Warn(reason, "gpu_type", gfx, "gpu", gpuInfo.ID, "library", libDir)
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  reason,
-			})
-			// HSA_OVERRIDE_GFX_VERSION not supported on windows
-			continue
-		} else {
-			slog.Debug("amdgpu is supported", "gpu", i, "gpu_type", gfx)
-		}
-
-		slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
-		slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
-
-		resp = append(resp, gpuInfo)
-	}
-
-	return resp, nil
-}
-
-func AMDValidateLibDir() (string, error) {
-	libDir, err := commonAMDValidateLibDir()
-	if err == nil {
-		return libDir, nil
-	}
-
-	// Installer payload (if we're running from some other location)
-	localAppData := os.Getenv("LOCALAPPDATA")
-	appDir := filepath.Join(localAppData, "Programs", "Ollama")
-	rocmTargetDir := filepath.Join(appDir, envconfig.LibRelativeToExe(), "lib", "ollama")
-	if rocmLibUsable(rocmTargetDir) {
-		slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
-		return rocmTargetDir, nil
-	}
-
-	// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
-	slog.Warn("amdgpu detected, but no compatible rocm library found.  Please install ROCm")
-	return "", errors.New("no suitable rocm found, falling back to CPU")
-}
-
-func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
-	if len(gpus) == 0 {
-		return nil
-	}
-	hl, err := NewHipLib()
-	if err != nil {
-		slog.Debug(err.Error())
-		return nil
-	}
-	defer hl.Release()
-
-	for i := range gpus {
-		err := hl.HipSetDevice(gpus[i].index)
-		if err != nil {
-			return err
-		}
-		freeMemory, _, err := hl.HipMemGetInfo()
-		if err != nil {
-			slog.Warn("get mem info", "id", i, "error", err)
-			continue
-		}
-		slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(freeMemory))
-		gpus[i].FreeMemory = freeMemory
-	}
-	return nil
-}
-
-func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
-	ids := []string{}
-	for _, info := range gpuInfo {
-		if info.Library != "rocm" {
-			// TODO shouldn't happen if things are wired correctly...
-			slog.Debug("rocmGetVisibleDevicesEnv skipping over non-rocm device", "library", info.Library)
-			continue
-		}
-		ids = append(ids, info.ID)
-	}
-	// There are 3 potential env vars to use to select GPUs.
-	// ROCR_VISIBLE_DEVICES supports UUID or numeric but does not work on Windows
-	// HIP_VISIBLE_DEVICES supports numeric IDs only
-	// GPU_DEVICE_ORDINAL supports numeric IDs only
-	return "HIP_VISIBLE_DEVICES", strings.Join(ids, ",")
-}
--- a/discover/cpu_common.go
+++ b/discover/cpu_common.go
@ -1,37 +0,0 @@
-package discover
-
-import (
-	"os"
-	"path/filepath"
-	"runtime"
-	"strings"
-
-	"golang.org/x/sys/cpu"
-)
-
-func GetCPUCapability() CPUCapability {
-	if cpu.X86.HasAVX2 {
-		return CPUCapabilityAVX2
-	}
-	if cpu.X86.HasAVX {
-		return CPUCapabilityAVX
-	}
-	// else LCD
-	return CPUCapabilityNone
-}
-
-func IsNUMA() bool {
-	if runtime.GOOS != "linux" {
-		// numa support in llama.cpp is linux only
-		return false
-	}
-	ids := map[string]interface{}{}
-	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
-	for _, packageId := range packageIds {
-		id, err := os.ReadFile(packageId)
-		if err == nil {
-			ids[strings.TrimSpace(string(id))] = struct{}{}
-		}
-	}
-	return len(ids) > 1
-}
--- a/discover/cuda_common.go
+++ b/discover/cuda_common.go
@ -1,64 +0,0 @@
-//go:build linux || windows
-
-package discover
-
-import (
-	"log/slog"
-	"os"
-	"regexp"
-	"runtime"
-	"strconv"
-	"strings"
-)
-
-// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
-// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
-var CudaTegra string = os.Getenv("JETSON_JETPACK")
-
-func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
-	ids := []string{}
-	for _, info := range gpuInfo {
-		if info.Library != "cuda" {
-			// TODO shouldn't happen if things are wired correctly...
-			slog.Debug("cudaGetVisibleDevicesEnv skipping over non-cuda device", "library", info.Library)
-			continue
-		}
-		ids = append(ids, info.ID)
-	}
-	return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
-}
-
-func cudaVariant(gpuInfo CudaGPUInfo) string {
-	if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
-		if CudaTegra != "" {
-			ver := strings.Split(CudaTegra, ".")
-			if len(ver) > 0 {
-				return "jetpack" + ver[0]
-			}
-		} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
-			r := regexp.MustCompile(` R(\d+) `)
-			m := r.FindSubmatch(data)
-			if len(m) != 2 {
-				slog.Info("Unexpected format for /etc/nv_tegra_release.  Set JETSON_JETPACK to select version")
-			} else {
-				if l4t, err := strconv.Atoi(string(m[1])); err == nil {
-					// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
-					// https://developer.nvidia.com/embedded/jetpack-archive
-					switch l4t {
-					case 35:
-						return "jetpack5"
-					case 36:
-						return "jetpack6"
-					default:
-						slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
-					}
-				}
-			}
-		}
-	}
-
-	if gpuInfo.computeMajor < 6 || gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) {
-		return "v11"
-	}
-	return "v12"
-}
--- a/discover/gpu.go
+++ b/discover/gpu.go
@ -1,754 +0,0 @@
-//go:build linux || windows
-
-package discover
-
-/*
-#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
-#cgo windows LDFLAGS: -lpthread
-
-#include "gpu_info.h"
-*/
-import "C"
-
-import (
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"runtime"
-	"strings"
-	"sync"
-	"unsafe"
-
-	"github.com/ollama/ollama/envconfig"
-	"github.com/ollama/ollama/format"
-)
-
-type cudaHandles struct {
-	deviceCount int
-	cudart      *C.cudart_handle_t
-	nvcuda      *C.nvcuda_handle_t
-	nvml        *C.nvml_handle_t
-}
-
-type oneapiHandles struct {
-	oneapi      *C.oneapi_handle_t
-	deviceCount int
-}
-
-const (
-	cudaMinimumMemory = 457 * format.MebiByte
-	rocmMinimumMemory = 457 * format.MebiByte
-	// TODO OneAPI minimum memory
-)
-
-var (
-	gpuMutex      sync.Mutex
-	bootstrapped  bool
-	cpuCapability CPUCapability
-	cpus          []CPUInfo
-	cudaGPUs      []CudaGPUInfo
-	nvcudaLibPath string
-	cudartLibPath string
-	oneapiLibPath string
-	nvmlLibPath   string
-	rocmGPUs      []RocmGPUInfo
-	oneapiGPUs    []OneapiGPUInfo
-
-	// If any discovered GPUs are incompatible, report why
-	unsupportedGPUs []UnsupportedGPUInfo
-
-	// Keep track of errors during bootstrapping so that if GPUs are missing
-	// they expected to be present this may explain why
-	bootstrapErrors []error
-)
-
-// With our current CUDA compile flags, older than 5.0 will not work properly
-var CudaComputeMin = [2]C.int{5, 0}
-
-var RocmComputeMin = 9
-
-// TODO find a better way to detect iGPU instead of minimum memory
-const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
-
-// Note: gpuMutex must already be held
-func initCudaHandles() *cudaHandles {
-	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
-
-	cHandles := &cudaHandles{}
-	// Short Circuit if we already know which library to use
-	// ignore bootstrap errors in this case since we already recorded them
-	if nvmlLibPath != "" {
-		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
-		return cHandles
-	}
-	if nvcudaLibPath != "" {
-		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
-		return cHandles
-	}
-	if cudartLibPath != "" {
-		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
-		return cHandles
-	}
-
-	slog.Debug("searching for GPU discovery libraries for NVIDIA")
-	var cudartMgmtPatterns []string
-
-	// Aligned with driver, we can't carry as payloads
-	nvcudaMgmtPatterns := NvcudaGlobs
-
-	if runtime.GOOS == "windows" {
-		localAppData := os.Getenv("LOCALAPPDATA")
-		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
-	}
-	libDir := LibraryDir()
-	if libDir != "" {
-		cudartMgmtPatterns = []string{filepath.Join(libDir, CudartMgmtName)}
-	}
-	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
-
-	if len(NvmlGlobs) > 0 {
-		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
-		if len(nvmlLibPaths) > 0 {
-			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
-			if nvml != nil {
-				slog.Debug("nvidia-ml loaded", "library", libPath)
-				cHandles.nvml = nvml
-				nvmlLibPath = libPath
-			}
-			if err != nil {
-				bootstrapErrors = append(bootstrapErrors, err)
-			}
-		}
-	}
-
-	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
-	if len(nvcudaLibPaths) > 0 {
-		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
-		if nvcuda != nil {
-			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
-			cHandles.nvcuda = nvcuda
-			cHandles.deviceCount = deviceCount
-			nvcudaLibPath = libPath
-			return cHandles
-		}
-		if err != nil {
-			bootstrapErrors = append(bootstrapErrors, err)
-		}
-	}
-
-	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
-	if len(cudartLibPaths) > 0 {
-		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
-		if cudart != nil {
-			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
-			cHandles.cudart = cudart
-			cHandles.deviceCount = deviceCount
-			cudartLibPath = libPath
-			return cHandles
-		}
-		if err != nil {
-			bootstrapErrors = append(bootstrapErrors, err)
-		}
-	}
-
-	return cHandles
-}
-
-// Note: gpuMutex must already be held
-func initOneAPIHandles() *oneapiHandles {
-	oHandles := &oneapiHandles{}
-
-	// Short Circuit if we already know which library to use
-	// ignore bootstrap errors in this case since we already recorded them
-	if oneapiLibPath != "" {
-		oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
-		return oHandles
-	}
-
-	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
-	if len(oneapiLibPaths) > 0 {
-		var err error
-		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
-		if err != nil {
-			bootstrapErrors = append(bootstrapErrors, err)
-		}
-	}
-
-	return oHandles
-}
-
-func GetCPUInfo() GpuInfoList {
-	gpuMutex.Lock()
-	if !bootstrapped {
-		gpuMutex.Unlock()
-		GetGPUInfo()
-	} else {
-		gpuMutex.Unlock()
-	}
-	return GpuInfoList{cpus[0].GpuInfo}
-}
-
-func GetGPUInfo() GpuInfoList {
-	// TODO - consider exploring lspci (and equivalent on windows) to check for
-	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
-	gpuMutex.Lock()
-	defer gpuMutex.Unlock()
-	needRefresh := true
-	var cHandles *cudaHandles
-	var oHandles *oneapiHandles
-	defer func() {
-		if cHandles != nil {
-			if cHandles.cudart != nil {
-				C.cudart_release(*cHandles.cudart)
-			}
-			if cHandles.nvcuda != nil {
-				C.nvcuda_release(*cHandles.nvcuda)
-			}
-			if cHandles.nvml != nil {
-				C.nvml_release(*cHandles.nvml)
-			}
-		}
-		if oHandles != nil {
-			if oHandles.oneapi != nil {
-				// TODO - is this needed?
-				C.oneapi_release(*oHandles.oneapi)
-			}
-		}
-	}()
-
-	if !bootstrapped {
-		slog.Info("looking for compatible GPUs")
-		bootstrapErrors = []error{}
-		needRefresh = false
-		cpuCapability = GetCPUCapability()
-		var memInfo C.mem_info_t
-
-		mem, err := GetCPUMem()
-		if err != nil {
-			slog.Warn("error looking up system memory", "error", err)
-		}
-		depPath := LibraryDir()
-		details, err := GetCPUDetails()
-		if err != nil {
-			slog.Warn("failed to lookup CPU details", "error", err)
-		}
-		cpus = []CPUInfo{
-			{
-				GpuInfo: GpuInfo{
-					memInfo:        mem,
-					Library:        "cpu",
-					Variant:        cpuCapability.String(),
-					ID:             "0",
-					DependencyPath: []string{depPath},
-				},
-				CPUs: details,
-			},
-		}
-
-		// Fallback to CPU mode if we're lacking required vector extensions on x86
-		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
-			err := fmt.Errorf("CPU does not have minimum vector extensions, GPU inference disabled.  Required:%s  Detected:%s", GPURunnerCPUCapability, cpuCapability)
-			slog.Warn(err.Error())
-			bootstrapErrors = append(bootstrapErrors, err)
-			bootstrapped = true
-			// No need to do any GPU discovery, since we can't run on them
-			return GpuInfoList{cpus[0].GpuInfo}
-		}
-
-		// Load ALL libraries
-		cHandles = initCudaHandles()
-
-		// NVIDIA
-		for i := range cHandles.deviceCount {
-			if cHandles.cudart != nil || cHandles.nvcuda != nil {
-				gpuInfo := CudaGPUInfo{
-					GpuInfo: GpuInfo{
-						Library: "cuda",
-					},
-					index: i,
-				}
-				var driverMajor int
-				var driverMinor int
-				if cHandles.cudart != nil {
-					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
-				} else {
-					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
-					driverMajor = int(cHandles.nvcuda.driver_major)
-					driverMinor = int(cHandles.nvcuda.driver_minor)
-				}
-				if memInfo.err != nil {
-					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
-					C.free(unsafe.Pointer(memInfo.err))
-					continue
-				}
-				gpuInfo.TotalMemory = uint64(memInfo.total)
-				gpuInfo.FreeMemory = uint64(memInfo.free)
-				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
-				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
-				gpuInfo.computeMajor = int(memInfo.major)
-				gpuInfo.computeMinor = int(memInfo.minor)
-				gpuInfo.MinimumMemory = cudaMinimumMemory
-				gpuInfo.DriverMajor = driverMajor
-				gpuInfo.DriverMinor = driverMinor
-				variant := cudaVariant(gpuInfo)
-				if depPath != "" {
-					gpuInfo.DependencyPath = []string{depPath}
-					// Check for variant specific directory
-					if variant != "" {
-						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
-							gpuInfo.DependencyPath = []string{filepath.Join(depPath, "cuda_"+variant), depPath}
-						}
-					}
-				}
-				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-				gpuInfo.Variant = variant
-
-				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
-					unsupportedGPUs = append(unsupportedGPUs,
-						UnsupportedGPUInfo{
-							GpuInfo: gpuInfo.GpuInfo,
-						})
-					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
-					continue
-				}
-
-				// query the management library as well so we can record any skew between the two
-				// which represents overhead on the GPU we must set aside on subsequent updates
-				if cHandles.nvml != nil {
-					uuid := C.CString(gpuInfo.ID)
-					defer C.free(unsafe.Pointer(uuid))
-					C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
-					if memInfo.err != nil {
-						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
-						C.free(unsafe.Pointer(memInfo.err))
-					} else {
-						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
-							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
-							slog.Info("detected OS VRAM overhead",
-								"id", gpuInfo.ID,
-								"library", gpuInfo.Library,
-								"compute", gpuInfo.Compute,
-								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
-								"name", gpuInfo.Name,
-								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
-							)
-						}
-					}
-				}
-
-				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
-				cudaGPUs = append(cudaGPUs, gpuInfo)
-			}
-		}
-
-		// Intel
-		if envconfig.IntelGPU() {
-			oHandles = initOneAPIHandles()
-			if oHandles != nil && oHandles.oneapi != nil {
-				for d := range oHandles.oneapi.num_drivers {
-					if oHandles.oneapi == nil {
-						// shouldn't happen
-						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
-						continue
-					}
-					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
-					for i := range devCount {
-						gpuInfo := OneapiGPUInfo{
-							GpuInfo: GpuInfo{
-								Library: "oneapi",
-							},
-							driverIndex: int(d),
-							gpuIndex:    int(i),
-						}
-						// TODO - split bootstrapping from updating free memory
-						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
-						// TODO - convert this to MinimumMemory based on testing...
-						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
-						memInfo.free = C.uint64_t(totalFreeMem)
-						gpuInfo.TotalMemory = uint64(memInfo.total)
-						gpuInfo.FreeMemory = uint64(memInfo.free)
-						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
-						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-						gpuInfo.DependencyPath = []string{depPath}
-						oneapiGPUs = append(oneapiGPUs, gpuInfo)
-					}
-				}
-			}
-		}
-
-		rocmGPUs, err = AMDGetGPUInfo()
-		if err != nil {
-			bootstrapErrors = append(bootstrapErrors, err)
-		}
-		bootstrapped = true
-		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
-			slog.Info("no compatible GPUs were discovered")
-		}
-	}
-
-	// For detected GPUs, load library if not loaded
-
-	// Refresh free memory usage
-	if needRefresh {
-		mem, err := GetCPUMem()
-		if err != nil {
-			slog.Warn("error looking up system memory", "error", err)
-		} else {
-			slog.Debug("updating system memory data",
-				slog.Group(
-					"before",
-					"total", format.HumanBytes2(cpus[0].TotalMemory),
-					"free", format.HumanBytes2(cpus[0].FreeMemory),
-					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
-				),
-				slog.Group(
-					"now",
-					"total", format.HumanBytes2(mem.TotalMemory),
-					"free", format.HumanBytes2(mem.FreeMemory),
-					"free_swap", format.HumanBytes2(mem.FreeSwap),
-				),
-			)
-			cpus[0].FreeMemory = mem.FreeMemory
-			cpus[0].FreeSwap = mem.FreeSwap
-		}
-
-		var memInfo C.mem_info_t
-		if cHandles == nil && len(cudaGPUs) > 0 {
-			cHandles = initCudaHandles()
-		}
-		for i, gpu := range cudaGPUs {
-			if cHandles.nvml != nil {
-				uuid := C.CString(gpu.ID)
-				defer C.free(unsafe.Pointer(uuid))
-				C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
-			} else if cHandles.cudart != nil {
-				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
-			} else if cHandles.nvcuda != nil {
-				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
-				memInfo.used = memInfo.total - memInfo.free
-			} else {
-				// shouldn't happen
-				slog.Warn("no valid cuda library loaded to refresh vram usage")
-				break
-			}
-			if memInfo.err != nil {
-				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
-				C.free(unsafe.Pointer(memInfo.err))
-				continue
-			}
-			if memInfo.free == 0 {
-				slog.Warn("error looking up nvidia GPU memory")
-				continue
-			}
-			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
-				// When using the management library update based on recorded overhead
-				memInfo.free -= C.uint64_t(gpu.OSOverhead)
-			}
-			slog.Debug("updating cuda memory data",
-				"gpu", gpu.ID,
-				"name", gpu.Name,
-				"overhead", format.HumanBytes2(gpu.OSOverhead),
-				slog.Group(
-					"before",
-					"total", format.HumanBytes2(gpu.TotalMemory),
-					"free", format.HumanBytes2(gpu.FreeMemory),
-				),
-				slog.Group(
-					"now",
-					"total", format.HumanBytes2(uint64(memInfo.total)),
-					"free", format.HumanBytes2(uint64(memInfo.free)),
-					"used", format.HumanBytes2(uint64(memInfo.used)),
-				),
-			)
-			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
-		}
-
-		if oHandles == nil && len(oneapiGPUs) > 0 {
-			oHandles = initOneAPIHandles()
-		}
-		for i, gpu := range oneapiGPUs {
-			if oHandles.oneapi == nil {
-				// shouldn't happen
-				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
-				continue
-			}
-			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
-			// TODO - convert this to MinimumMemory based on testing...
-			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
-			memInfo.free = C.uint64_t(totalFreeMem)
-			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
-		}
-
-		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
-		if err != nil {
-			slog.Debug("problem refreshing ROCm free memory", "error", err)
-		}
-	}
-
-	resp := []GpuInfo{}
-	for _, gpu := range cudaGPUs {
-		resp = append(resp, gpu.GpuInfo)
-	}
-	for _, gpu := range rocmGPUs {
-		resp = append(resp, gpu.GpuInfo)
-	}
-	for _, gpu := range oneapiGPUs {
-		resp = append(resp, gpu.GpuInfo)
-	}
-	if len(resp) == 0 {
-		resp = append(resp, cpus[0].GpuInfo)
-	}
-	return resp
-}
-
-func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
-	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
-	var ldPaths []string
-	gpuLibPaths := []string{}
-	slog.Debug("Searching for GPU library", "name", baseLibName)
-
-	// Start with our bundled libraries
-	patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
-
-	switch runtime.GOOS {
-	case "windows":
-		ldPaths = strings.Split(os.Getenv("PATH"), ";")
-	case "linux":
-		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
-	default:
-		return gpuLibPaths
-	}
-
-	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
-	for _, ldPath := range ldPaths {
-		d, err := filepath.Abs(ldPath)
-		if err != nil {
-			continue
-		}
-		patterns = append(patterns, filepath.Join(d, baseLibName))
-	}
-	patterns = append(patterns, defaultPatterns...)
-	slog.Debug("gpu library search", "globs", patterns)
-	for _, pattern := range patterns {
-
-		// Nvidia PhysX known to return bogus results
-		if strings.Contains(pattern, "PhysX") {
-			slog.Debug("skipping PhysX cuda library path", "path", pattern)
-			continue
-		}
-		// Ignore glob discovery errors
-		matches, _ := filepath.Glob(pattern)
-		for _, match := range matches {
-			// Resolve any links so we don't try the same lib multiple times
-			// and weed out any dups across globs
-			libPath := match
-			tmp := match
-			var err error
-			for ; err == nil; tmp, err = os.Readlink(libPath) {
-				if !filepath.IsAbs(tmp) {
-					tmp = filepath.Join(filepath.Dir(libPath), tmp)
-				}
-				libPath = tmp
-			}
-			new := true
-			for _, cmp := range gpuLibPaths {
-				if cmp == libPath {
-					new = false
-					break
-				}
-			}
-			if new {
-				gpuLibPaths = append(gpuLibPaths, libPath)
-			}
-		}
-	}
-	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
-	return gpuLibPaths
-}
-
-// Bootstrap the runtime library
-// Returns: num devices, handle, libPath, error
-func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
-	var resp C.cudart_init_resp_t
-	resp.ch.verbose = getVerboseState()
-	var err error
-	for _, libPath := range cudartLibPaths {
-		lib := C.CString(libPath)
-		defer C.free(unsafe.Pointer(lib))
-		C.cudart_init(lib, &resp)
-		if resp.err != nil {
-			err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
-			slog.Debug(err.Error())
-			C.free(unsafe.Pointer(resp.err))
-		} else {
-			err = nil
-			return int(resp.num_devices), &resp.ch, libPath, err
-		}
-	}
-	return 0, nil, "", err
-}
-
-// Bootstrap the driver library
-// Returns: num devices, handle, libPath, error
-func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
-	var resp C.nvcuda_init_resp_t
-	resp.ch.verbose = getVerboseState()
-	var err error
-	for _, libPath := range nvcudaLibPaths {
-		lib := C.CString(libPath)
-		defer C.free(unsafe.Pointer(lib))
-		C.nvcuda_init(lib, &resp)
-		if resp.err != nil {
-			// Decide what log level based on the type of error message to help users understand why
-			switch resp.cudaErr {
-			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
-				err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
-				slog.Warn(err.Error())
-			case C.CUDA_ERROR_NO_DEVICE:
-				err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
-				slog.Info(err.Error())
-			case C.CUDA_ERROR_UNKNOWN:
-				err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
-				slog.Warn(err.Error())
-			default:
-				msg := C.GoString(resp.err)
-				if strings.Contains(msg, "wrong ELF class") {
-					slog.Debug("skipping 32bit library", "library", libPath)
-				} else {
-					err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
-					slog.Info(err.Error())
-				}
-			}
-			C.free(unsafe.Pointer(resp.err))
-		} else {
-			err = nil
-			return int(resp.num_devices), &resp.ch, libPath, err
-		}
-	}
-	return 0, nil, "", err
-}
-
-// Bootstrap the management library
-// Returns: handle, libPath, error
-func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
-	var resp C.nvml_init_resp_t
-	resp.ch.verbose = getVerboseState()
-	var err error
-	for _, libPath := range nvmlLibPaths {
-		lib := C.CString(libPath)
-		defer C.free(unsafe.Pointer(lib))
-		C.nvml_init(lib, &resp)
-		if resp.err != nil {
-			err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
-			slog.Info(err.Error())
-			C.free(unsafe.Pointer(resp.err))
-		} else {
-			err = nil
-			return &resp.ch, libPath, err
-		}
-	}
-	return nil, "", err
-}
-
-// bootstrap the Intel GPU library
-// Returns: num devices, handle, libPath, error
-func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
-	var resp C.oneapi_init_resp_t
-	num_devices := 0
-	resp.oh.verbose = getVerboseState()
-	var err error
-	for _, libPath := range oneapiLibPaths {
-		lib := C.CString(libPath)
-		defer C.free(unsafe.Pointer(lib))
-		C.oneapi_init(lib, &resp)
-		if resp.err != nil {
-			err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
-			slog.Debug(err.Error())
-			C.free(unsafe.Pointer(resp.err))
-		} else {
-			err = nil
-			for i := range resp.oh.num_drivers {
-				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
-			}
-			return num_devices, &resp.oh, libPath, err
-		}
-	}
-	return 0, nil, "", err
-}
-
-func getVerboseState() C.uint16_t {
-	if envconfig.Debug() {
-		return C.uint16_t(1)
-	}
-	return C.uint16_t(0)
-}
-
-// Given the list of GPUs this instantiation is targeted for,
-// figure out the visible devices environment variable
-//
-// If different libraries are detected, the first one is what we use
-func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
-	if len(l) == 0 {
-		return "", ""
-	}
-	switch l[0].Library {
-	case "cuda":
-		return cudaGetVisibleDevicesEnv(l)
-	case "rocm":
-		return rocmGetVisibleDevicesEnv(l)
-	case "oneapi":
-		return oneapiGetVisibleDevicesEnv(l)
-	default:
-		slog.Debug("no filter required for library " + l[0].Library)
-		return "", ""
-	}
-}
-
-func LibraryDir() string {
-	// On Windows/linux we bundle the dependencies at the same level as the executable
-	appExe, err := os.Executable()
-	if err != nil {
-		slog.Warn("failed to lookup executable path", "error", err)
-	}
-	cwd, err := os.Getwd()
-	if err != nil {
-		slog.Warn("failed to lookup working directory", "error", err)
-	}
-	// Scan for any of our dependeices, and pick first match
-	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
-		libDep := filepath.Join("lib", "ollama")
-		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
-			return filepath.Join(root, libDep)
-		}
-		// Developer mode, local build
-		if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
-			return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
-		}
-		if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
-			return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
-		}
-	}
-	slog.Warn("unable to locate gpu dependency libraries")
-	return ""
-}
-
-func GetSystemInfo() SystemInfo {
-	gpus := GetGPUInfo()
-	gpuMutex.Lock()
-	defer gpuMutex.Unlock()
-	discoveryErrors := []string{}
-	for _, err := range bootstrapErrors {
-		discoveryErrors = append(discoveryErrors, err.Error())
-	}
-	if len(gpus) == 1 && gpus[0].Library == "cpu" {
-		gpus = []GpuInfo{}
-	}
-
-	return SystemInfo{
-		System:          cpus[0],
-		GPUs:            gpus,
-		UnsupportedGPUs: unsupportedGPUs,
-		DiscoveryErrors: discoveryErrors,
-	}
-}
--- a/discover/gpu_darwin.go
+++ b/discover/gpu_darwin.go
@ -1,101 +0,0 @@
-//go:build darwin
-
-package discover
-
-/*
-#cgo CFLAGS: -x objective-c
-#cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
-#include "gpu_info_darwin.h"
-*/
-import "C"
-
-import (
-	"log/slog"
-	"runtime"
-	"syscall"
-
-	"github.com/ollama/ollama/format"
-)
-
-const (
-	metalMinimumMemory = 512 * format.MebiByte
-)
-
-func GetGPUInfo() GpuInfoList {
-	mem, _ := GetCPUMem()
-	if runtime.GOARCH == "amd64" {
-		return []GpuInfo{
-			{
-				Library: "cpu",
-				Variant: GetCPUCapability().String(),
-				memInfo: mem,
-			},
-		}
-	}
-	info := GpuInfo{
-		Library: "metal",
-		ID:      "0",
-	}
-	info.TotalMemory = uint64(C.getRecommendedMaxVRAM())
-
-	// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
-	info.FreeMemory = info.TotalMemory
-
-	info.MinimumMemory = metalMinimumMemory
-	return []GpuInfo{info}
-}
-
-func GetCPUInfo() GpuInfoList {
-	mem, _ := GetCPUMem()
-	return []GpuInfo{
-		{
-			Library: "cpu",
-			Variant: GetCPUCapability().String(),
-			memInfo: mem,
-		},
-	}
-}
-
-func GetCPUMem() (memInfo, error) {
-	return memInfo{
-		TotalMemory: uint64(C.getPhysicalMemory()),
-		FreeMemory:  uint64(C.getFreeMemory()),
-		// FreeSwap omitted as Darwin uses dynamic paging
-	}, nil
-}
-
-func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
-	// No-op on darwin
-	return "", ""
-}
-
-func GetSystemInfo() SystemInfo {
-	mem, _ := GetCPUMem()
-	query := "hw.perflevel0.physicalcpu"
-	perfCores, err := syscall.SysctlUint32(query)
-	if err != nil {
-		slog.Warn("failed to discover physical CPU details", "query", query, "error", err)
-	}
-	query = "hw.perflevel1.physicalcpu"
-	efficiencyCores, _ := syscall.SysctlUint32(query) // On x86 xeon this wont return data
-
-	// Determine thread count
-	query = "hw.logicalcpu"
-	logicalCores, _ := syscall.SysctlUint32(query)
-
-	return SystemInfo{
-		System: CPUInfo{
-			GpuInfo: GpuInfo{
-				memInfo: mem,
-			},
-			CPUs: []CPU{
-				{
-					CoreCount:           int(perfCores + efficiencyCores),
-					EfficiencyCoreCount: int(efficiencyCores),
-					ThreadCount:         int(logicalCores),
-				},
-			},
-		},
-		GPUs: GetGPUInfo(),
-	}
-}
--- a/discover/gpu_info_darwin.m
+++ b/discover/gpu_info_darwin.m
@ -1,35 +0,0 @@
-#import <Foundation/Foundation.h>
-#import <mach/mach.h>
-#include "gpu_info_darwin.h"
-
-uint64_t getRecommendedMaxVRAM() {
-  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
-  uint64_t result = device.recommendedMaxWorkingSetSize;
-  CFRelease(device);
-  return result;
-}
-
-// getPhysicalMemory returns the total physical memory in bytes
-uint64_t getPhysicalMemory() {
-  return [NSProcessInfo processInfo].physicalMemory;
-}
-
-// getFreeMemory returns the total free memory in bytes, including inactive
-// memory that can be reclaimed by the system.
-uint64_t getFreeMemory() {
-  mach_port_t host_port = mach_host_self();
-  mach_msg_type_number_t host_size = sizeof(vm_statistics64_data_t) / sizeof(integer_t);
-  vm_size_t pagesize;
-  vm_statistics64_data_t vm_stat;
-
-  host_page_size(host_port, &pagesize);
-  if (host_statistics64(host_port, HOST_VM_INFO64, (host_info64_t)&vm_stat, &host_size) != KERN_SUCCESS) {
-    return 0;
-  }
-
-  uint64_t free_memory = (uint64_t)vm_stat.free_count * pagesize;
-  free_memory += (uint64_t)vm_stat.speculative_count * pagesize;
-  free_memory += (uint64_t)vm_stat.inactive_count * pagesize;
-
-  return free_memory;
-}
--- a/discover/gpu_info_nvml.c
+++ b/discover/gpu_info_nvml.c
@ -1,104 +0,0 @@
-#ifndef __APPLE__  // TODO - maybe consider nvidia support on intel macs?
-
-#include <string.h>
-
-#include "gpu_info_nvml.h"
-
-void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp) {
-  nvmlReturn_t ret;
-  resp->err = NULL;
-  const int buflen = 256;
-  char buf[buflen + 1];
-  int i;
-
-  struct lookup {
-    char *s;
-    void **p;
-  } l[] = {
-      {"nvmlInit_v2", (void *)&resp->ch.nvmlInit_v2},
-      {"nvmlShutdown", (void *)&resp->ch.nvmlShutdown},
-      {"nvmlDeviceGetHandleByUUID", (void *)&resp->ch.nvmlDeviceGetHandleByUUID},
-      {"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.nvmlDeviceGetMemoryInfo},
-      {NULL, NULL},
-  };
-
-  resp->ch.handle = LOAD_LIBRARY(nvml_lib_path, RTLD_LAZY);
-  if (!resp->ch.handle) {
-    char *msg = LOAD_ERR();
-    LOG(resp->ch.verbose, "library %s load err: %s\n", nvml_lib_path, msg);
-    snprintf(buf, buflen,
-             "Unable to load %s library to query for Nvidia GPUs: %s",
-             nvml_lib_path, msg);
-    free(msg);
-    resp->err = strdup(buf);
-    return;
-  }
-
-  // TODO once we've squashed the remaining corner cases remove this log
-  // LOG(resp->ch.verbose, "wiring nvidia management library functions in %s\n", nvml_lib_path);
-  
-  for (i = 0; l[i].s != NULL; i++) {
-    // TODO once we've squashed the remaining corner cases remove this log
-    // LOG(resp->ch.verbose, "dlsym: %s\n", l[i].s);
-
-    *l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
-    if (!*(l[i].p)) {
-      resp->ch.handle = NULL;
-      char *msg = LOAD_ERR();
-      LOG(resp->ch.verbose, "dlerr: %s\n", msg);
-      UNLOAD_LIBRARY(resp->ch.handle);
-      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
-               msg);
-      free(msg);
-      resp->err = strdup(buf);
-      return;
-    }
-  }
-
-  ret = (*resp->ch.nvmlInit_v2)();
-  if (ret != NVML_SUCCESS) {
-    LOG(resp->ch.verbose, "nvmlInit_v2 err: %d\n", ret);
-    UNLOAD_LIBRARY(resp->ch.handle);
-    resp->ch.handle = NULL;
-    snprintf(buf, buflen, "nvml vram init failure: %d", ret);
-    resp->err = strdup(buf);
-    return;
-  }
-}
-
-
-void nvml_get_free(nvml_handle_t h, char *uuid, uint64_t *free, uint64_t *total, uint64_t *used) {
-    nvmlDevice_t device;
-    nvmlMemory_t memInfo = {0};
-    nvmlReturn_t ret;
-    ret = (*h.nvmlDeviceGetHandleByUUID)((const char *)(uuid), &device);
-    if (ret != NVML_SUCCESS) {
-        LOG(1, "unable to get device handle %s: %d", uuid, ret);
-        *free = 0;
-        return;
-    }
-
-    ret = (*h.nvmlDeviceGetMemoryInfo)(device, &memInfo);
-    if (ret != NVML_SUCCESS) {
-        LOG(1, "device memory info lookup failure %s: %d", uuid, ret);
-        *free = 0;
-        return;
-    }
-    *free = memInfo.free;
-    *total = memInfo.total;
-    *used = memInfo.used;
-}
-
-
-void nvml_release(nvml_handle_t h) {
-  LOG(h.verbose, "releasing nvml library\n");
-  nvmlReturn_t ret;
-  ret = (*h.nvmlShutdown)();
-  if (ret != NVML_SUCCESS) {
-    LOG(1, "error during nvmlShutdown %d", ret);
-  }
-  UNLOAD_LIBRARY(h.handle);
-  h.handle = NULL;
-}
-
-#endif  // __APPLE__
--- a/discover/gpu_info_nvml.h
+++ b/discover/gpu_info_nvml.h
@ -1,48 +0,0 @@
-#ifndef __APPLE__
-#ifndef __GPU_INFO_NVML_H__
-#define __GPU_INFO_NVML_H__
-#include "gpu_info.h"
-
-// Minimal local declarations for the NVML entry points this code resolves at
-// runtime; the real NVML headers are not included.
-
-// Just enough typedef's to dlopen/dlsym for memory information
-typedef enum nvmlReturn_enum {
-  NVML_SUCCESS = 0,
-  // Other values omitted for now...
-} nvmlReturn_t;
-typedef void *nvmlDevice_t;  // Opaque is sufficient
-// Memory counters filled in by nvmlDeviceGetMemoryInfo.
-typedef struct nvmlMemory_st {
-  unsigned long long total;
-  unsigned long long free;
-  unsigned long long used;
-} nvmlMemory_t;
-
-// NOTE(review): not referenced by any declaration in this header - confirm
-// whether it is still needed.
-typedef enum nvmlBrandType_enum
-{
-    NVML_BRAND_UNKNOWN          = 0,
-} nvmlBrandType_t;
-
-// Loaded-library handle plus the resolved NVML function pointers.
-typedef struct nvml_handle {
-  void *handle;      // library handle passed to the symbol lookup / unload macros
-  uint16_t verbose;  // passed as the first argument of LOG
-  nvmlReturn_t (*nvmlInit_v2)(void);
-  nvmlReturn_t (*nvmlShutdown)(void);
-  nvmlReturn_t (*nvmlDeviceGetHandleByUUID)(const char *, nvmlDevice_t *);
-  nvmlReturn_t (*nvmlDeviceGetMemoryInfo)(nvmlDevice_t, nvmlMemory_t *);
-} nvml_handle_t;
-
-// Result of nvml_init.
-typedef struct nvml_init_resp {
-  char *err;  // If err is non-null handle is invalid
-  nvml_handle_t ch;
-} nvml_init_resp_t;
-
-// NOTE(review): no prototype in this header takes or returns this type -
-// confirm whether it is still used.
-typedef struct nvml_compute_capability {
-  char *err;
-  int major;
-  int minor;
-} nvml_compute_capability_t;
-
-// Loads the library at nvml_lib_path and fills in *resp.
-void nvml_init(char *nvml_lib_path, nvml_init_resp_t *resp);
-// Reports memory counters for the device with the given UUID.
-void nvml_get_free(nvml_handle_t ch, char *uuid, uint64_t *free, uint64_t *total, uint64_t *used);
-// Shuts NVML down and unloads the library.
-void nvml_release(nvml_handle_t ch);
-
-#endif  // __GPU_INFO_NVML_H__
-#endif  // __APPLE__
--- a/discover/gpu_info_oneapi.c
+++ b/discover/gpu_info_oneapi.c
@ -1,259 +0,0 @@
-#ifndef __APPLE__
-
-#include "gpu_info_oneapi.h"
-
-#include <string.h>
-
-// Loads the Level-Zero library at `oneapi_lib_path`, resolves the zes* entry
-// points into resp->oh, calls zesInit, and enumerates every driver together
-// with each driver's device handles.
-//
-// On success resp->err is NULL and resp->oh owns the library handle plus the
-// drivers / num_devices / devices arrays. On failure resp->err is a strdup'd
-// message and resp->oh must not be used.
-void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) {
-  ze_result_t ret;
-  resp->err = NULL;
-  resp->oh.devices = NULL;
-  resp->oh.num_devices = NULL;
-  resp->oh.drivers = NULL;
-  resp->oh.num_drivers = 0;
-  const int buflen = 256;
-  char buf[buflen + 1];
-  int i, d;
-  // Symbol name -> destination function pointer inside resp->oh.
-  struct lookup {
-    char *s;
-    void **p;
-  } l[] = {
-      {"zesInit", (void *)&resp->oh.zesInit},
-      {"zesDriverGet", (void *)&resp->oh.zesDriverGet},
-      {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
-      {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
-      {"zesDeviceEnumMemoryModules",
-       (void *)&resp->oh.zesDeviceEnumMemoryModules},
-      {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
-      {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
-      {NULL, NULL},
-  };
-
-  resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
-  if (!resp->oh.handle) {
-    char *msg = LOAD_ERR();
-    snprintf(buf, buflen,
-             "Unable to load %s library to query for Intel GPUs: %s\n",
-             oneapi_lib_path, msg);
-    free(msg);
-    resp->err = strdup(buf);
-    return;
-  }
-
-  // TODO once we've squashed the remaining corner cases remove this log
-  LOG(resp->oh.verbose,
-      "wiring Level-Zero management library functions in %s\n",
-      oneapi_lib_path);
-
-  for (i = 0; l[i].s != NULL; i++) {
-    // TODO once we've squashed the remaining corner cases remove this log
-    LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);
-
-    *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
-    if (!*(l[i].p)) {
-      char *msg = LOAD_ERR();
-      LOG(resp->oh.verbose, "dlerr: %s\n", msg);
-      // Unload while the handle is still valid, then clear it. Clearing it
-      // first would hand NULL to UNLOAD_LIBRARY and leak the library.
-      UNLOAD_LIBRARY(resp->oh.handle);
-      resp->oh.handle = NULL;
-      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
-      free(msg);
-      resp->err = strdup(buf);
-      return;
-    }
-  }
-
-  LOG(resp->oh.verbose, "calling zesInit\n");
-
-  ret = (*resp->oh.zesInit)(0);
-  if (ret != ZE_RESULT_SUCCESS) {
-    LOG(resp->oh.verbose, "zesInit err: %x\n", ret);
-    snprintf(buf, buflen, "oneapi vram init failure: %x", ret);
-    resp->err = strdup(buf);
-    oneapi_release(resp->oh);
-    return;
-  }
-
-  // First call with a NULL array only reports the number of drivers.
-  LOG(resp->oh.verbose, "calling zesDriverGet\n");
-  ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, NULL);
-  if (ret != ZE_RESULT_SUCCESS) {
-    LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
-    snprintf(buf, buflen, "unable to get driver count: %x", ret);
-    resp->err = strdup(buf);
-    oneapi_release(resp->oh);
-    return;
-  }
-  LOG(resp->oh.verbose, "oneapi driver count: %d\n", resp->oh.num_drivers);
-
-  // calloc so every devices[d] starts as NULL and every num_devices[d] as 0:
-  // oneapi_release walks all num_drivers slots, so a failure part-way through
-  // the loop below must not leave uninitialized pointers for it to free.
-  resp->oh.drivers = calloc(resp->oh.num_drivers, sizeof(zes_driver_handle_t));
-  resp->oh.num_devices = calloc(resp->oh.num_drivers, sizeof(uint32_t));
-  resp->oh.devices =
-      calloc(resp->oh.num_drivers, sizeof(zes_device_handle_t *));
-  // A zero-sized calloc may legitimately return NULL, so only treat NULL as a
-  // failure when there is something to allocate.
-  if (resp->oh.num_drivers > 0 &&
-      (resp->oh.drivers == NULL || resp->oh.num_devices == NULL ||
-       resp->oh.devices == NULL)) {
-    snprintf(buf, buflen, "unable to allocate memory for %d drivers",
-             resp->oh.num_drivers);
-    resp->err = strdup(buf);
-    oneapi_release(resp->oh);
-    return;
-  }
-
-  ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, resp->oh.drivers);
-  if (ret != ZE_RESULT_SUCCESS) {
-    LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
-    snprintf(buf, buflen, "unable to get driver count: %x", ret);
-    resp->err = strdup(buf);
-    oneapi_release(resp->oh);
-    return;
-  }
-
-  for (d = 0; d < resp->oh.num_drivers; d++) {
-    // Same two-step pattern per driver: query the count, then the handles.
-    LOG(resp->oh.verbose, "calling zesDeviceGet count %d: %p\n", d, resp->oh.drivers[d]);
-    ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d],
-                                   &resp->oh.num_devices[d], NULL);
-    if (ret != ZE_RESULT_SUCCESS) {
-      LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
-      snprintf(buf, buflen, "unable to get device count: %x", ret);
-      resp->err = strdup(buf);
-      oneapi_release(resp->oh);
-      return;
-    }
-    resp->oh.devices[d] =
-        malloc(resp->oh.num_devices[d] * sizeof(zes_device_handle_t));
-    if (resp->oh.devices[d] == NULL && resp->oh.num_devices[d] > 0) {
-      snprintf(buf, buflen, "unable to allocate memory for %d devices",
-               resp->oh.num_devices[d]);
-      resp->err = strdup(buf);
-      oneapi_release(resp->oh);
-      return;
-    }
-    ret = (*resp->oh.zesDeviceGet)(
-        resp->oh.drivers[d], &resp->oh.num_devices[d], resp->oh.devices[d]);
-    if (ret != ZE_RESULT_SUCCESS) {
-      LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
-      snprintf(buf, buflen, "unable to get device count: %x", ret);
-      resp->err = strdup(buf);
-      oneapi_release(resp->oh);
-      return;
-    }
-  }
-
-  return;
-}
-
-// Fills `resp` with the name, id and memory totals of one Level-Zero device.
-//
-// `driver` / `device` index into h.devices[driver][device]. resp->total and
-// resp->free are the sums of `size` and `free` over every memory module the
-// device enumerates. On any failure resp->err is set to a strdup'd message.
-void oneapi_check_vram(oneapi_handle_t h, int driver, int device,
-                       mem_info_t *resp) {
-  ze_result_t ret;
-  resp->err = NULL;
-  // Defined values on every return path, including the early error returns.
-  resp->total = 0;
-  resp->free = 0;
-  const int buflen = 256;
-  char buf[buflen + 1];
-  uint32_t m;
-
-  if (h.handle == NULL) {
-    resp->err = strdup("Level-Zero handle not initialized");
-    return;
-  }
-
-  // Valid indices are [0, count). The driver index is validated first so that
-  // num_devices[driver] is only read once `driver` is known to be in range.
-  if (driver < 0 || (uint32_t)driver >= h.num_drivers || device < 0 ||
-      (uint32_t)device >= h.num_devices[driver]) {
-    resp->err = strdup("driver or device index out of bounds");
-    return;
-  }
-
-  zes_device_ext_properties_t ext_props;
-  ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
-  ext_props.pNext = NULL;
-
-  // Chain the extended properties struct behind the base properties.
-  zes_device_properties_t props;
-  props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
-  props.pNext = &ext_props;
-
-  ret = (*h.zesDeviceGetProperties)(h.devices[driver][device], &props);
-  if (ret != ZE_RESULT_SUCCESS) {
-    snprintf(buf, buflen, "unable to get device properties: %d", ret);
-    resp->err = strdup(buf);
-    return;
-  }
-
-  snprintf(&resp->gpu_name[0], GPU_NAME_LEN, "%s", props.modelName);
-
-  // TODO this needs to map to ONEAPI_DEVICE_SELECTOR syntax
-  // (this is probably wrong...)
-  // TODO - the driver isn't included - what if there are multiple drivers?
-  snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", device);
-
-  if (h.verbose) {
-    // When in verbose mode, report more information about
-    // the card we discover.
-    LOG(h.verbose, "[%d:%d] oneAPI device name: %s\n", driver, device,
-        props.modelName);
-    LOG(h.verbose, "[%d:%d] oneAPI brand: %s\n", driver, device,
-        props.brandName);
-    LOG(h.verbose, "[%d:%d] oneAPI vendor: %s\n", driver, device,
-        props.vendorName);
-    LOG(h.verbose, "[%d:%d] oneAPI S/N: %s\n", driver, device,
-        props.serialNumber);
-    LOG(h.verbose, "[%d:%d] oneAPI board number: %s\n", driver, device,
-        props.boardNumber);
-  }
-
-  // TODO
-  // Compute Capability equivalent in resp->major, resp->minor, resp->patch
-
-  // First call with a NULL array only reports the number of memory modules.
-  uint32_t memCount = 0;
-  ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount,
-                                        NULL);
-  if (ret != ZE_RESULT_SUCCESS) {
-    snprintf(buf, buflen, "unable to enumerate Level-Zero memory modules: %x",
-             ret);
-    resp->err = strdup(buf);
-    return;
-  }
-
-  LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);
-
-  if (memCount == 0) {
-    // Nothing to sum; total and free stay at 0.
-    return;
-  }
-
-  zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
-  if (mems == NULL) {
-    snprintf(buf, buflen, "unable to allocate memory for %d memory modules",
-             memCount);
-    resp->err = strdup(buf);
-    return;
-  }
-  ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount,
-                                        mems);
-  if (ret != ZE_RESULT_SUCCESS) {
-    snprintf(buf, buflen, "unable to enumerate Level-Zero memory modules: %x",
-             ret);
-    resp->err = strdup(buf);
-    free(mems);
-    return;
-  }
-
-  for (m = 0; m < memCount; m++) {
-    zes_mem_state_t state;
-    state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
-    state.pNext = NULL;
-    ret = (*h.zesMemoryGetState)(mems[m], &state);
-    if (ret != ZE_RESULT_SUCCESS) {
-      snprintf(buf, buflen, "unable to get memory state: %x", ret);
-      resp->err = strdup(buf);
-      free(mems);
-      return;
-    }
-
-    resp->total += state.size;
-    resp->free += state.free;
-  }
-
-  free(mems);
-}
-
-// Frees each per-driver device array, then the devices / num_devices /
-// drivers arrays themselves, and finally unloads the Level-Zero library.
-// `h` is received by value, so the field resets below touch only this copy.
-void oneapi_release(oneapi_handle_t h) {
-  LOG(h.verbose, "releasing oneapi library\n");
-
-  if (h.devices != NULL) {
-    int idx = 0;
-    while (idx < h.num_drivers) {
-      if (h.devices[idx] != NULL) {
-        free(h.devices[idx]);
-      }
-      idx++;
-    }
-    free(h.devices);
-    h.devices = NULL;
-  }
-
-  if (h.num_devices != NULL) {
-    free(h.num_devices);
-    h.num_devices = NULL;
-  }
-
-  if (h.drivers != NULL) {
-    free(h.drivers);
-    h.drivers = NULL;
-  }
-
-  h.num_drivers = 0;
-  UNLOAD_LIBRARY(h.handle);
-  h.handle = NULL;
-}
-
-// Returns the number of devices recorded for driver index `driver`, or 0 when
-// the handle is not initialized or the index is outside [0, h.num_drivers).
-int oneapi_get_device_count(oneapi_handle_t h, int driver) {
-  if (h.handle == NULL || h.num_devices == NULL) {
-    return 0;
-  }
-  // `>=` rather than `>`: index num_drivers is one past the end of the array.
-  if (driver < 0 || (uint32_t)driver >= h.num_drivers) {
-    return 0;
-  }
-  return (int)h.num_devices[driver];
-}
-
-#endif // __APPLE__
--- a/discover/gpu_linux.go
+++ b/discover/gpu_linux.go
@ -1,199 +0,0 @@
-package discover
-
-import (
-	"bufio"
-	"fmt"
-	"io"
-	"os"
-	"reflect"
-	"regexp"
-	"sort"
-	"strings"
-
-	"github.com/ollama/ollama/format"
-)
-
-// CudartGlobs lists glob patterns for Linux locations of libcudart.so.
-// NOTE(review): the code that expands these patterns is not in this file.
-var CudartGlobs = []string{
-	"/usr/local/cuda/lib64/libcudart.so*",
-	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
-	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
-	"/usr/lib/wsl/lib/libcudart.so*",
-	"/usr/lib/wsl/drivers/*/libcudart.so*",
-	"/opt/cuda/lib64/libcudart.so*",
-	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
-	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
-	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
-	"/usr/local/cuda/lib*/libcudart.so*",
-	"/usr/lib*/libcudart.so*",
-	"/usr/local/lib*/libcudart.so*",
-}
-
-// NvmlGlobs is empty on Linux (see NvmlMgmtName below).
-var NvmlGlobs = []string{}
-
-// NvcudaGlobs lists glob patterns for Linux locations of libcuda.so.
-var NvcudaGlobs = []string{
-	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
-	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
-	"/usr/lib/*-linux-gnu/libcuda.so*",
-	"/usr/lib/wsl/lib/libcuda.so*",
-	"/usr/lib/wsl/drivers/*/libcuda.so*",
-	"/opt/cuda/lib*/libcuda.so*",
-	"/usr/local/cuda/lib*/libcuda.so*",
-	"/usr/lib*/libcuda.so*",
-	"/usr/local/lib*/libcuda.so*",
-}
-
-// OneapiGlobs lists glob patterns for Linux locations of libze_intel_gpu.so.
-var OneapiGlobs = []string{
-	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
-	"/usr/lib*/libze_intel_gpu.so*",
-}
-
-// File-name patterns of the per-vendor libraries on Linux.
-var (
-	CudartMgmtName = "libcudart.so*"
-	NvcudaMgmtName = "libcuda.so*"
-	NvmlMgmtName   = "" // not currently wired on linux
-	OneapiMgmtName = "libze_intel_gpu.so*"
-)
-
-// GetCPUMem parses /proc/meminfo and reports total memory, free memory and
-// free swap, each scaled by format.KibiByte.
-//
-// FreeMemory is MemAvailable when that field is non-zero; otherwise it falls
-// back to MemFree + Buffers + Cached. An error is returned if the file cannot
-// be opened, a recognized line fails to parse, or reading the file fails.
-func GetCPUMem() (memInfo, error) {
-	var mem memInfo
-	var total, available, free, buffers, cached, freeSwap uint64
-	f, err := os.Open("/proc/meminfo")
-	if err != nil {
-		return mem, err
-	}
-	defer f.Close()
-	s := bufio.NewScanner(f)
-	for s.Scan() {
-		line := s.Text()
-		switch {
-		case strings.HasPrefix(line, "MemTotal:"):
-			_, err = fmt.Sscanf(line, "MemTotal:%d", &total)
-		case strings.HasPrefix(line, "MemAvailable:"):
-			_, err = fmt.Sscanf(line, "MemAvailable:%d", &available)
-		case strings.HasPrefix(line, "MemFree:"):
-			_, err = fmt.Sscanf(line, "MemFree:%d", &free)
-		case strings.HasPrefix(line, "Buffers:"):
-			_, err = fmt.Sscanf(line, "Buffers:%d", &buffers)
-		case strings.HasPrefix(line, "Cached:"):
-			_, err = fmt.Sscanf(line, "Cached:%d", &cached)
-		case strings.HasPrefix(line, "SwapFree:"):
-			_, err = fmt.Sscanf(line, "SwapFree:%d", &freeSwap)
-		default:
-			continue
-		}
-		if err != nil {
-			return mem, err
-		}
-	}
-	// Scan returns false on a read error as well as on EOF; without this
-	// check a truncated read would be reported as valid (partial) numbers.
-	if err := s.Err(); err != nil {
-		return mem, err
-	}
-	mem.TotalMemory = total * format.KibiByte
-	mem.FreeSwap = freeSwap * format.KibiByte
-	if available > 0 {
-		mem.FreeMemory = available * format.KibiByte
-	} else {
-		mem.FreeMemory = (free + buffers + cached) * format.KibiByte
-	}
-	return mem, nil
-}
-
-// CpuInfoFilename is the file GetCPUDetails opens to read processor details.
-const CpuInfoFilename = "/proc/cpuinfo"
-
-// linuxCpuInfo holds the fields collected for one processor entry. Each
-// `cpuinfo` tag is the key text that linuxCPUDetails matches against the
-// left-hand side of a "key<tabs>: value" line; values are stored as the raw
-// strings read from the file.
-type linuxCpuInfo struct {
-	ID         string `cpuinfo:"processor"`
-	VendorID   string `cpuinfo:"vendor_id"`
-	ModelName  string `cpuinfo:"model name"`
-	PhysicalID string `cpuinfo:"physical id"`
-	Siblings   string `cpuinfo:"siblings"`
-	CoreID     string `cpuinfo:"core id"`
-}
-
-// GetCPUDetails opens CpuInfoFilename and returns the CPU summary produced by
-// linuxCPUDetails. It returns an error if the file cannot be opened.
-func GetCPUDetails() ([]CPU, error) {
-	file, err := os.Open(CpuInfoFilename)
-	if err != nil {
-		return nil, err
-	}
-	// The file is only read, so a Close error carries no information we
-	// could act on; closing is what releases the descriptor.
-	defer file.Close()
-	return linuxCPUDetails(file)
-}
-
-// linuxCPUDetails parses cpuinfo-formatted text from file and returns one CPU
-// per distinct "physical id", sorted by that id.
-//
-// Lines are split on "<tabs>: " into key and value; keys matching a `cpuinfo`
-// struct tag of linuxCpuInfo are stored on the current entry, and a blank
-// line ends the entry. For each socket, CoreCount is the number of distinct
-// core ids (falling back to the processor id when "core id" is absent) and
-// ThreadCount is the number of entries. EfficiencyCoreCount is the number of
-// cores with exactly one thread, or 0 when that is true of every core.
-// An error is returned if reading from file fails.
-func linuxCPUDetails(file io.Reader) ([]CPU, error) {
-	reColumns := regexp.MustCompile("\t+: ")
-	scanner := bufio.NewScanner(file)
-	cpuInfos := []linuxCpuInfo{}
-	cpu := &linuxCpuInfo{}
-	// The struct type never changes, so resolve it once instead of per line.
-	t := reflect.TypeOf(cpu).Elem()
-	for scanner.Scan() {
-		line := scanner.Text()
-		if sl := reColumns.Split(line, 2); len(sl) > 1 {
-			s := reflect.ValueOf(cpu).Elem()
-			for i := range t.NumField() {
-				field := t.Field(i)
-				tag := field.Tag.Get("cpuinfo")
-				if tag == sl[0] {
-					s.FieldByName(field.Name).SetString(sl[1])
-					break
-				}
-			}
-		} else if strings.TrimSpace(line) == "" && cpu.ID != "" {
-			cpuInfos = append(cpuInfos, *cpu)
-			cpu = &linuxCpuInfo{}
-		}
-	}
-	// Scan stops on read errors as well as EOF; surface them rather than
-	// returning counts computed from a partial file.
-	if err := scanner.Err(); err != nil {
-		return nil, err
-	}
-	// The last entry is not necessarily followed by a blank line.
-	if cpu.ID != "" {
-		cpuInfos = append(cpuInfos, *cpu)
-	}
-
-	// Process the sockets/cores/threads
-	socketByID := map[string]*CPU{}
-	coreBySocket := map[string]map[string]struct{}{}
-	threadsByCoreBySocket := map[string]map[string]int{}
-	for _, c := range cpuInfos {
-		if _, found := socketByID[c.PhysicalID]; !found {
-			socketByID[c.PhysicalID] = &CPU{
-				ID:        c.PhysicalID,
-				VendorID:  c.VendorID,
-				ModelName: c.ModelName,
-			}
-			coreBySocket[c.PhysicalID] = map[string]struct{}{}
-			threadsByCoreBySocket[c.PhysicalID] = map[string]int{}
-		}
-		// Entries without a "core id" are keyed by their processor id.
-		coreKey := c.PhysicalID + ":" + c.CoreID
-		if c.CoreID == "" {
-			coreKey = c.PhysicalID + ":" + c.ID
-		}
-		coreBySocket[c.PhysicalID][coreKey] = struct{}{}
-		threadsByCoreBySocket[c.PhysicalID][coreKey]++
-	}
-
-	// Tally up the values from the tracking maps
-	for id, s := range socketByID {
-		s.CoreCount = len(coreBySocket[id])
-		s.ThreadCount = 0
-
-		// This only works if HT is enabled, consider a more reliable model, maybe cache size comparisons?
-		efficiencyCoreCount := 0
-		for _, threads := range threadsByCoreBySocket[id] {
-			s.ThreadCount += threads
-			if threads == 1 {
-				efficiencyCoreCount++
-			}
-		}
-		if efficiencyCoreCount == s.CoreCount {
-			// 1:1 mapping means they're not actually efficiency cores, but regular cores
-			s.EfficiencyCoreCount = 0
-		} else {
-			s.EfficiencyCoreCount = efficiencyCoreCount
-		}
-	}
-
-	// Map iteration order is random; sort the socket ids for stable output.
-	keys := make([]string, 0, len(socketByID))
-	result := make([]CPU, 0, len(socketByID))
-	for k := range socketByID {
-		keys = append(keys, k)
-	}
-	sort.Strings(keys)
-	for _, k := range keys {
-		result = append(result, *socketByID[k])
-	}
-	return result, nil
-}
--- a/discover/gpu_linux_test.go
+++ b/discover/gpu_linux_test.go
--- a/discover/gpu_test.go
+++ b/discover/gpu_test.go
@ -1,60 +0,0 @@
-package discover
-
-import (
-	"runtime"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-// TestBasicGetGPUInfo checks that GetGPUInfo returns at least one entry, that
-// the first entry reports a known library, and that a non-CPU entry reports
-// non-zero total and free memory.
-func TestBasicGetGPUInfo(t *testing.T) {
-	info := GetGPUInfo()
-	// require, not assert: the checks below index info[0], which would panic
-	// on an empty result instead of failing the test cleanly.
-	require.NotEmpty(t, info)
-	// Membership in a list rather than a substring match, so an empty or
-	// partial library name is rejected.
-	assert.Contains(t, []string{"cuda", "rocm", "cpu", "metal"}, info[0].Library)
-	if info[0].Library != "cpu" {
-		assert.Greater(t, info[0].TotalMemory, uint64(0))
-		assert.Greater(t, info[0].FreeMemory, uint64(0))
-	}
-}
-
-// TestCPUMemInfo requires GetCPUMem to succeed, skips on darwin, and on linux
-// and windows expects non-zero total and free memory. Other platforms only
-// get the no-error check.
-func TestCPUMemInfo(t *testing.T) {
-	mem, err := GetCPUMem()
-	require.NoError(t, err)
-
-	if runtime.GOOS == "darwin" {
-		t.Skip("CPU memory not populated on darwin")
-	}
-	if runtime.GOOS != "linux" && runtime.GOOS != "windows" {
-		return
-	}
-
-	assert.Greater(t, mem.TotalMemory, uint64(0))
-	assert.Greater(t, mem.FreeMemory, uint64(0))
-}
-
-func TestByLibrary(t *testing.T) {
-	type testCase struct {
-		input  []GpuInfo
-		expect int
-	}
-
-	testCases := map[string]*testCase{
-		"empty":                    {input: []GpuInfo{}, expect: 0},
-		"cpu":                      {input: []GpuInfo{{Library: "cpu"}}, expect: 1},
-		"cpu + GPU":                {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}}, expect: 2},
-		"cpu + 2 GPU no variant":   {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda"}, {Library: "cuda"}}, expect: 2},
-		"cpu + 2 GPU same variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v11"}}, expect: 2},
-		"cpu + 2 GPU diff variant": {input: []GpuInfo{{Library: "cpu"}, {Library: "cuda", Variant: "v11"}, {Library: "cuda", Variant: "v12"}}, expect: 3},
-	}
-
-	for k, v := range testCases {
-		t.Run(k, func(t *testing.T) {
-			resp := (GpuInfoList)(v.input).ByLibrary()
-			if len(resp) != v.expect {
-				t.Fatalf("expected length %d, got %d => %+v", v.expect, len(resp), resp)
-			}
-		})
-	}
-}
-
-// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
--- a/discover/gpu_windows.go
+++ b/discover/gpu_windows.go
@ -1,234 +0,0 @@
-package discover
-
-import (
-	"fmt"
-	"log/slog"
-	"syscall"
-	"unsafe"
-)
-
-// MEMORYSTATUSEX is the buffer handed to the GlobalMemoryStatusEx call in
-// GetCPUMem. The caller sets length to the struct size before the call;
-// GetCPUMem reads TotalPhys, AvailPhys and AvailPageFile from the result.
-type MEMORYSTATUSEX struct {
-	length               uint32
-	MemoryLoad           uint32
-	TotalPhys            uint64
-	AvailPhys            uint64
-	TotalPageFile        uint64
-	AvailPageFile        uint64
-	TotalVirtual         uint64
-	AvailVirtual         uint64
-	AvailExtendedVirtual uint64
-}
-
-// Lazily-resolved kernel32.dll procedures used by this file, plus the size
-// value written into MEMORYSTATUSEX.length.
-var (
-	k32                              = syscall.NewLazyDLL("kernel32.dll")
-	globalMemoryStatusExProc         = k32.NewProc("GlobalMemoryStatusEx")
-	sizeofMemoryStatusEx             = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
-	GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
-)
-
-// CudartGlobs lists glob patterns for Windows locations of cudart64_*.dll.
-// NOTE(review): the code that expands these patterns is not in this file.
-var CudartGlobs = []string{
-	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
-}
-
-// NvmlGlobs lists glob patterns for Windows locations of nvml.dll.
-var NvmlGlobs = []string{
-	"c:\\Windows\\System32\\nvml.dll",
-}
-
-// NvcudaGlobs lists glob patterns for Windows locations of nvcuda.dll.
-var NvcudaGlobs = []string{
-	"c:\\windows\\system*\\nvcuda.dll",
-}
-
-// OneapiGlobs lists glob patterns for Windows locations of ze_intel_gpu64.dll.
-var OneapiGlobs = []string{
-	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
-}
-
-// File-name patterns of the per-vendor libraries on Windows.
-var (
-	CudartMgmtName = "cudart64_*.dll"
-	NvcudaMgmtName = "nvcuda.dll"
-	NvmlMgmtName   = "nvml.dll"
-	OneapiMgmtName = "ze_intel_gpu64.dll"
-)
-
-func GetCPUMem() (memInfo, error) {
-	memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
-	r1, _, err := globalMemoryStatusExProc.Call(uintptr(unsafe.Pointer(&memStatus)))
-	if r1 == 0 {
-		return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
-	}
-	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
-}
-
-// LOGICAL_PROCESSOR_RELATIONSHIP identifies the kind of a
-// SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX record and is also the selector
-// passed as the first argument of GetLogicalProcessorInformationEx.
-type LOGICAL_PROCESSOR_RELATIONSHIP uint32
-
-// Relationship kinds, numbered from 0 in declaration order. This file only
-// inspects RelationProcessorCore and RelationProcessorPackage records.
-const (
-	RelationProcessorCore LOGICAL_PROCESSOR_RELATIONSHIP = iota
-	RelationNumaNode
-	RelationCache
-	RelationProcessorPackage
-	RelationGroup
-	RelationProcessorDie
-	RelationNumaNodeEx
-	RelationProcessorModule
-)
-
-// RelationAll is the selector this file passes to request every record kind.
-const RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff
-
-// GROUP_AFFINITY pairs a processor bitmask with a group number. Values are
-// read in place from the GroupMask array of a PROCESSOR_RELATIONSHIP.
-type GROUP_AFFINITY struct {
-	Mask     uintptr // KAFFINITY
-	Group    uint16
-	Reserved [3]uint16
-}
-
-// PROCESSOR_RELATIONSHIP is the payload of core and package records. In this
-// file a core with Flags == 0 is counted as one thread and any other value as
-// two, and EfficiencyClass values are compared against the largest one seen.
-// GroupMask is declared with length 1 but is indexed up to GroupCount entries
-// via pointer arithmetic.
-type PROCESSOR_RELATIONSHIP struct {
-	Flags           byte
-	EfficiencyClass byte
-	Reserved        [20]byte
-	GroupCount      uint16
-	GroupMask       [1]GROUP_AFFINITY // len GroupCount
-}
-
-// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP
-
-// SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX is the header of one
-// variable-length record in the buffer returned by
-// getLogicalProcessorInformationEx. Size is used as the byte distance to the
-// next record; U marks where the payload starts and is reinterpreted as a
-// PROCESSOR_RELATIONSHIP for core and package records.
-type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
-	Relationship LOGICAL_PROCESSOR_RELATIONSHIP
-	Size         uint32
-	U            [1]byte // Union len Size
-	// PROCESSOR_RELATIONSHIP
-	// NUMA_NODE_RELATIONSHIP
-	// CACHE_RELATIONSHIP
-	// GROUP_RELATIONSHIP
-}
-
-// IsMember reports whether target shares at least one logical processor with
-// group. It returns false if either pointer is nil.
-//
-// Mask bits are numbered relative to the processor group, so two affinities
-// only overlap when they name the same Group as well as a common mask bit.
-// Comparing masks alone would match processors in different groups that
-// happen to occupy the same bit positions.
-func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
-	if group == nil || target == nil {
-		return false
-	}
-	return group.Group == target.Group && group.Mask&target.Mask != 0
-}
-
-// winPackage accumulates the counts for one processor package record.
-// groups holds the package's GROUP_AFFINITY entries, which point into the
-// buffer the records were parsed from.
-type winPackage struct {
-	groups              []*GROUP_AFFINITY
-	coreCount           int // performance cores = coreCount - efficiencyCoreCount
-	efficiencyCoreCount int
-	threadCount         int
-}
-
-// IsMember reports whether target is a member of any of the package's group
-// affinities, as decided by GROUP_AFFINITY.IsMember.
-func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
-	for idx := 0; idx < len(pkg.groups); idx++ {
-		if !pkg.groups[idx].IsMember(target) {
-			continue
-		}
-		return true
-	}
-	return false
-}
-
-func getLogicalProcessorInformationEx() ([]byte, error) {
-	buf := make([]byte, 1)
-	bufSize := len(buf)
-	ret, _, err := GetLogicalProcessorInformationEx.Call(
-		uintptr(RelationAll),
-		uintptr(unsafe.Pointer(&buf[0])),
-		uintptr(unsafe.Pointer(&bufSize)),
-	)
-	if ret != 0 {
-		return nil, fmt.Errorf("failed to determine size info ret:%d %w", ret, err)
-	}
-
-	buf = make([]byte, bufSize)
-	ret, _, err = GetLogicalProcessorInformationEx.Call(
-		uintptr(RelationAll),
-		uintptr(unsafe.Pointer(&buf[0])),
-		uintptr(unsafe.Pointer(&bufSize)),
-	)
-	if ret == 0 {
-		return nil, fmt.Errorf("failed to gather processor information ret:%d buflen:%d %w", ret, bufSize, err)
-	}
-	return buf, nil
-}
-
-// processSystemLogicalProcessorInforationList walks the record buffer from
-// getLogicalProcessorInformationEx three times and returns one winPackage per
-// RelationProcessorPackage record:
-//
-//  1. collect the packages and their group affinities,
-//  2. find the largest EfficiencyClass among the core records,
-//  3. attribute each core record to every package it is a member of, adding
-//     to that package's core, thread and efficiency-core counts.
-//
-// A core counts as an efficiency core when its EfficiencyClass is below the
-// maximum, and as two threads when its Flags are non-zero (otherwise one).
-// Each walk stops at a record whose Size is zero, since the offset could not
-// advance past it.
-func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
-	var slpi *SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
-	// Find all the packages first
-	packages := []*winPackage{}
-	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
-		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
-		if slpi.Size == 0 {
-			// Malformed record: without this the loop would never advance.
-			break
-		}
-		if slpi.Relationship != RelationProcessorPackage {
-			continue
-		}
-		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
-		pkg := &winPackage{}
-		// GroupMask is declared with length 1; step through the GroupCount
-		// entries that follow it in the buffer by pointer arithmetic.
-		ga0 := unsafe.Pointer(&pr.GroupMask[0])
-		for j := range pr.GroupCount {
-			gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
-			pkg.groups = append(pkg.groups, gm)
-		}
-		packages = append(packages, pkg)
-	}
-
-	slog.Info("packages", "count", len(packages))
-
-	// To identify efficiency cores we have to compare the relative values
-	// Larger values are "less efficient" (aka, more performant)
-	var maxEfficiencyClass byte
-	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
-		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
-		if slpi.Size == 0 {
-			break
-		}
-		if slpi.Relationship != RelationProcessorCore {
-			continue
-		}
-		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
-		if pr.EfficiencyClass > maxEfficiencyClass {
-			maxEfficiencyClass = pr.EfficiencyClass
-		}
-	}
-	if maxEfficiencyClass > 0 {
-		slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
-	}
-
-	// then match up the Cores to the Packages, count up cores, threads and efficiency cores
-	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
-		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
-		if slpi.Size == 0 {
-			break
-		}
-		if slpi.Relationship != RelationProcessorCore {
-			continue
-		}
-		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
-		ga0 := unsafe.Pointer(&pr.GroupMask[0])
-		for j := range pr.GroupCount {
-			gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
-			for _, pkg := range packages {
-				if pkg.IsMember(gm) {
-					pkg.coreCount++
-					if pr.Flags == 0 {
-						pkg.threadCount++
-					} else {
-						pkg.threadCount += 2
-					}
-					if pr.EfficiencyClass < maxEfficiencyClass {
-						pkg.efficiencyCoreCount++
-					}
-				}
-			}
-		}
-	}
-
-	// Summarize the results
-	for i, pkg := range packages {
-		slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
-	}
-
-	return packages
-}
-
-// GetCPUDetails queries the logical processor records and returns one CPU per
-// package, carrying that package's core, efficiency-core and thread counts.
-// The other CPU fields are left at their zero values.
-func GetCPUDetails() ([]CPU, error) {
-	raw, err := getLogicalProcessorInformationEx()
-	if err != nil {
-		return nil, err
-	}
-
-	pkgs := processSystemLogicalProcessorInforationList(raw)
-	cpus := make([]CPU, 0, len(pkgs))
-	for _, p := range pkgs {
-		cpus = append(cpus, CPU{
-			CoreCount:           p.coreCount,
-			EfficiencyCoreCount: p.efficiencyCoreCount,
-			ThreadCount:         p.threadCount,
-		})
-	}
-	return cpus, nil
-}
--- a/discover/gpu_windows_test.go
+++ b/discover/gpu_windows_test.go
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Josh Yan	5dc5a295bf	added testcase	2024-06-03 17:28:05 -07:00
Josh Yan	e21e6b2a33	added testcase	2024-06-03 17:27:38 -07:00
Josh Yan	a240ea3367	humanNumbers formats to 3 digits, added trillion case for future	2024-06-03 17:26:02 -07:00
				`@ -1 +0,0 @@`
				`This is here to make sure the build/ directory exists for the go:embed command`