diff --git a/models/imageproc/images.go b/models/imageproc/images.go
new file mode 100644
index 00000000..7afe3670
--- /dev/null
+++ b/models/imageproc/images.go
@@ -0,0 +1,111 @@
+package imageproc
+
+import (
+	"image"
+	"image/color"
+
+	"golang.org/x/image/draw"
+)
+
+var (
+	ImageNetDefaultMean  = [3]float32{0.485, 0.456, 0.406}                // r, g, b means, shaped for Normalize's mean argument
+	ImageNetDefaultSTD   = [3]float32{0.229, 0.224, 0.225}                // r, g, b standard deviations, shaped for Normalize's std argument
+	ImageNetStandardMean = [3]float32{0.5, 0.5, 0.5}                      // with ImageNetStandardSTD, Normalize maps rescaled [0, 1] values onto [-1, 1]
+	ImageNetStandardSTD  = [3]float32{0.5, 0.5, 0.5}                      // r, g, b standard deviations paired with ImageNetStandardMean
+	ClipDefaultMean      = [3]float32{0.48145466, 0.4578275, 0.40821073}  // r, g, b means paired with ClipDefaultSTD
+	ClipDefaultSTD       = [3]float32{0.26862954, 0.26130258, 0.27577711} // r, g, b standard deviations paired with ClipDefaultMean
+)
+
+const (
+	ResizeBilinear = iota // Resize scales with draw.BiLinear
+	ResizeNearestNeighbor // Resize scales with draw.NearestNeighbor
+	ResizeApproxBilinear  // Resize scales with draw.ApproxBiLinear
+	ResizeCatmullrom      // Resize scales with draw.CatmullRom
+)
+
+// Composite flattens img onto an opaque white canvas with the same bounds, removing its alpha channel.
+func Composite(img image.Image) image.Image {
+	bounds := img.Bounds()
+	canvas := image.NewRGBA(bounds)
+	backdrop := image.NewUniform(color.RGBA{R: 255, G: 255, B: 255, A: 255})
+
+	draw.Draw(canvas, bounds, backdrop, image.Point{}, draw.Src)
+	draw.Draw(canvas, bounds, img, bounds.Min, draw.Over)
+	return canvas
+}
+
+// Resize returns a new RGBA image with bounds image.Rect(0, 0, newSize.X, newSize.Y) holding img scaled
+// by the kernel that method selects (one of the Resize* constants). It panics on any other method value.
+func Resize(img image.Image, newSize image.Point, method int) image.Image {
+	var kernel draw.Interpolator
+	switch method { // a switch rather than a map literal, so no lookup table is built on every call
+	case ResizeBilinear:
+		kernel = draw.BiLinear
+	case ResizeNearestNeighbor:
+		kernel = draw.NearestNeighbor
+	case ResizeApproxBilinear:
+		kernel = draw.ApproxBiLinear
+	case ResizeCatmullrom:
+		kernel = draw.CatmullRom
+	default:
+		panic("no resizing method found")
+	}
+	dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
+	kernel.Scale(dst, dst.Rect, img, img.Bounds(), draw.Over, nil)
+	return dst
+}
+
+// Normalize converts img into a flat slice of float32 values, three (r, g, b) per pixel, with
+// pixels visited row by row. Each value is computed as (v - mean[c]) / std[c], where c is the
+// channel index (0 = r, 1 = g, 2 = b).
+//
+// rescale selects v: when true, v is the channel's 8-bit value divided by 255; when false, the
+// pixel is not consulted and v is 0, so every output equals -mean[c] / std[c].
+// NOTE(review): the rescale=false result looks unintended (the raw channel value was probably
+// meant to be used), but TestNormalize pins it, so it is preserved — confirm before changing.
+//
+// channelFirst selects the layout: when true the result is planar (all r values, then all g,
+// then all b); when false it is interleaved (r, g, b, r, g, b, ...).
+//
+// The result has 3*width*height elements, or is nil when img has empty bounds. std is not
+// validated: a zero entry produces Inf or NaN values rather than an error.
+func Normalize(img image.Image, mean, std [3]float32, rescale bool, channelFirst bool) []float32 {
+	bounds := img.Bounds()
+	if bounds.Empty() {
+		return nil
+	}
+
+	// The output length is known up front, so allocate once instead of growing by append.
+	numPixels := bounds.Dx() * bounds.Dy()
+	pixelVals := make([]float32, 3*numPixels)
+
+	i := 0 // index of the current pixel in row-major order
+	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
+		for x := bounds.Min.X; x < bounds.Max.X; x++ {
+			var px [3]float32
+			if rescale {
+				// RGBA yields 16-bit channels; keep the high byte and map it onto [0, 1].
+				r, g, b, _ := img.At(x, y).RGBA()
+				px[0] = float32(r>>8) / 255.0
+				px[1] = float32(g>>8) / 255.0
+				px[2] = float32(b>>8) / 255.0
+			}
+
+			// One shared loop serves both layouts; only the destination index differs.
+			for c, v := range px {
+				v = (v - mean[c]) / std[c]
+				if channelFirst {
+					// planar: channel c occupies its own numPixels-long run
+					pixelVals[c*numPixels+i] = v
+				} else {
+					// interleaved: pixel i occupies three consecutive slots
+					pixelVals[3*i+c] = v
+				}
+			}
+
+			i++
+		}
+	}
+
+	return pixelVals
+}
diff --git a/models/imageproc/images_test.go b/models/imageproc/images_test.go
new file mode 100644
index 00000000..a2e9ed94
--- /dev/null
+++ b/models/imageproc/images_test.go
@@ -0,0 +1,177 @@
+package imageproc
+
+import (
+	"image"
+	"image/color"
+	"image/draw"
+	"reflect"
+	"testing"
+)
+
+func createImage(width, height int, fillCol color.RGBA) image.Image {
+	canvas := image.NewRGBA(image.Rect(0, 0, width, height))
+	draw.Draw(canvas, canvas.Rect, image.NewUniform(fillCol), image.Point{}, draw.Src) // flood the whole canvas with fillCol
+	return canvas
+}
+
+func TestComposite(t *testing.T) {
+	// Every case feeds Composite a uniformly filled image and expects one uniform color back.
+	cases := []struct {
+		name         string
+		img          image.Image
+		expectedRGBA color.RGBA
+	}{
+		{
+			name:         "Transparent image",
+			img:          createImage(5, 5, color.RGBA{0, 0, 0, 0}),
+			expectedRGBA: color.RGBA{255, 255, 255, 255},
+		},
+		{
+			name:         "Solid red image",
+			img:          createImage(5, 5, color.RGBA{255, 0, 0, 255}),
+			expectedRGBA: color.RGBA{255, 0, 0, 255},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			out := Composite(tc.img)
+			size := out.Bounds().Size()
+			wantR, wantG, wantB, wantA := tc.expectedRGBA.RGBA()
+			for x := 0; x < size.X; x++ {
+				for y := 0; y < size.Y; y++ {
+					gotR, gotG, gotB, gotA := out.At(x, y).RGBA()
+					if gotR == wantR && gotG == wantG && gotB == wantB && gotA == wantA {
+						continue
+					}
+					t.Errorf("Pixel mismatch at (%d, %d): got (%d, %d, %d, %d), want (%d, %d, %d, %d)",
+						x, y, gotR, gotG, gotB, gotA, wantR, wantG, wantB, wantA)
+				}
+			}
+		})
+	}
+}
+
+func TestResize(t *testing.T) {
+	// Only the dimensions of the result are verified; pixel contents are not inspected.
+	cases := []struct {
+		name     string
+		img      image.Image
+		newSize  image.Point
+		method   int
+		expected image.Point
+	}{
+		{
+			name:     "Resize with bilinear interpolation",
+			img:      createImage(5, 5, color.RGBA{255, 0, 0, 255}),
+			newSize:  image.Pt(10, 10),
+			method:   ResizeBilinear,
+			expected: image.Pt(10, 10),
+		},
+		{
+			name:     "Resize with nearest neighbor",
+			img:      createImage(10, 10, color.RGBA{0, 255, 0, 255}),
+			newSize:  image.Pt(5, 5),
+			method:   ResizeNearestNeighbor,
+			expected: image.Pt(5, 5),
+		},
+		{
+			name:     "Resize with catmullrom",
+			img:      createImage(1024, 1024, color.RGBA{0, 0, 255, 255}),
+			newSize:  image.Pt(10, 10),
+			method:   ResizeCatmullrom,
+			expected: image.Pt(10, 10),
+		},
+		{
+			name:     "Resize with approx bilinear",
+			img:      createImage(1024, 768, color.RGBA{100, 100, 100, 255}),
+			newSize:  image.Pt(4, 3),
+			method:   ResizeApproxBilinear,
+			expected: image.Pt(4, 3),
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := Resize(tc.img, tc.newSize, tc.method).Bounds().Size()
+			if got == tc.expected {
+				return
+			}
+			t.Errorf("Unexpected size for resized image: got (%d, %d), want (%d, %d)", got.X, got.Y, tc.expected.X, tc.expected.Y)
+		})
+	}
+}
+
+func TestResizeInvalidMethod(t *testing.T) {
+	// Resize must panic for a method outside the Resize* constants; the deferred recover observes it.
+	defer func() {
+		if recover() != nil {
+			return
+		}
+		t.Errorf("Expected panic for invalid resizing method, but did not panic")
+	}()
+	Resize(createImage(10, 10, color.RGBA{0, 0, 0, 255}), image.Point{5, 5}, -1)
+}
+
+func TestNormalize(t *testing.T) {
+	// Outputs are compared exactly via reflect.DeepEqual, so expectations are float32 literals.
+	cases := []struct {
+		name         string
+		img          image.Image
+		mean         [3]float32
+		std          [3]float32
+		rescale      bool
+		channelFirst bool
+		expected     []float32
+	}{
+		{
+			name:         "Rescale with channel first",
+			img:          createImage(2, 2, color.RGBA{128, 128, 128, 255}),
+			mean:         ImageNetStandardMean,
+			std:          ImageNetStandardSTD,
+			rescale:      true,
+			channelFirst: true,
+			// Planar layout: four r values, then four g values, then four b values.
+			expected: []float32{
+				0.003921628, 0.003921628, 0.003921628, 0.003921628,
+				0.003921628, 0.003921628, 0.003921628, 0.003921628,
+				0.003921628, 0.003921628, 0.003921628, 0.003921628,
+			},
+		},
+		{
+			name:         "Rescale without channel first",
+			img:          createImage(2, 2, color.RGBA{255, 0, 0, 255}),
+			mean:         [3]float32{0.0, 0.0, 0.0},
+			std:          [3]float32{1.0, 1.0, 1.0},
+			rescale:      true,
+			channelFirst: false,
+			// Interleaved layout: r, g, b for each of the four pixels, two pixels per line.
+			expected: []float32{
+				1.0, 0.0, 0.0, 1.0, 0.0, 0.0,
+				1.0, 0.0, 0.0, 1.0, 0.0, 0.0,
+			},
+		},
+		{
+			name:         "No rescale with mean/std adjustment",
+			img:          createImage(2, 2, color.RGBA{100, 150, 200, 255}),
+			mean:         ClipDefaultMean,
+			std:          ClipDefaultSTD,
+			rescale:      false,
+			channelFirst: false,
+			// With rescale off Normalize ignores the pixel values, so each entry is -mean/std.
+			expected: []float32{
+				-1.7922626, -1.7520971, -1.4802198, -1.7922626, -1.7520971, -1.4802198,
+				-1.7922626, -1.7520971, -1.4802198, -1.7922626, -1.7520971, -1.4802198,
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := Normalize(tc.img, tc.mean, tc.std, tc.rescale, tc.channelFirst)
+			if reflect.DeepEqual(got, tc.expected) {
+				return
+			}
+			t.Errorf("Test %s failed: got %v, want %v", tc.name, got, tc.expected)
+		})
+	}
+}
diff --git a/server/imageproc/images.go b/models/mllama/imageproc.go
similarity index 60%
rename from server/imageproc/images.go
rename to models/mllama/imageproc.go
index 688cbf8a..d7a5ad8d 100644
--- a/server/imageproc/images.go
+++ b/models/mllama/imageproc.go
@@ -1,19 +1,20 @@
-package imageproc
+package mllama
 
 import (
-	"bytes"
 	"fmt"
 	"image"
-	"image/color"
 	_ "image/jpeg"
 	_ "image/png"
+	"io"
 	"math"
 	"slices"
 
 	"golang.org/x/image/draw"
+
+	"github.com/ollama/ollama/models/imageproc"
 )
 
-func GetSupportedAspectRatios(maxTiles int) []image.Point {
+func getSupportedAspectRatios(maxTiles int) []image.Point {
 	ratios := []image.Point{}
 
 	for w := range maxTiles {
@@ -37,28 +38,8 @@ func clip(a, a_min, a_max int) int {
 	return a
 }
 
-func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
-	targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
-	targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
-
-	scaleWidth := float64(targetWidth) / float64(imageSize.X)
-	scaleHeight := float64(targetHeight) / float64(imageSize.Y)
-
-	var w, h int
-
-	if scaleWidth < scaleHeight {
-		w = targetWidth
-		h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
-	} else {
-		w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
-		h = targetHeight
-	}
-
-	return image.Point{w, h}
-}
-
 func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
-	possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
+	possibleTileArrangements := getSupportedAspectRatios(maxImageTiles)
 	possibleCanvasSizes := []image.Point{}
 	for _, pta := range possibleTileArrangements {
 		possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize})
@@ -113,6 +94,53 @@ func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) i
 	return selectedCanvas
 }
 
+// getImageSizeFitToCanvas computes the size an image should be scaled to for the given canvas. Each
+// dimension goes through clip (bounds: tileSize and the canvas dimension); the smaller of the two
+// resulting scale factors is applied to both axes, with the result floored and capped at the target.
+func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
+	target := image.Point{
+		X: clip(imageSize.X, tileSize, canvasSize.X),
+		Y: clip(imageSize.Y, tileSize, canvasSize.Y),
+	}
+	scaleX := float64(target.X) / float64(imageSize.X)
+	scaleY := float64(target.Y) / float64(imageSize.Y)
+
+	if scaleX < scaleY {
+		// Width is the tighter fit: keep the full target width and derive the height from it.
+		return image.Point{X: target.X, Y: min(int(math.Floor(float64(imageSize.Y)*scaleX)), target.Y)}
+	}
+
+	// Otherwise height is the tighter fit (ties included): derive the width from it.
+	return image.Point{X: min(int(math.Floor(float64(imageSize.X)*scaleY)), target.X), Y: target.Y}
+}
+
+// resizeImage scales img to fit the tile canvas chosen for it and returns the scaled image plus the
+// canvas shape in tiles. PNGs are composited first; tiles are square, outputSize.Y pixels per side.
+func resizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
+	if format == "png" {
+		img = imageproc.Composite(img)
+	}
+
+	// Use the extent, not Bounds().Max: the two differ when the image origin is not (0, 0).
+	size, tileSize := img.Bounds().Size(), outputSize.Y
+	canvasSize := getOptimalTiledCanvas(size, maxImageTiles, tileSize)
+	aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
+	newSize := getImageSizeFitToCanvas(size, canvasSize, tileSize)
+	return imageproc.Resize(img, newSize, imageproc.ResizeBilinear), aspectRatio
+}
+
+// padImage draws img into the top-left corner of a new RGBA canvas of outputSize scaled per axis by
+// aspectRatio; uncovered pixels keep NewRGBA's zero value. img's own origin need not be (0, 0).
+func padImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
+	dst := image.NewRGBA(image.Rect(0, 0, outputSize.X*aspectRatio.X, outputSize.Y*aspectRatio.Y))
+
+	// Anchor the destination at the canvas origin and read from img's own Min, so offset images
+	// (e.g. sub-images) are copied whole instead of being shifted or dropped.
+	src := img.Bounds()
+	draw.Draw(dst, image.Rectangle{Max: src.Size()}, img, src.Min, draw.Over)
+	return dst
+}
+
 func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
 	b := img.Bounds()
 	width := b.Max.X - b.Min.X
@@ -134,107 +162,40 @@ func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
 	return images
 }
 
-// remove the "alpha" channel by drawing over a prefilled image
-func compositeImage(img image.Image) image.Image {
-	dst := image.NewRGBA(img.Bounds())
-
-	white := color.RGBA{255, 255, 255, 255}
-	draw.Draw(dst, dst.Bounds(), &image.Uniform{white}, image.Point{}, draw.Src)
-	draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
-
-	return dst
-}
-
-func ResizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
-	if format == "png" {
-		img = compositeImage(img)
-	}
-
-	b := img.Bounds()
-	tileSize := outputSize.Y
-
-	canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
-	aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
-	newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
-
-	dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
-
-	// scaling choices:
-	//   NearestNeighbor	fast, blocky output
-	//   ApproxBiLinear	fast, medium quality
-	//   BiLinear		slow, high quality
-	//   CatmullRom		very slow, very high quality
-	draw.BiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
-
-	return dst, aspectRatio
-}
-
-func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
-	paddedSize := image.Point{
-		X: outputSize.X * aspectRatio.X,
-		Y: outputSize.Y * aspectRatio.Y,
-	}
-
-	dst := image.NewRGBA(image.Rect(0, 0, paddedSize.X, paddedSize.Y))
-	draw.Draw(dst, img.Bounds(), img, image.Point{0, 0}, draw.Over)
-
-	return dst
-}
-
-func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
+func packImages(img image.Image, aspectRatio image.Point) []float32 {
 	subImages := splitToTiles(img, aspectRatio)
 
 	var pixelVals []float32
 
+	rescale := true      // Normalize divides each 8-bit channel value by 255 before applying mean/std
+	channelFirst := true // Normalize emits each tile as all r values, then all g, then all b
+
 	for _, subImg := range subImages {
-		bounds := subImg.Bounds()
-		var rVals, gVals, bVals []float32
-		for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
-			for x := bounds.Min.X; x < bounds.Max.X; x++ {
-				c := subImg.At(x, y)
-				r, g, b, _ := c.RGBA()
-				rVal := float32(r>>8) / 255.0
-				gVal := float32(g>>8) / 255.0
-				bVal := float32(b>>8) / 255.0
-
-				rVal = (rVal - mean[0]) / std[0]
-				gVal = (gVal - mean[1]) / std[1]
-				bVal = (bVal - mean[2]) / std[2]
-
-				rVals = append(rVals, rVal)
-				gVals = append(gVals, gVal)
-				bVals = append(bVals, bVal)
-			}
-		}
-		pixelVals = append(pixelVals, rVals...)
-		pixelVals = append(pixelVals, gVals...)
-		pixelVals = append(pixelVals, bVals...)
+		vals := imageproc.Normalize(subImg, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, rescale, channelFirst)
+		pixelVals = append(pixelVals, vals...) // tiles are concatenated in the order splitToTiles returned them
 	}
 
 	return pixelVals
 }
 
-func Preprocess(imageData []byte) ([]float32, int, error) {
-	// todo: need guard in here for bad image data
-
-	// mllama values
+func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) { // returns packed pixel values and an options map holding "aspectRatioIndex"
 	outputSize := image.Point{560, 560}
 	maxTiles := 4
 
-	// clip values
-	mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
-	std := [3]float32{0.26862954, 0.26130258, 0.27577711}
-
-	img, format, err := image.Decode(bytes.NewReader(imageData))
+	img, format, err := image.Decode(imageData)
 	if err != nil {
-		return nil, 0, fmt.Errorf("failed to decode image: %w", err)
+		return nil, nil, fmt.Errorf("failed to decode image: %w", err)
 	}
 
-	newImage, aspectRatio := ResizeImage(img, format, outputSize, maxTiles)
-	newImage = PadImage(newImage, outputSize, aspectRatio)
+	newImage, aspectRatio := resizeImage(img, format, outputSize, maxTiles)
+	newImage = padImage(newImage, outputSize, aspectRatio)
 
-	data := PackImages(newImage, aspectRatio, mean, std)
-	aspectRatioIndex := slices.Index(GetSupportedAspectRatios(maxTiles), aspectRatio) + 1
+	data := packImages(newImage, aspectRatio)
+	aspectRatioIndex := slices.Index(getSupportedAspectRatios(maxTiles), aspectRatio) + 1 // 1-based position in the supported list; 0 when slices.Index finds no match
 
-	return data, aspectRatioIndex, nil
+	opts := map[string]any{
+		"aspectRatioIndex": aspectRatioIndex, // stored as an int; callers type-assert it back out of the map
+	}
+
+	return data, opts, nil
 }
diff --git a/server/imageproc/images_test.go b/models/mllama/imageproc_test.go
similarity index 95%
rename from server/imageproc/images_test.go
rename to models/mllama/imageproc_test.go
index 7ad5329b..a14b91bd 100644
--- a/server/imageproc/images_test.go
+++ b/models/mllama/imageproc_test.go
@@ -1,4 +1,4 @@
-package imageproc
+package mllama
 
 import (
 	"bytes"
@@ -35,7 +35,7 @@ func TestAspectRatios(t *testing.T) {
 	}
 
 	for _, c := range cases {
-		actual := GetSupportedAspectRatios(c.MaxTiles)
+		actual := getSupportedAspectRatios(c.MaxTiles)
 
 		if diff := cmp.Diff(actual, c.Expected); diff != "" {
 			t.Errorf("mismatch (-got +want):\n%s", diff)
@@ -299,7 +299,7 @@ func TestResize(t *testing.T) {
 	}
 
 	for _, c := range cases {
-		actualImage, actualAspectRatio := ResizeImage(c.TestImage, "png", c.OutputSize, c.MaxImageTiles)
+		actualImage, actualAspectRatio := resizeImage(c.TestImage, "png", c.OutputSize, c.MaxImageTiles)
 
 		if actualImage.Bounds() != c.ExpectedImage.Bounds() {
 			t.Errorf("image size incorrect: '%#v': expected: '%#v'", actualImage.Bounds(), c.ExpectedImage.Bounds())
@@ -329,7 +329,7 @@ func TestPad(t *testing.T) {
 	}
 
 	for _, c := range cases {
-		actual := PadImage(c.TestImage, c.OutputSize, c.AspectRatio)
+		actual := padImage(c.TestImage, c.OutputSize, c.AspectRatio)
 
 		if actual.Bounds() != c.Expected.Bounds() {
 			t.Errorf("image size incorrect: '%#v': expected: '%#v'", actual.Bounds(), c.Expected.Bounds())
@@ -344,9 +344,6 @@ func TestPackImages(t *testing.T) {
 		ExpectedVals int
 	}
 
-	mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
-	std := [3]float32{0.26862954, 0.26130258, 0.27577711}
-
 	cases := []packCase{
 		{
 			TestImage:    image.NewRGBA(image.Rect(0, 0, 1120, 1120)),
@@ -366,7 +363,7 @@ func TestPackImages(t *testing.T) {
 	}
 
 	for _, c := range cases {
-		actualVals := PackImages(c.TestImage, c.AspectRatio, mean, std)
+		actualVals := packImages(c.TestImage, c.AspectRatio)
 		if len(actualVals) != c.ExpectedVals {
 			t.Errorf("packed image size incorrect: '%d': expected: '%d'", len(actualVals), c.ExpectedVals)
 		}
@@ -400,7 +397,7 @@ func TestPreprocess(t *testing.T) {
 			t.Fatal(err)
 		}
 
-		imgData, aspectRatioID, err := Preprocess(buf.Bytes())
+		imgData, opts, err := Preprocess(&buf)
 		if err != nil {
 			t.Fatalf("error processing: %q", err)
 		}
@@ -409,6 +406,13 @@ func TestPreprocess(t *testing.T) {
 			t.Errorf("no image data returned")
 		}
 
+		ar, ok := opts["aspectRatioIndex"]
+		if !ok {
+			t.Fatalf("no aspect ratio found")
+		}
+
+		aspectRatioID := ar.(int)
+
 		if aspectRatioID != c.ExpectedAspectRatioID {
 			t.Errorf("aspect ratio incorrect: '%d': expected: '%d'", aspectRatioID, c.ExpectedAspectRatioID)
 		}
diff --git a/server/prompt.go b/server/prompt.go
index a6401983..ff48b43d 100644
--- a/server/prompt.go
+++ b/server/prompt.go
@@ -11,7 +11,7 @@ import (
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
-	"github.com/ollama/ollama/server/imageproc"
+	"github.com/ollama/ollama/models/mllama"
 	"github.com/ollama/ollama/template"
 )
 
@@ -92,7 +92,7 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 			var imgData llm.ImageData
 
 			if isMllama {
-				data, aspectRatioID, err := imageproc.Preprocess(i)
+				data, opts, err := mllama.Preprocess(bytes.NewReader(i))
 				if err != nil {
 					return "", nil, err
 				}
@@ -103,10 +103,15 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 					return "", nil, err
 				}
 
+				ar, ok := opts["aspectRatioIndex"].(int)
+				if !ok {
+					return "", nil, fmt.Errorf("missing aspect ratio for image")
+				}
+
 				imgData = llm.ImageData{
 					ID:            len(images),
 					Data:          buf.Bytes(),
-					AspectRatioID: aspectRatioID,
+					AspectRatioID: ar,
 				}
 				imgPrompt = "<|image|>"
 			} else {
diff --git a/server/routes.go b/server/routes.go
index c5fd3293..ccd86265 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -31,10 +31,10 @@ import (
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/models/mllama"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/runners"
-	"github.com/ollama/ollama/server/imageproc"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
@@ -192,12 +192,18 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	images := make([]llm.ImageData, len(req.Images))
 	for i := range req.Images {
 		if isMllama {
-			data, aspectRatioID, err := imageproc.Preprocess(req.Images[i])
+			data, opts, err := mllama.Preprocess(bytes.NewReader(req.Images[i]))
 			if err != nil {
 				c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
 				return
 			}
 
+			ar, ok := opts["aspectRatioIndex"].(int)
+			if !ok {
+				c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
+				return
+			}
+
 			buf := new(bytes.Buffer)
 			err = binary.Write(buf, binary.LittleEndian, data)
 			if err != nil {
@@ -205,7 +211,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 				return
 			}
 
-			images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: aspectRatioID}
+			images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: ar}
 		} else {
 			images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
 		}