From 41ae232e106aa6e43ae4ee72471cb3f6d1c97c56 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Wed, 1 May 2024 11:08:04 -0700
Subject: [PATCH] split model layer into metadata and data layers

---
 llm/ggla.go      | 12 +++++++
 llm/ggml.go      |  1 +
 llm/gguf.go      | 11 ++++--
 server/images.go | 90 ++++++++++++++++++++++++++++++++++++++++++++----
 server/layer.go  | 11 +++---
 5 files changed, 113 insertions(+), 12 deletions(-)

diff --git a/llm/ggla.go b/llm/ggla.go
index cf14f214..4785fa3f 100644
--- a/llm/ggla.go
+++ b/llm/ggla.go
@@ -33,6 +33,7 @@ func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
 
 type ggla struct {
 	*containerGGLA
+	offset int64
 
 	kv      KV
 	tensors []*Tensor
@@ -53,6 +54,10 @@ func (llm *ggla) Tensors() Tensors {
 	return llm.tensors
 }
 
+func (llm *ggla) Offset() int64 {
+	return llm.offset
+}
+
 func (llm *ggla) decode(rs io.ReadSeeker) error {
 	var r uint32
 	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
@@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
 	}
 	llm.kv["alpha"] = alpha
 
+	offset, err := rs.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return err
+	}
+
+	llm.offset = offset
+
 	for {
 		var dims uint32
 		if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
diff --git a/llm/ggml.go b/llm/ggml.go
index a83bba8f..7e654152 100644
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -16,6 +16,7 @@ type GGML struct {
 type model interface {
 	KV() KV
 	Tensors() Tensors
+	Offset() int64
 }
 
 type KV map[string]any
diff --git a/llm/gguf.go b/llm/gguf.go
index 5f6e8004..9421e95f 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -55,7 +55,7 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
 	model := newGGUF(c)
 	slog.Debug(fmt.Sprintf("model = %#v", model))
 
-	if err := model.Decode(rs); err != nil {
+	if err := model.decode(rs); err != nil {
 		return nil, err
 	}
 
@@ -90,6 +90,7 @@ const (
 
 type gguf struct {
 	*containerGGUF
+	offset int64
 
 	kv      KV
 	tensors []*Tensor
@@ -116,6 +117,10 @@ func (llm *gguf) Tensors() Tensors {
 	return llm.tensors
 }
 
+func (llm *gguf) Offset() int64 {
+	return llm.offset
+}
+
 func (llm *gguf) numTensor() uint64 {
 	switch llm.Version {
 	case 1:
@@ -138,7 +143,7 @@ func (llm *gguf) numKV() uint64 {
 	}
 }
 
-func (llm *gguf) Decode(rs io.ReadSeeker) error {
+func (llm *gguf) decode(rs io.ReadSeeker) error {
 	// decode key-values
 	for i := 0; uint64(i) < llm.numKV(); i++ {
 		k, err := readGGUFString(llm, rs)
@@ -250,6 +255,8 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 		return err
 	}
 
+	llm.offset = offset + padding
+
 	for _, tensor := range llm.tensors {
 		if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
 			return err
diff --git a/server/images.go b/server/images.go
index 5519463e..6274c637 100644
--- a/server/images.go
+++ b/server/images.go
@@ -208,6 +208,14 @@ func GetModel(name string) (*Model, error) {
 
 		switch layer.MediaType {
 		case "application/vnd.ollama.image.model":
+			model.ModelPath = filename
+			model.ParentModel = layer.From
+		case "application/vnd.ollama.image.model+metadata", "application/vnd.ollama.image.model+data":
+			filename, err = GetBlobsPath(layer.MergeBase)
+			if err != nil {
+				return nil, err
+			}
+
 			model.ModelPath = filename
 			model.ParentModel = layer.From
 		case "application/vnd.ollama.image.embed":
@@ -349,10 +357,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
 	}
 	for _, baseLayer := range baseLayers {
-		if quantization != "" &&
-			baseLayer.MediaType == "application/vnd.ollama.image.model" &&
-			baseLayer.GGML != nil &&
-			baseLayer.GGML.Name() == "gguf" {
+		if quantization != "" && baseLayer.MediaType == "application/vnd.ollama.image.model" {
 			ftype, err := llm.ParseFileType(quantization)
 			if err != nil {
 				return err
 			}
@@ -393,6 +398,36 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
 			config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
 			config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType())
 			config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
+
+			f, err := baseLayer.Layer.Open()
+			if err != nil {
+				return err
+			}
+			defer f.Close()
+
+			metadata := io.NewSectionReader(f, 0, baseLayer.GGML.Offset())
+			metadataLayer, err := NewLayer(metadata, "application/vnd.ollama.image.model+metadata")
+			if err != nil {
+				return err
+			}
+			metadataLayer.MergeBase = baseLayer.Digest
+
+			layers = append(layers, metadataLayer)
+
+			stat, err := f.Stat()
+			if err != nil {
+				return err
+			}
+
+			data := io.NewSectionReader(f, baseLayer.GGML.Offset(), stat.Size())
+			dataLayer, err := NewLayer(data, "application/vnd.ollama.image.model+data")
+			if err != nil {
+				return err
+			}
+			dataLayer.MergeBase = baseLayer.Digest
+
+			layers = append(layers, dataLayer)
+
+			continue
 		}
 
 		layers = append(layers, baseLayer.Layer)
@@ -524,8 +559,8 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
 	}
 
 	for _, layer := range append(layers, layer) {
-		if layer.status != "" {
-			fn(api.ProgressResponse{Status: layer.status})
+		if layer.message != "" {
+			fn(api.ProgressResponse{Status: layer.message})
 		}
 	}
 
@@ -627,6 +662,9 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{},
 
 		for _, layer := range manifest.Layers {
 			delete(deleteMap, layer.Digest)
+			if layer.MergeBase != "" {
+				delete(deleteMap, layer.MergeBase)
+			}
 		}
 		delete(deleteMap, manifest.Config.Digest)
 
@@ -733,6 +771,9 @@ func DeleteModel(name string) error {
 	deleteMap := make(map[string]struct{})
 	for _, layer := range manifest.Layers {
 		deleteMap[layer.Digest] = struct{}{}
+		if layer.MergeBase != "" {
+			deleteMap[layer.MergeBase] = struct{}{}
+		}
 	}
 	deleteMap[manifest.Config.Digest] = struct{}{}
 
@@ -855,6 +896,43 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	}
 	delete(deleteMap, manifest.Config.Digest)
 
+	type mergedLayer struct {
+		Metadata, Data *Layer
+	}
+
+	mergedLayers := make(map[string]mergedLayer)
+	for _, layer := range manifest.Layers {
+		merged := mergedLayers[layer.MergeBase]
+		if layer.MediaType == "application/vnd.ollama.image.model+metadata" {
+			merged.Metadata = layer
+		} else if layer.MediaType == "application/vnd.ollama.image.model+data" {
+			merged.Data = layer
+		} else {
+			continue
+		}
+
+		mergedLayers[layer.MergeBase] = merged
+	}
+
+	for _, mergedLayer := range mergedLayers {
+		fn(api.ProgressResponse{Status: "merging layers"})
+		metadata, err := mergedLayer.Metadata.Open()
+		if err != nil {
+			return err
+		}
+		defer metadata.Close()
+
+		data, err := mergedLayer.Data.Open()
+		if err != nil {
+			return err
+		}
+		defer data.Close()
+
+		if _, err := NewLayer(io.MultiReader(metadata, data), "application/vnd.ollama.image.model"); err != nil {
+			return err
+		}
+	}
+
 	fn(api.ProgressResponse{Status: "verifying sha256 digest"})
 	for _, layer := range layers {
 		if err := verifyBlob(layer.Digest); err != nil {
diff --git a/server/layer.go b/server/layer.go
index dcca3854..9084e16e 100644
--- a/server/layer.go
+++ b/server/layer.go
@@ -12,7 +12,10 @@ type Layer struct {
 	Digest    string `json:"digest"`
 	Size      int64  `json:"size"`
 	From      string `json:"from,omitempty"`
-	status string
+
+	MergeBase string `json:"merge_base,omitempty"`
+
+	message string
 }
 
 func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
@@ -56,7 +59,7 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
 		MediaType: mediatype,
 		Digest:    digest,
 		Size:      n,
-		status:    fmt.Sprintf("%s %s", status, digest),
+		message:   fmt.Sprintf("%s %s", status, digest),
 	}, nil
 }
 
@@ -76,11 +79,11 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
 		Digest:    digest,
 		Size:      fi.Size(),
 		From:      from,
-		status:    fmt.Sprintf("using existing layer %s", digest),
+		message:   fmt.Sprintf("using existing layer %s", digest),
 	}, nil
 }
 
-func (l *Layer) Open() (io.ReadCloser, error) {
+func (l *Layer) Open() (*os.File, error) {
 	blob, err := GetBlobsPath(l.Digest)
 	if err != nil {
 		return nil, err