split model layer into metadata and data layers

This commit is contained in:
Michael Yang 2024-05-01 11:08:04 -07:00
parent 122b35c784
commit 41ae232e10
5 changed files with 113 additions and 12 deletions

View File

@ -33,6 +33,7 @@ func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
type ggla struct {
*containerGGLA
offset int64
kv KV
tensors []*Tensor
@ -53,6 +54,10 @@ func (llm *ggla) Tensors() Tensors {
return llm.tensors
}
// Offset reports the stream position that decode saved in llm.offset.
// decode captures it with rs.Seek(0, io.SeekCurrent) right after storing the
// "alpha" key and before it starts its tensor-reading loop.
func (llm *ggla) Offset() int64 {
	pos := llm.offset
	return pos
}
func (llm *ggla) decode(rs io.ReadSeeker) error {
var r uint32
if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
}
llm.kv["alpha"] = alpha
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
llm.offset = offset
for {
var dims uint32
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {

View File

@ -16,6 +16,7 @@ type GGML struct {
// model is the value returned by a container's Decode method. In this view
// both ggla and gguf provide Tensors and Offset methods matching it.
type model interface {
// KV returns the model's key-value metadata.
KV() KV
// Tensors returns the model's tensor descriptions.
Tensors() Tensors
// Offset returns the position recorded by the implementation's decode step.
// NOTE(review): CreateModel appears to use this value as the boundary
// between the "model+metadata" and "model+data" sections of a blob —
// confirm that GGML forwards to this method.
Offset() int64
}
// KV maps string keys to values of any type; it is the type of the kv field
// on ggla and gguf (ggla's decode, for example, stores an "alpha" entry).
type KV map[string]any

View File

@ -55,7 +55,7 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
model := newGGUF(c)
slog.Debug(fmt.Sprintf("model = %#v", model))
if err := model.Decode(rs); err != nil {
if err := model.decode(rs); err != nil {
return nil, err
}
@ -90,6 +90,7 @@ const (
type gguf struct {
*containerGGUF
offset int64
kv KV
tensors []*Tensor
@ -116,6 +117,10 @@ func (llm *gguf) Tensors() Tensors {
return llm.tensors
}
// Offset reports the value that decode saved in llm.offset, which decode
// assigns as offset + padding just before it seeks past each tensor's data.
// NOTE(review): how offset and padding are derived is outside this view —
// presumably this is the aligned start of the tensor data; confirm in decode.
func (llm *gguf) Offset() int64 {
	pos := llm.offset
	return pos
}
func (llm *gguf) numTensor() uint64 {
switch llm.Version {
case 1:
@ -138,7 +143,7 @@ func (llm *gguf) numKV() uint64 {
}
}
func (llm *gguf) Decode(rs io.ReadSeeker) error {
func (llm *gguf) decode(rs io.ReadSeeker) error {
// decode key-values
for i := 0; uint64(i) < llm.numKV(); i++ {
k, err := readGGUFString(llm, rs)
@ -250,6 +255,8 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
return err
}
llm.offset = offset + padding
for _, tensor := range llm.tensors {
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
return err

View File

@ -208,6 +208,14 @@ func GetModel(name string) (*Model, error) {
switch layer.MediaType {
case "application/vnd.ollama.image.model":
model.ModelPath = filename
model.ParentModel = layer.From
case "application/vnd.ollama.image.model+metadata", "application/vnd.ollama.image.model+data":
filename, err = GetBlobsPath(layer.MergeBase)
if err != nil {
return nil, err
}
model.ModelPath = filename
model.ParentModel = layer.From
case "application/vnd.ollama.image.embed":
@ -349,10 +357,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
}
for _, baseLayer := range baseLayers {
if quantization != "" &&
baseLayer.MediaType == "application/vnd.ollama.image.model" &&
baseLayer.GGML != nil &&
baseLayer.GGML.Name() == "gguf" {
if quantization != "" && baseLayer.MediaType == "application/vnd.ollama.image.model" {
ftype, err := llm.ParseFileType(quantization)
if err != nil {
return err
@ -393,6 +398,36 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType())
config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
f, err := baseLayer.Layer.Open()
if err != nil {
return err
}
defer f.Close()
metadata := io.NewSectionReader(f, 0, baseLayer.GGML.Offset())
metadataLayer, err := NewLayer(metadata, "application/vnd.ollama.image.model+metadata")
if err != nil {
return err
}
metadataLayer.MergeBase = baseLayer.Digest
layers = append(layers, metadataLayer)
stat, err := f.Stat()
if err != nil {
return err
}
data := io.NewSectionReader(f, baseLayer.GGML.Offset(), stat.Size())
dataLayer, err := NewLayer(data, "application/vnd.ollama.image.model+data")
if err != nil {
return err
}
dataLayer.MergeBase = baseLayer.Digest
layers = append(layers, dataLayer)
continue
}
layers = append(layers, baseLayer.Layer)
@ -524,8 +559,8 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
}
for _, layer := range append(layers, layer) {
if layer.status != "" {
fn(api.ProgressResponse{Status: layer.status})
if layer.message != "" {
fn(api.ProgressResponse{Status: layer.message})
}
}
@ -627,6 +662,9 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{},
for _, layer := range manifest.Layers {
delete(deleteMap, layer.Digest)
if layer.MergeBase != "" {
delete(deleteMap, layer.MergeBase)
}
}
delete(deleteMap, manifest.Config.Digest)
@ -733,6 +771,9 @@ func DeleteModel(name string) error {
deleteMap := make(map[string]struct{})
for _, layer := range manifest.Layers {
deleteMap[layer.Digest] = struct{}{}
if layer.MergeBase != "" {
deleteMap[layer.MergeBase] = struct{}{}
}
}
deleteMap[manifest.Config.Digest] = struct{}{}
@ -855,6 +896,43 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
}
delete(deleteMap, manifest.Config.Digest)
type mergedLayer struct {
Metadata, Data *Layer
}
mergedLayers := make(map[string]mergedLayer)
for _, layer := range manifest.Layers {
merged := mergedLayers[layer.MergeBase]
if layer.MediaType == "application/vnd.ollama.image.model+metadata" {
merged.Metadata = layer
} else if layer.MediaType == "application/vnd.ollama.image.model+data" {
merged.Data = layer
} else {
continue
}
mergedLayers[layer.MergeBase] = merged
}
for _, mergedLayer := range mergedLayers {
fn(api.ProgressResponse{Status: "merging layers"})
metadata, err := mergedLayer.Metadata.Open()
if err != nil {
return err
}
defer metadata.Close()
data, err := mergedLayer.Data.Open()
if err != nil {
return err
}
defer data.Close()
if _, err := NewLayer(io.MultiReader(metadata, data), "application/vnd.ollama.image.model"); err != nil {
return err
}
}
fn(api.ProgressResponse{Status: "verifying sha256 digest"})
for _, layer := range layers {
if err := verifyBlob(layer.Digest); err != nil {

View File

@ -12,7 +12,10 @@ type Layer struct {
Digest string `json:"digest"`
Size int64 `json:"size"`
From string `json:"from,omitempty"`
status string
MergeBase string `json:"merge_base,omitempty"`
message string
}
func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
@ -56,7 +59,7 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
MediaType: mediatype,
Digest: digest,
Size: n,
status: fmt.Sprintf("%s %s", status, digest),
message: fmt.Sprintf("%s %s", status, digest),
}, nil
}
@ -76,11 +79,11 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
Digest: digest,
Size: fi.Size(),
From: from,
status: fmt.Sprintf("using existing layer %s", digest),
message: fmt.Sprintf("using existing layer %s", digest),
}, nil
}
func (l *Layer) Open() (io.ReadCloser, error) {
func (l *Layer) Open() (*os.File, error) {
blob, err := GetBlobsPath(l.Digest)
if err != nil {
return nil, err