split model layer into metadata and data layers
This commit is contained in:
parent
122b35c784
commit
41ae232e10
12
llm/ggla.go
12
llm/ggla.go
@ -33,6 +33,7 @@ func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
|
||||
|
||||
type ggla struct {
|
||||
*containerGGLA
|
||||
offset int64
|
||||
|
||||
kv KV
|
||||
tensors []*Tensor
|
||||
@ -53,6 +54,10 @@ func (llm *ggla) Tensors() Tensors {
|
||||
return llm.tensors
|
||||
}
|
||||
|
||||
// Offset returns the value stored in llm.offset. decode sets that field to the
// reader's current position (rs.Seek(0, io.SeekCurrent)) after it has stored
// the "alpha" key-value and before it enters its read loop.
func (llm *ggla) Offset() int64 {
|
||||
return llm.offset
|
||||
}
|
||||
|
||||
func (llm *ggla) decode(rs io.ReadSeeker) error {
|
||||
var r uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
|
||||
@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
|
||||
}
|
||||
llm.kv["alpha"] = alpha
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
llm.offset = offset
|
||||
|
||||
for {
|
||||
var dims uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
|
||||
|
@ -16,6 +16,7 @@ type GGML struct {
|
||||
// model is the interface returned by the container Decode methods
// (containerGGLA.Decode and containerGGUF.Decode both return a model).
type model interface {
|
||||
// KV returns the model's KV map.
KV() KV
|
||||
// Tensors returns the model's Tensors value.
Tensors() Tensors
|
||||
// Offset returns a position recorded while decoding.
// NOTE(review): this looks like the boundary between the metadata and the
// tensor data in the underlying file — confirm against the callers.
Offset() int64
|
||||
}
|
||||
|
||||
// KV is a map from string keys to values of any type; the decoders store
// entries such as "alpha" in it.
type KV map[string]any
|
||||
|
11
llm/gguf.go
11
llm/gguf.go
@ -55,7 +55,7 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
|
||||
|
||||
model := newGGUF(c)
|
||||
slog.Debug(fmt.Sprintf("model = %#v", model))
|
||||
if err := model.Decode(rs); err != nil {
|
||||
if err := model.decode(rs); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -90,6 +90,7 @@ const (
|
||||
|
||||
type gguf struct {
|
||||
*containerGGUF
|
||||
offset int64
|
||||
|
||||
kv KV
|
||||
tensors []*Tensor
|
||||
@ -116,6 +117,10 @@ func (llm *gguf) Tensors() Tensors {
|
||||
return llm.tensors
|
||||
}
|
||||
|
||||
// Offset returns the value stored in llm.offset. decode sets that field to
// offset + padding immediately before it seeks past each tensor's data.
func (llm *gguf) Offset() int64 {
|
||||
return llm.offset
|
||||
}
|
||||
|
||||
func (llm *gguf) numTensor() uint64 {
|
||||
switch llm.Version {
|
||||
case 1:
|
||||
@ -138,7 +143,7 @@ func (llm *gguf) numKV() uint64 {
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
func (llm *gguf) decode(rs io.ReadSeeker) error {
|
||||
// decode key-values
|
||||
for i := 0; uint64(i) < llm.numKV(); i++ {
|
||||
k, err := readGGUFString(llm, rs)
|
||||
@ -250,6 +255,8 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
|
||||
return err
|
||||
}
|
||||
|
||||
llm.offset = offset + padding
|
||||
|
||||
for _, tensor := range llm.tensors {
|
||||
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
|
||||
return err
|
||||
|
@ -208,6 +208,14 @@ func GetModel(name string) (*Model, error) {
|
||||
|
||||
switch layer.MediaType {
|
||||
case "application/vnd.ollama.image.model":
|
||||
model.ModelPath = filename
|
||||
model.ParentModel = layer.From
|
||||
case "application/vnd.ollama.image.model+metadata", "application/vnd.ollama.image.model+data":
|
||||
filename, err = GetBlobsPath(layer.MergeBase)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
model.ModelPath = filename
|
||||
model.ParentModel = layer.From
|
||||
case "application/vnd.ollama.image.embed":
|
||||
@ -349,10 +357,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
|
||||
}
|
||||
|
||||
for _, baseLayer := range baseLayers {
|
||||
if quantization != "" &&
|
||||
baseLayer.MediaType == "application/vnd.ollama.image.model" &&
|
||||
baseLayer.GGML != nil &&
|
||||
baseLayer.GGML.Name() == "gguf" {
|
||||
if quantization != "" && baseLayer.MediaType == "application/vnd.ollama.image.model" {
|
||||
ftype, err := llm.ParseFileType(quantization)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -393,6 +398,36 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
|
||||
config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
|
||||
config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType())
|
||||
config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
|
||||
|
||||
f, err := baseLayer.Layer.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
metadata := io.NewSectionReader(f, 0, baseLayer.GGML.Offset())
|
||||
metadataLayer, err := NewLayer(metadata, "application/vnd.ollama.image.model+metadata")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
metadataLayer.MergeBase = baseLayer.Digest
|
||||
|
||||
layers = append(layers, metadataLayer)
|
||||
|
||||
stat, err := f.Stat()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data := io.NewSectionReader(f, baseLayer.GGML.Offset(), stat.Size())
|
||||
dataLayer, err := NewLayer(data, "application/vnd.ollama.image.model+data")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dataLayer.MergeBase = baseLayer.Digest
|
||||
|
||||
layers = append(layers, dataLayer)
|
||||
continue
|
||||
}
|
||||
|
||||
layers = append(layers, baseLayer.Layer)
|
||||
@ -524,8 +559,8 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
|
||||
}
|
||||
|
||||
for _, layer := range append(layers, layer) {
|
||||
if layer.status != "" {
|
||||
fn(api.ProgressResponse{Status: layer.status})
|
||||
if layer.message != "" {
|
||||
fn(api.ProgressResponse{Status: layer.message})
|
||||
}
|
||||
}
|
||||
|
||||
@ -627,6 +662,9 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{},
|
||||
|
||||
for _, layer := range manifest.Layers {
|
||||
delete(deleteMap, layer.Digest)
|
||||
if layer.MergeBase != "" {
|
||||
delete(deleteMap, layer.MergeBase)
|
||||
}
|
||||
}
|
||||
|
||||
delete(deleteMap, manifest.Config.Digest)
|
||||
@ -733,6 +771,9 @@ func DeleteModel(name string) error {
|
||||
deleteMap := make(map[string]struct{})
|
||||
for _, layer := range manifest.Layers {
|
||||
deleteMap[layer.Digest] = struct{}{}
|
||||
if layer.MergeBase != "" {
|
||||
deleteMap[layer.MergeBase] = struct{}{}
|
||||
}
|
||||
}
|
||||
deleteMap[manifest.Config.Digest] = struct{}{}
|
||||
|
||||
@ -855,6 +896,43 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
|
||||
}
|
||||
delete(deleteMap, manifest.Config.Digest)
|
||||
|
||||
type mergedLayer struct {
|
||||
Metadata, Data *Layer
|
||||
}
|
||||
|
||||
mergedLayers := make(map[string]mergedLayer)
|
||||
for _, layer := range manifest.Layers {
|
||||
merged := mergedLayers[layer.MergeBase]
|
||||
if layer.MediaType == "application/vnd.ollama.image.model+metadata" {
|
||||
merged.Metadata = layer
|
||||
} else if layer.MediaType == "application/vnd.ollama.image.model+data" {
|
||||
merged.Data = layer
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
|
||||
mergedLayers[layer.MergeBase] = merged
|
||||
}
|
||||
|
||||
for _, mergedLayer := range mergedLayers {
|
||||
fn(api.ProgressResponse{Status: "merging layers"})
|
||||
metadata, err := mergedLayer.Metadata.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer metadata.Close()
|
||||
|
||||
data, err := mergedLayer.Data.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer data.Close()
|
||||
|
||||
if _, err := NewLayer(io.MultiReader(metadata, data), "application/vnd.ollama.image.model"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "verifying sha256 digest"})
|
||||
for _, layer := range layers {
|
||||
if err := verifyBlob(layer.Digest); err != nil {
|
||||
|
@ -12,7 +12,10 @@ type Layer struct {
|
||||
Digest string `json:"digest"`
|
||||
Size int64 `json:"size"`
|
||||
From string `json:"from,omitempty"`
|
||||
status string
|
||||
|
||||
MergeBase string `json:"merge_base,omitempty"`
|
||||
|
||||
message string
|
||||
}
|
||||
|
||||
func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
||||
@ -56,7 +59,7 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
|
||||
MediaType: mediatype,
|
||||
Digest: digest,
|
||||
Size: n,
|
||||
status: fmt.Sprintf("%s %s", status, digest),
|
||||
message: fmt.Sprintf("%s %s", status, digest),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -76,11 +79,11 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
|
||||
Digest: digest,
|
||||
Size: fi.Size(),
|
||||
From: from,
|
||||
status: fmt.Sprintf("using existing layer %s", digest),
|
||||
message: fmt.Sprintf("using existing layer %s", digest),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (l *Layer) Open() (io.ReadCloser, error) {
|
||||
func (l *Layer) Open() (*os.File, error) {
|
||||
blob, err := GetBlobsPath(l.Digest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
Loading…
x
Reference in New Issue
Block a user