split model layer into metadata and data layers
commit 41ae232e10
parent 122b35c784
llm/ggla.go (12 changed lines)
@@ -33,6 +33,7 @@ func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {

 type ggla struct {
     *containerGGLA
+    offset int64

     kv      KV
     tensors []*Tensor
@@ -53,6 +54,10 @@ func (llm *ggla) Tensors() Tensors {
     return llm.tensors
 }

+func (llm *ggla) Offset() int64 {
+    return llm.offset
+}
+
 func (llm *ggla) decode(rs io.ReadSeeker) error {
     var r uint32
     if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
@@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
     }
     llm.kv["alpha"] = alpha

+    offset, err := rs.Seek(0, io.SeekCurrent)
+    if err != nil {
+        return err
+    }
+
+    llm.offset = offset
+
     for {
         var dims uint32
         if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
@@ -16,6 +16,7 @@ type GGML struct {
 type model interface {
     KV() KV
     Tensors() Tensors
+    Offset() int64
 }

 type KV map[string]any
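Illustration only, not part of the diff: the new Offset() accessor reports where the metadata section (header, key-values, tensor descriptors) ends and the raw tensor data begins, which is what lets a single model blob be cut into two sections without re-encoding anything. A minimal sketch of that split using only the standard library; the placeholder offset value and the command-line argument are assumptions, in the real code the boundary comes from the decoded model's Offset().

package main

import (
    "fmt"
    "io"
    "os"
)

func main() {
    // Open a model blob; in this change the *os.File comes from Layer.Open().
    f, err := os.Open(os.Args[1])
    if err != nil {
        panic(err)
    }
    defer f.Close()

    fi, err := f.Stat()
    if err != nil {
        panic(err)
    }

    // Placeholder boundary; the commit obtains it via baseLayer.GGML.Offset().
    var offset int64 = 1 << 20

    metadata := io.NewSectionReader(f, 0, offset)            // header + KV + tensor descriptors
    data := io.NewSectionReader(f, offset, fi.Size()-offset) // raw tensor data

    fmt.Println("metadata bytes:", metadata.Size(), "data bytes:", data.Size())
}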
llm/gguf.go (11 changed lines)
@@ -55,7 +55,7 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {

     model := newGGUF(c)
     slog.Debug(fmt.Sprintf("model = %#v", model))
-    if err := model.Decode(rs); err != nil {
+    if err := model.decode(rs); err != nil {
         return nil, err
     }

@@ -90,6 +90,7 @@ const (

 type gguf struct {
     *containerGGUF
+    offset int64

     kv      KV
     tensors []*Tensor
@@ -116,6 +117,10 @@ func (llm *gguf) Tensors() Tensors {
     return llm.tensors
 }

+func (llm *gguf) Offset() int64 {
+    return llm.offset
+}
+
 func (llm *gguf) numTensor() uint64 {
     switch llm.Version {
     case 1:
@@ -138,7 +143,7 @@ func (llm *gguf) numKV() uint64 {
     }
 }

-func (llm *gguf) Decode(rs io.ReadSeeker) error {
+func (llm *gguf) decode(rs io.ReadSeeker) error {
     // decode key-values
     for i := 0; uint64(i) < llm.numKV(); i++ {
         k, err := readGGUFString(llm, rs)
@@ -250,6 +255,8 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
         return err
     }

+    llm.offset = offset + padding
+
     for _, tensor := range llm.tensors {
         if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
             return err
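Aside, not from the commit: the offset recorded here is the end of the tensor descriptors plus the alignment padding that GGUF inserts before the tensor data section (general.alignment, conventionally 32 bytes), so Offset() lands exactly on the first tensor byte. A minimal sketch of that rounding, assuming the usual power-of-two alignment; alignOffset is a hypothetical name, not a function from this codebase.

package main

import "fmt"

// alignOffset rounds offset up to the next multiple of align, which is where
// the GGUF tensor data section starts after the metadata.
func alignOffset(offset, align int64) int64 {
    return offset + (align-offset%align)%align
}

func main() {
    fmt.Println(alignOffset(1000, 32)) // 1024
}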
@@ -208,6 +208,14 @@ func GetModel(name string) (*Model, error) {

         switch layer.MediaType {
         case "application/vnd.ollama.image.model":
+            model.ModelPath = filename
+            model.ParentModel = layer.From
+        case "application/vnd.ollama.image.model+metadata", "application/vnd.ollama.image.model+data":
+            filename, err = GetBlobsPath(layer.MergeBase)
+            if err != nil {
+                return nil, err
+            }
+
             model.ModelPath = filename
             model.ParentModel = layer.From
         case "application/vnd.ollama.image.embed":
@@ -349,10 +357,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
     }

     for _, baseLayer := range baseLayers {
-        if quantization != "" &&
-            baseLayer.MediaType == "application/vnd.ollama.image.model" &&
-            baseLayer.GGML != nil &&
-            baseLayer.GGML.Name() == "gguf" {
+        if quantization != "" && baseLayer.MediaType == "application/vnd.ollama.image.model" {
             ftype, err := llm.ParseFileType(quantization)
             if err != nil {
                 return err
@@ -393,6 +398,36 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
             config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
             config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType())
             config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
+
+            f, err := baseLayer.Layer.Open()
+            if err != nil {
+                return err
+            }
+            defer f.Close()
+
+            metadata := io.NewSectionReader(f, 0, baseLayer.GGML.Offset())
+            metadataLayer, err := NewLayer(metadata, "application/vnd.ollama.image.model+metadata")
+            if err != nil {
+                return err
+            }
+            metadataLayer.MergeBase = baseLayer.Digest
+
+            layers = append(layers, metadataLayer)
+
+            stat, err := f.Stat()
+            if err != nil {
+                return err
+            }
+
+            data := io.NewSectionReader(f, baseLayer.GGML.Offset(), stat.Size())
+            dataLayer, err := NewLayer(data, "application/vnd.ollama.image.model+data")
+            if err != nil {
+                return err
+            }
+            dataLayer.MergeBase = baseLayer.Digest
+
+            layers = append(layers, dataLayer)
+            continue
         }

         layers = append(layers, baseLayer.Layer)
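Illustration only, not part of the diff: after this block runs, the manifest carries two layers in place of the single application/vnd.ollama.image.model layer, each recording the original blob's digest in merge_base. A hedged sketch of what those entries could serialize to; the struct below only mirrors the Layer fields relevant to the split, and every digest and size is made up.

package main

import (
    "encoding/json"
    "fmt"
)

// manifestLayer is an illustrative stand-in for the Layer fields that matter
// here: media type, content digest, size, and merge_base.
type manifestLayer struct {
    MediaType string `json:"mediaType"`
    Digest    string `json:"digest"`
    Size      int64  `json:"size"`
    MergeBase string `json:"merge_base,omitempty"`
}

func main() {
    layers := []manifestLayer{
        {
            MediaType: "application/vnd.ollama.image.model+metadata",
            Digest:    "sha256:aaaa0000", // made-up digest of the metadata section
            Size:      1186816,
            MergeBase: "sha256:ffff0000", // made-up digest of the original, unsplit blob
        },
        {
            MediaType: "application/vnd.ollama.image.model+data",
            Digest:    "sha256:bbbb0000", // made-up digest of the tensor data section
            Size:      4107038976,
            MergeBase: "sha256:ffff0000",
        },
    }

    out, _ := json.MarshalIndent(layers, "", "  ")
    fmt.Println(string(out))
}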
@@ -524,8 +559,8 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
     }

     for _, layer := range append(layers, layer) {
-        if layer.status != "" {
-            fn(api.ProgressResponse{Status: layer.status})
+        if layer.message != "" {
+            fn(api.ProgressResponse{Status: layer.message})
         }
     }

@@ -627,6 +662,9 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{},

         for _, layer := range manifest.Layers {
             delete(deleteMap, layer.Digest)
+            if layer.MergeBase != "" {
+                delete(deleteMap, layer.MergeBase)
+            }
         }

         delete(deleteMap, manifest.Config.Digest)
@@ -733,6 +771,9 @@ func DeleteModel(name string) error {
     deleteMap := make(map[string]struct{})
     for _, layer := range manifest.Layers {
         deleteMap[layer.Digest] = struct{}{}
+        if layer.MergeBase != "" {
+            deleteMap[layer.MergeBase] = struct{}{}
+        }
     }
     deleteMap[manifest.Config.Digest] = struct{}{}

@@ -855,6 +896,43 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
     }
     delete(deleteMap, manifest.Config.Digest)

+    type mergedLayer struct {
+        Metadata, Data *Layer
+    }
+
+    mergedLayers := make(map[string]mergedLayer)
+    for _, layer := range manifest.Layers {
+        merged := mergedLayers[layer.MergeBase]
+        if layer.MediaType == "application/vnd.ollama.image.model+metadata" {
+            merged.Metadata = layer
+        } else if layer.MediaType == "application/vnd.ollama.image.model+data" {
+            merged.Data = layer
+        } else {
+            continue
+        }
+
+        mergedLayers[layer.MergeBase] = merged
+    }
+
+    for _, mergedLayer := range mergedLayers {
+        fn(api.ProgressResponse{Status: "merging layers"})
+        metadata, err := mergedLayer.Metadata.Open()
+        if err != nil {
+            return err
+        }
+        defer metadata.Close()
+
+        data, err := mergedLayer.Data.Open()
+        if err != nil {
+            return err
+        }
+        defer data.Close()
+
+        if _, err := NewLayer(io.MultiReader(metadata, data), "application/vnd.ollama.image.model"); err != nil {
+            return err
+        }
+    }
+
     fn(api.ProgressResponse{Status: "verifying sha256 digest"})
     for _, layer := range layers {
         if err := verifyBlob(layer.Digest); err != nil {
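Aside, not from the commit: the merge above streams the metadata layer followed by the data layer through io.MultiReader, so the bytes written back are exactly the original, unsplit blob, and their sha256 is what both layers record as merge_base, which is presumably how the reassembled file can still be verified against the manifest. A self-contained sketch of that property on in-memory data; none of the identifiers below come from the repository.

package main

import (
    "crypto/sha256"
    "fmt"
    "io"
    "strings"
)

func main() {
    metadataBytes := "gguf-metadata-bytes"
    tensorBytes := "tensor-data-bytes"

    // Digest of the original, unsplit blob (what merge_base would record).
    want := sha256.Sum256([]byte(metadataBytes + tensorBytes))

    // Digest of the two sections streamed back-to-back, as the merge does
    // with io.MultiReader(metadata, data).
    h := sha256.New()
    if _, err := io.Copy(h, io.MultiReader(
        strings.NewReader(metadataBytes),
        strings.NewReader(tensorBytes),
    )); err != nil {
        panic(err)
    }

    fmt.Printf("%x\n%x\n", want, h.Sum(nil)) // the two digests are identical
}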
@@ -12,7 +12,10 @@ type Layer struct {
     Digest    string `json:"digest"`
     Size      int64  `json:"size"`
     From      string `json:"from,omitempty"`
-    status string
+    MergeBase string `json:"merge_base,omitempty"`
+
+    message string
 }

 func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
@@ -56,7 +59,7 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
         MediaType: mediatype,
         Digest:    digest,
         Size:      n,
-        status:    fmt.Sprintf("%s %s", status, digest),
+        message:   fmt.Sprintf("%s %s", status, digest),
     }, nil
 }

@@ -76,11 +79,11 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
         Digest:    digest,
         Size:      fi.Size(),
         From:      from,
-        status:    fmt.Sprintf("using existing layer %s", digest),
+        message:   fmt.Sprintf("using existing layer %s", digest),
     }, nil
 }

-func (l *Layer) Open() (io.ReadCloser, error) {
+func (l *Layer) Open() (*os.File, error) {
     blob, err := GetBlobsPath(l.Digest)
     if err != nil {
         return nil, err
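A note on the signature change, not part of the diff: Open() now returns *os.File instead of io.ReadCloser because the split in CreateModel calls f.Stat() and io.NewSectionReader, and io.NewSectionReader requires an io.ReaderAt, which a plain io.ReadCloser does not provide. A minimal sketch of that constraint; the commented-out lines are the ones that would not compile.

package main

import (
    "io"
    "os"
)

func main() {
    // *os.File satisfies io.ReaderAt, so it can back an io.SectionReader.
    var f *os.File
    var _ io.ReaderAt = f

    // A bare io.ReadCloser cannot:
    //
    //   var rc io.ReadCloser
    //   var _ io.ReaderAt = rc // compile error: io.ReadCloser does not implement io.ReaderAt
}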