Compare commits

...

24 Commits

Author    SHA1        Message                                 Date
Josh Yan  309307c8f9  update test, remove comments            2024-07-17 10:46:50 -07:00
Josh Yan  f378058b51  whitespace                              2024-07-16 16:45:41 -07:00
Josh Yan  574c3540cd  reorder imports                         2024-07-16 16:44:04 -07:00
Josh      d069cf753b  Merge branch 'main' into jyan/reord-g   2024-07-16 16:42:49 -07:00
Josh Yan  64405525b4  clean up                                2024-07-16 16:40:38 -07:00
Josh Yan  dea2204b82  rmv comments                            2024-07-16 16:37:50 -07:00
Josh Yan  6ee22d5080  clean                                   2024-07-16 16:35:15 -07:00
Josh Yan  703ecccc6b  clean                                   2024-07-16 14:17:44 -07:00
Josh Yan  873f334783  IT WORKS                                2024-07-16 14:12:07 -07:00
Josh Yan  fa49bfc0bd  FIXED TESTS                             2024-07-16 12:14:10 -07:00
Josh Yan  fc1b3ee9bf  test                                    2024-07-16 11:21:13 -07:00
Josh Yan  25be20949c  test                                    2024-07-15 15:08:24 -07:00
Josh Yan  903e9df46f  test                                    2024-07-15 11:46:49 -07:00
Josh Yan  40c0f9612e  unneccesary                             2024-07-14 18:41:16 -07:00
Josh Yan  15a0215203  running                                 2024-07-12 16:49:57 -07:00
Josh Yan  faa3c937cf  writeto                                 2024-07-12 15:37:27 -07:00
Josh Yan  cf57246aba  write                                   2024-07-12 12:59:51 -07:00
Josh Yan  6fafe4f753  gguf                                    2024-07-12 12:58:00 -07:00
Josh Yan  d7c8d4f3f4  ggufwritekv                             2024-07-12 12:25:13 -07:00
Josh Yan  3d0fd31f0e  TensorWriter                            2024-07-12 12:18:46 -07:00
Josh Yan  554f3bdc0e  interface                               2024-07-12 10:02:58 -07:00
Josh Yan  e75fb73839  types                                   2024-07-12 09:42:10 -07:00
Josh Yan  2fdebffc8d  sawp                                    2024-07-11 18:18:26 -07:00
Josh Yan  29ecfe493b  write                                   2024-07-11 17:56:51 -07:00
5 changed files with 514 additions and 31 deletions

View File

@@ -36,6 +36,7 @@ type ggla struct {
kv KV
tensors []*Tensor
offset int64
}
func newGGLA(container *containerGGLA) *ggla {
@@ -50,7 +51,10 @@ func (llm *ggla) KV() KV {
}
func (llm *ggla) Tensors() Tensors {
return llm.tensors
return Tensors{
Items: llm.tensors,
Offset: llm.offset,
}
}
func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {

View File

@@ -112,11 +112,38 @@ func (kv KV) ChatTemplate() string {
return s
}
type Tensors []*Tensor
// Tensors groups a model's tensors with the offset of the tensor data region in the file
type Tensors struct {
Items []*Tensor
Offset int64
}
// Implement the Len method
func (ts Tensors) Len() int {
return len(ts.Items)
}
// Implement the Swap method
func (ts Tensors) Swap(i, j int) {
ts.Items[i], ts.Items[j] = ts.Items[j], ts.Items[i]
}
// Implement the Less method
func (ts Tensors) Less(i, j int) bool {
var x, y int
if n, err := fmt.Sscanf(ts.Items[i].Name, "blk.%d", &x); err != nil || n != 1 {
return ts.Items[i].Name < ts.Items[j].Name
} else if n, err := fmt.Sscanf(ts.Items[j].Name, "blk.%d", &y); err != nil || n != 1 {
return ts.Items[i].Name < ts.Items[j].Name
}
return x < y
}
func (ts Tensors) Layers() map[string]Layer {
layers := make(map[string]Layer)
for _, t := range ts {
for _, t := range ts.Items {
parts := strings.Split(t.Name, ".")
if parts[0] == "blk" {
// join first and second part, e.g. blk.%d
@@ -454,3 +481,11 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
return
}
type TensorWriter struct {
io.Reader
}
func (tw TensorWriter) WriteTo(w io.Writer) (int64, error) {
return io.Copy(w, tw.Reader)
}

View File

@@ -6,7 +6,12 @@ import (
"encoding/json"
"fmt"
"io"
"log/slog"
"slices"
"sort"
"strings"
"golang.org/x/exp/maps"
)
type containerGGUF struct {
@@ -87,6 +92,7 @@ type gguf struct {
kv KV
tensors []*Tensor
offset int64
parameters uint64
@@ -109,7 +115,10 @@ func (llm *gguf) KV() KV {
}
func (llm *gguf) Tensors() Tensors {
return llm.tensors
return Tensors{
Items: llm.tensors,
Offset: llm.offset,
}
}
func (llm *gguf) numTensor() uint64 {
@@ -199,12 +208,13 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
return fmt.Errorf("failed to read tensor dimensions: %w", err)
}
shape := [4]uint64{1, 1, 1, 1}
shape := []uint64{}
for i := 0; uint32(i) < dims; i++ {
shape[i], err = readGGUF[uint64](llm, rs)
shapeVal, err := readGGUF[uint64](llm, rs)
if err != nil {
return fmt.Errorf("failed to read tensor shape: %w", err)
}
shape = append(shape, shapeVal)
}
kind, err := readGGUF[uint32](llm, rs)
@@ -221,7 +231,7 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
Name: name,
Kind: kind,
Offset: offset,
Shape: shape[:],
Shape: shape,
}
llm.tensors = append(llm.tensors, &tensor)
@@ -236,6 +246,14 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
alignment = 32
}
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
return fmt.Errorf("failed to get current offset: %w", err)
}
// align to next 32-byte boundary
llm.offset = offset + llm.padding(offset, int64(alignment))
for _, tensor := range llm.tensors {
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
@@ -261,12 +279,12 @@ func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
return t, err
}
func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
if err := binary.Write(w, llm.ByteOrder, t); err != nil {
func writeGGUF[V any](w io.Writer, t uint32, v V) error {
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
return err
}
return binary.Write(w, llm.ByteOrder, v)
return binary.Write(w, binary.LittleEndian, v)
}
func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
@@ -330,12 +348,12 @@ func readGGUFString(llm *gguf, r io.Reader) (string, error) {
return string(buf), nil
}
func writeGGUFString(llm *gguf, w io.Writer, s string) error {
if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
func writeGGUFString(w io.Writer, s string) error {
if err := binary.Write(w, binary.LittleEndian, ggufTypeString); err != nil {
return err
}
if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
return err
}
@@ -344,8 +362,9 @@ func writeGGUFString(llm *gguf, w io.Writer, s string) error {
}
type array struct {
size int
values []any
size int
values []any
datatype uint32
}
func (a *array) MarshalJSON() ([]byte, error) {
@@ -425,7 +444,7 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
return nil, err
}
a := &array{size: int(n)}
a := &array{size: int(n), datatype: t}
if llm.canCollectArray(int(n)) {
a.values = make([]any, int(n))
}
@@ -476,21 +495,21 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
return a, nil
}
func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil {
func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
return err
}
if err := binary.Write(w, llm.ByteOrder, t); err != nil {
if err := binary.Write(w, binary.LittleEndian, t); err != nil {
return err
}
if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
if err := binary.Write(w, binary.LittleEndian, uint64(len(s))); err != nil {
return err
}
for _, e := range s {
if err := binary.Write(w, llm.ByteOrder, e); err != nil {
if err := binary.Write(w, binary.LittleEndian, e); err != nil {
return err
}
}
@@ -589,19 +608,19 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
var err error
switch v := v.(type) {
case uint32:
err = writeGGUF(llm, ws, ggufTypeUint32, v)
err = writeGGUF(ws, ggufTypeUint32, v)
case float32:
err = writeGGUF(llm, ws, ggufTypeFloat32, v)
err = writeGGUF(ws, ggufTypeFloat32, v)
case bool:
err = writeGGUF(llm, ws, ggufTypeBool, v)
err = writeGGUF(ws, ggufTypeBool, v)
case string:
err = writeGGUFString(llm, ws, v)
err = writeGGUFString(ws, v)
case []int32:
err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
err = writeGGUFArray(ws, ggufTypeInt32, v)
case []uint32:
err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
err = writeGGUFArray(ws, ggufTypeUint32, v)
case []float32:
err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
err = writeGGUFArray(ws, ggufTypeFloat32, v)
case []string:
if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
return err
@@ -634,7 +653,7 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
for k, v := range kvCheck {
if !v {
return fmt.Errorf("Didn't know how to write kv %s", k)
return fmt.Errorf("didn't know how to write kv %s", k)
}
}
@@ -696,3 +715,208 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
func (gguf) padding(offset, align int64) int64 {
return (align - offset%align) % align
}
// GGUFWriter wraps a KV and its Tensors and implements io.Reader and io.WriterTo
type GGUFWriter struct {
KV
Tensors
}
type writeOffset struct {
io.Writer
offset int
}
func (wo *writeOffset) Write(p []byte) (int, error) {
n, err := wo.Writer.Write(p)
wo.offset += n
return n, err
}
var _ io.Reader = (*GGUFWriter)(nil)
var _ io.WriterTo = (*GGUFWriter)(nil)
func (GGUFWriter) Read([]byte) (int, error) {
panic("not implemeneted")
}
func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) {
wo := &writeOffset{Writer: w}
if err := binary.Write(wo, binary.LittleEndian, []byte("GGUF")); err != nil {
return 0, err
}
if err := binary.Write(wo, binary.LittleEndian, uint32(3)); err != nil {
return 0, err
}
if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.Tensors.Items))); err != nil {
return 0, err
}
if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.KV)-1)); err != nil {
return 0, err
}
keys := maps.Keys(gguf.KV)
slices.Sort(keys)
for _, key := range keys {
switch key {
case "general.parameter_count":
// don't write general.parameter_count since it's added by us
continue
default:
if err := ggufWriteKV(wo, key, gguf.KV[key]); err != nil {
return 0, err
}
}
}
sort.Sort(gguf.Tensors)
var s uint64
for _, t := range gguf.Tensors.Items {
t.Offset = s
if err := ggufWriteTensorInfo(wo, t); err != nil {
return 0, err
}
s += t.Size()
}
tensorOffset := wo.offset
for _, t := range gguf.Tensors.Items {
if err := ggufWriteTensor(wo, t, wo.offset); err != nil {
return 0, err
}
}
return int64(tensorOffset), nil
}
func ggufWriteTensorInfo(ws io.Writer, t *Tensor) error {
if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, []byte(t.Name)); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, uint32(len(t.Shape))); err != nil {
return err
}
for i := range len(t.Shape) {
if err := binary.Write(ws, binary.LittleEndian, t.Shape[len(t.Shape)-i-1]); err != nil {
return err
}
}
if err := binary.Write(ws, binary.LittleEndian, t.Kind); err != nil {
return err
}
return binary.Write(ws, binary.LittleEndian, t.Offset)
}
func ggufWriteTensor(ws io.Writer, t *Tensor, offset int) error {
slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(int64(offset), 32)))); err != nil {
return err
}
_, err := t.WriteTo(ws)
return err
}
func ggufWriteKV(ws io.Writer, k string, v any) error {
slog.Debug(k, "type", fmt.Sprintf("%T", v))
if err := binary.Write(ws, binary.LittleEndian, uint64(len(k))); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, []byte(k)); err != nil {
return err
}
var err error
switch v := v.(type) {
case uint32:
err = writeGGUF(ws, ggufTypeUint32, v)
case float32:
err = writeGGUF(ws, ggufTypeFloat32, v)
case bool:
err = writeGGUF(ws, ggufTypeBool, v)
case string:
err = writeGGUFString(ws, v)
case []int32:
err = writeGGUFArray(ws, ggufTypeInt32, v)
case []uint32:
err = writeGGUFArray(ws, ggufTypeUint32, v)
case []float32:
err = writeGGUFArray(ws, ggufTypeFloat32, v)
case []string:
if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, uint64(len(v))); err != nil {
return err
}
for _, e := range v {
if err := binary.Write(ws, binary.LittleEndian, uint64(len(e))); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, []byte(e)); err != nil {
return err
}
}
case *array:
if v.size > 0 {
switch v.values[0].(type) {
case string:
if err := binary.Write(ws, binary.LittleEndian, ggufTypeArray); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, ggufTypeString); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, uint64(v.size)); err != nil {
return err
}
for _, e := range v.values {
if err := binary.Write(ws, binary.LittleEndian, uint64(len(e.(string)))); err != nil {
return err
}
if err := binary.Write(ws, binary.LittleEndian, []byte(e.(string))); err != nil {
return err
}
}
default:
err = writeGGUFArray(ws, v.datatype, v.values)
}
}
default:
return fmt.Errorf("improper type for '%s'", k)
}
return err
}
func ggufPadding(offset, align int64) int64 {
// mod twice so an already-aligned offset (offset%align == 0) gets zero padding
return (align - offset%align) % align
}

llm/gguf_test.go (new file, 187 additions)
View File

@@ -0,0 +1,187 @@
package llm
import (
"crypto/sha256"
"fmt"
"io"
"math"
"os"
"path/filepath"
"testing"
"github.com/google/go-cmp/cmp"
)
// TestGGUFRewrite tests decoding and rewriting (unsorted) GGUF files.
// To run, add GGUF files to /llm/testdata and add their names to the tests slice.
// The test creates a temporary file in /llm/testdata that is deleted only if the test passes.
// Note: the map[Tensor.Name + " offset"] entries are commented out since sorting reorders the tensors;
// comment out sort.Sort(gguf.Tensors) in gguf.go to test offsets.
func TestGGUFRewrite(t *testing.T) {
tests := []string{
"phi3.gguf",
}
for i := range tests {
tt := tests[i]
t.Run(tt, func(t *testing.T) {
t.Parallel()
p := filepath.Join("testdata", tt)
if _, err := os.Stat(p); err != nil {
t.Skip("file not found", p)
}
wantFile, err := os.Open(p)
if err != nil {
t.Fatal(err)
}
defer wantFile.Close()
// decode original gguf
_, wantGGML, err := decodeGGML(t, wantFile)
if err != nil {
t.Fatal(err)
}
gotFile, err := os.CreateTemp("testdata", tt)
if err != nil {
t.Fatal(err)
}
defer func() {
gotFile.Close()
if !t.Failed() {
os.Remove(gotFile.Name())
}
}()
_, gotGGML, err := rewriteGGML(t, wantGGML, gotFile, wantFile)
if err != nil {
t.Fatal(err)
}
diff, diff2 := compareGGML(t, gotGGML, wantGGML, gotFile, wantFile)
if cmp.Diff(diff, diff2) != "" {
t.Fatalf("diff: \n%s", cmp.Diff(diff, diff2))
}
})
}
}
func compareGGML(t *testing.T, gotGGML, wantGGML *GGML, f *os.File, f2 *os.File) (map[string]string, map[string]string) {
got := make(map[string]string)
want := make(map[string]string)
gotKV := gotGGML.KV()
wantKV := wantGGML.KV()
if len(gotKV) != len(wantKV) {
t.Fatalf("got length: %d != want length: %d", len(gotKV), len(wantKV))
}
for k, v := range gotKV {
switch t := v.(type) {
case *array:
if diffy := cmp.Diff(t.values, wantKV[k].(*array).values); diffy != "" {
got[k] = diffy
}
default:
if v != wantKV[k] {
got[k] = fmt.Sprintf("kv1: %v, kv2: %v", v, wantKV[k])
}
}
}
gotTensors := gotGGML.Tensors().Items
gotOffset := gotGGML.Tensors().Offset
wantTensors := wantGGML.Tensors().Items
wantOffset := wantGGML.Tensors().Offset
if len(gotTensors) != len(wantTensors) {
got["lenTensors"] = fmt.Sprintf("t1: %d, t2: %d", len(gotTensors), len(wantTensors))
}
for _, tensor := range gotTensors {
sha256sum := sha256.New()
sr := io.NewSectionReader(f, gotOffset+int64(tensor.Offset), int64(tensor.Size()))
var s int64
s, err := io.Copy(sha256sum, sr)
if err != nil {
t.Fatalf("error: %v", err)
}
got[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil))
got[tensor.Name+" size"] = fmt.Sprintf("%d", s)
// got[tensor.Name+" offset"] = fmt.Sprintf("%v", tensor.Offset)
}
for _, tensor := range wantTensors {
sha256sum := sha256.New()
var s int64
sr := io.NewSectionReader(f2, wantOffset+int64(tensor.Offset), int64(tensor.Size()))
s, err := io.Copy(sha256sum, sr)
if err != nil {
t.Fatalf("error: %v", err)
}
want[tensor.Name] = fmt.Sprintf("%x", sha256sum.Sum(nil))
want[tensor.Name+" size"] = fmt.Sprintf("%d", s)
// want[tensor.Name+" offset"] = fmt.Sprintf("%v", tensor.Offset)
}
return got, want
}
func decodeGGML(t *testing.T, f *os.File) (int64, *GGML, error) {
ggml, n, err := DecodeGGML(f, math.MaxInt)
if err != nil {
t.Fatal(err)
}
return n, ggml, nil
}
func rewriteGGML(t *testing.T, ggml *GGML, gotFile *os.File, wantFile *os.File) (int64, *GGML, error) {
var tensors []*Tensor
for _, tensor := range ggml.Tensors().Items {
shape := make([]uint64, len(tensor.Shape))
for i := range len(tensor.Shape) {
shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
}
tensors = append(tensors, &Tensor{
Name: tensor.Name,
Kind: tensor.Kind,
Shape: shape,
WriterTo: TensorWriter{
Reader: io.NewSectionReader(wantFile, ggml.Tensors().Offset+int64(tensor.Offset), int64(tensor.Size())),
},
})
}
reader := &GGUFWriter{
KV: ggml.KV(),
Tensors: Tensors{
Items: tensors,
Offset: ggml.Tensors().Offset,
},
}
n, err := io.Copy(gotFile, reader)
if err != nil {
t.Fatal(err)
}
file, err := os.Open(gotFile.Name())
if err != nil {
t.Fatal(err)
}
ggml2, _, err := DecodeGGML(file, math.MaxInt)
if err != nil {
t.Fatal(err)
}
return n, ggml2, nil
}

View File

@@ -13,6 +13,7 @@ import (
"os"
"path/filepath"
"slices"
"sort"
"strings"
"text/template/parse"
@@ -231,7 +232,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
var offset int64
for offset < stat.Size() {
ggml, n, err := llm.DecodeGGML(file, 0)
ggml, n, err := llm.DecodeGGML(file, -1)
if errors.Is(err, io.EOF) {
break
} else if err != nil {
@@ -245,7 +246,39 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
mediatype = "application/vnd.ollama.image.projector"
}
layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
var reader io.Reader = io.NewSectionReader(file, offset, n)
if !sort.IsSorted(ggml.Tensors()) {
// build a new Tensors whose items carry a WriterTo for their data
var tensors []*llm.Tensor
ggmlTensors := ggml.Tensors()
for _, tensor := range ggmlTensors.Items {
shape := make([]uint64, len(tensor.Shape))
for i := range len(tensor.Shape) {
shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
}
tensors = append(tensors, &llm.Tensor{
Name: tensor.Name,
Kind: tensor.Kind,
Shape: shape,
WriterTo: &llm.TensorWriter{
Reader: io.NewSectionReader(file, offset+ggmlTensors.Offset+int64(tensor.Offset), int64(tensor.Size())),
},
})
}
reader = &llm.GGUFWriter{
KV: ggml.KV(),
Tensors: llm.Tensors{
Items: tensors,
Offset: ggmlTensors.Offset,
},
}
}
layer, err := NewLayer(reader, mediatype)
if err != nil {
return nil, err
}