diff --git a/convert/convert.go b/convert/convert.go
index 4ad64d72..30c5a53f 100644
--- a/convert/convert.go
+++ b/convert/convert.go
@@ -40,13 +40,13 @@ func (Parameters) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (Parameters) specialTypes() []string {
+func (Parameters) specialTokenTypes() []string {
 	return []string{
 		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
 	}
 }
 
-func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []*llm.Tensor) error {
+func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
 	return llm.WriteGGUF(ws, kv, ts)
 }
 
@@ -54,24 +54,27 @@ type Converter interface {
 	// KV maps parameters to LLM key-values
 	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
-	Tensors([]Tensor) []*llm.Tensor
+	Tensors([]Tensor) []llm.Tensor
 	// tensorName returns the LLM tensor name for a specific input name
 	tensorName(string) string
 
-	// specialTypes returns any special token types the model uses
-	specialTypes() []string
-	writeFile(io.WriteSeeker, llm.KV, []*llm.Tensor) error
+	// specialTokenTypes returns any special token types the model uses
+	specialTokenTypes() []string
+	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
 
-func Convert(d string, ws io.WriteSeeker) error {
-	f, err := os.Open(filepath.Join(d, "config.json"))
+// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
+// and files it finds in the input path.
+// Supported input model formats include safetensors.
+// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
+func Convert(path string, ws io.WriteSeeker) error {
+	bts, err := os.ReadFile(filepath.Join(path, "config.json"))
 	if err != nil {
 		return err
 	}
-	defer f.Close()
 
 	var p Parameters
-	if err := json.NewDecoder(f).Decode(&p); err != nil {
+	if err := json.Unmarshal(bts, &p); err != nil {
 		return err
 	}
 
@@ -79,28 +82,23 @@ func Convert(d string, ws io.WriteSeeker) error {
 		return errors.New("unknown architecture")
 	}
 
-	var c Converter
+	var conv Converter
 	switch p.Architectures[0] {
 	case "LlamaForCausalLM", "MistralForCausalLM":
-		c = &llama{}
+		conv = &llama{}
 	case "MixtralForCausalLM":
-		c = &mixtral{}
+		conv = &mixtral{}
 	case "GemmaForCausalLM":
-		c = &gemma{}
+		conv = &gemma{}
 	default:
 		return errors.New("unsupported architecture")
 	}
 
-	bts, err := os.ReadFile(filepath.Join(d, "config.json"))
-	if err != nil {
+	if err := json.Unmarshal(bts, conv); err != nil {
 		return err
 	}
 
-	if err := json.Unmarshal(bts, c); err != nil {
-		return err
-	}
-
-	t, err := parseTokenizer(d, c.specialTypes())
+	t, err := parseTokenizer(path, conv.specialTokenTypes())
 	if err != nil {
 		return err
 	}
@@ -112,12 +110,14 @@ func Convert(d string, ws io.WriteSeeker) error {
 			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
 			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
 		}
+	} else {
+		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
 	}
 
-	ts, err := parseTensors(d)
+	ts, err := parseTensors(path)
 	if err != nil {
 		return err
 	}
 
-	return c.writeFile(ws, c.KV(t), c.Tensors(ts))
+	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
 }
diff --git a/convert/convert_gemma.go b/convert/convert_gemma.go
index 332fee7f..9213e157 100644
--- a/convert/convert_gemma.go
+++ b/convert/convert_gemma.go
@@ -43,15 +43,15 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *gemma) Tensors(ts []Tensor) []*llm.Tensor {
-	var out []*llm.Tensor
+func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		name := p.tensorName(t.Name())
 		if strings.HasSuffix(name, "_norm.weight") {
 			t.SetRepacker(p.addOne)
 		}
 
-		out = append(out, &llm.Tensor{
+		out = append(out, llm.Tensor{
 			Name: name,
 			Kind: t.Kind(),
 			Shape: t.Shape(),
diff --git a/convert/convert_llama.go b/convert/convert_llama.go
index 700049d3..ed6469c5 100644
--- a/convert/convert_llama.go
+++ b/convert/convert_llama.go
@@ -96,8 +96,8 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *llama) Tensors(ts []Tensor) []*llm.Tensor {
-	var out []*llm.Tensor
+func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		name := p.tensorName(t.Name())
 		if strings.HasSuffix(name, "attn_q.weight") ||
@@ -105,7 +105,7 @@ func (p *llama) Tensors(ts []Tensor) []*llm.Tensor {
 			t.SetRepacker(p.repack)
 		}
 
-		out = append(out, &llm.Tensor{
+		out = append(out, llm.Tensor{
 			Name: name,
 			Kind: t.Kind(),
 			Shape: t.Shape(),
diff --git a/convert/convert_mixtral.go b/convert/convert_mixtral.go
index c55a27f8..3263a27b 100644
--- a/convert/convert_mixtral.go
+++ b/convert/convert_mixtral.go
@@ -31,7 +31,7 @@ func (p *mixtral) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *mixtral) Tensors(ts []Tensor) []*llm.Tensor {
+func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
 	oldnew := []string{
 		"model.layers", "blk",
 		"w1", "ffn_gate_exps",
@@ -58,10 +58,10 @@ func (p *mixtral) Tensors(ts []Tensor) []*llm.Tensor {
 		return true
 	})
 
-	var out []*llm.Tensor
+	var out []llm.Tensor
 	for n, e := range experts {
 		// TODO(mxyng): sanity check experts
-		out = append(out, &llm.Tensor{
+		out = append(out, llm.Tensor{
 			Name: n,
 			Kind: e[0].Kind(),
 			Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),
diff --git a/convert/reader.go b/convert/reader.go
index 9be8ac2e..11ccaa81 100644
--- a/convert/reader.go
+++ b/convert/reader.go
@@ -29,6 +29,11 @@ func (t tensorBase) Shape() []uint64 {
 	return t.shape
 }
 
+const (
+	tensorKindF32 uint32 = iota
+	tensorKindF16
+)
+
 func (t tensorBase) Kind() uint32 {
 	if strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight") {
 		return 0
@@ -38,9 +43,9 @@ func (t tensorBase) Kind() uint32 {
 	case 0:
 		panic("invalid tensor shape")
 	case 1:
-		return 0
+		return tensorKindF32
 	default:
-		return 1
+		return tensorKindF16
 	}
 }
 
diff --git a/convert/reader_safetensors.go b/convert/reader_safetensors.go
index 440581af..d43c59a5 100644
--- a/convert/reader_safetensors.go
+++ b/convert/reader_safetensors.go
@@ -66,6 +66,7 @@ func parseSafetensors(ps ...string) ([]Tensor, error) {
 	return ts, nil
 }
 
+// safetensorsPad returns the padded size of the safetensors file given a length n and offset s
 func safetensorsPad(n, s int64) int64 {
 	return 8 + n + s
 }
@@ -125,9 +126,9 @@ func (st safetensor) WriteTo(w io.Writer) (int64, error) {
 	}
 
 	switch st.Kind() {
-	case 0:
+	case tensorKindF32:
 		return 0, binary.Write(w, binary.LittleEndian, f32s)
-	case 1:
+	case tensorKindF16:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
diff --git a/convert/tokenizer.go b/convert/tokenizer.go
index baee04aa..43d8c14e 100644
--- a/convert/tokenizer.go
+++ b/convert/tokenizer.go
@@ -32,7 +32,7 @@ type Tokenizer struct {
 	Template string
 }
 
-func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) {
+func parseTokenizer(d string, specialTokenTypes []string) (*Tokenizer, error) {
 	v, err := parseVocabulary(d)
 	if err != nil {
 		return nil, err
@@ -66,6 +66,8 @@ func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) {
 		switch pt.Type {
 		case "Split":
 			if pt.Pattern.Regex != "" {
+				// create a checksum of all Split pretokenizers which should be sufficient
+				// to identify the pretokenizer
 				sha256sum.Write([]byte(pt.Pattern.Regex))
 			}
 		}
@@ -102,7 +104,7 @@ func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) {
 		}
 	}
 
-	for _, st := range specialTypes {
+	for _, st := range specialTokenTypes {
 		sv := SpecialVocabulary{Type: st}
 		if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
 			if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
@@ -224,14 +226,13 @@ func parseVocabulary(d string) (*Vocabulary, error) {
 	}
 
 	for pattern, parseFn := range patterns {
-		matches, err := filepath.Glob(filepath.Join(d, pattern))
-		if err != nil {
+		if _, err := os.Stat(filepath.Join(d, pattern)); errors.Is(err, os.ErrNotExist) {
+			continue
+		} else if err != nil {
 			return nil, err
 		}
 
-		if len(matches) > 0 {
-			return parseFn(d)
-		}
+		return parseFn(d)
 	}
 
 	return nil, errors.New("unknown tensor format")
diff --git a/llm/gguf.go b/llm/gguf.go
index e61babf2..98158313 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -489,6 +489,7 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
 	return a, nil
 }
 
+// writeGGUFArray writes a slice s of type E to the write with a gguf type of t
 func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
 	if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
 		return err
@@ -502,16 +503,10 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
 		return err
 	}
 
-	for _, e := range s {
-		if err := binary.Write(w, binary.LittleEndian, e); err != nil {
-			return err
-		}
-	}
-
-	return nil
+	return binary.Write(w, binary.LittleEndian, s)
 }
 
-func WriteGGUF(ws io.WriteSeeker, kv KV, ts []*Tensor) error {
+func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
 	if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
 		return err
 	}
@@ -537,7 +532,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []*Tensor) error {
 		}
 	}
 
-	slices.SortFunc(ts, func(a, b *Tensor) int {
+	slices.SortFunc(ts, func(a, b Tensor) int {
 		var i, j int
 		if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
 			return cmp.Compare(a.Name, b.Name)
@@ -622,7 +617,7 @@ func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
 	return err
 }
 
-func ggufWriteTensorInfo(ws io.WriteSeeker, t *Tensor) error {
+func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
 	slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
 	if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
 		return err
@@ -649,7 +644,7 @@ func ggufWriteTensorInfo(ws io.WriteSeeker, t *Tensor) error {
 	return binary.Write(ws, binary.LittleEndian, t.Offset)
 }
 
-func ggufWriteTensor(ws io.WriteSeeker, t *Tensor, alignment int64) error {
+func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
 	offset, err := ws.Seek(0, io.SeekCurrent)
 	if err != nil {
 		return err
 	}
diff --git a/llm/memory_test.go b/llm/memory_test.go
index 18c797ee..3220c8df 100644
--- a/llm/memory_test.go
+++ b/llm/memory_test.go
@@ -21,7 +21,7 @@ func TestEstimateGPULayers(t *testing.T) {
 	defer f.Close()
 
 	inputLayerCount := 5
-	tensors := []*Tensor{
+	tensors := []Tensor{
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "blk.1.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "blk.2.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
diff --git a/server/routes_create_test.go b/server/routes_create_test.go
index 4d616d8d..9b7009df 100644
--- a/server/routes_create_test.go
+++ b/server/routes_create_test.go
@@ -19,7 +19,7 @@ import (
 
 var stream bool = false
 
-func createBinFile(t *testing.T, kv map[string]any, ti []*llm.Tensor) string {
+func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
 	t.Helper()
 
 	f, err := os.CreateTemp(t.TempDir(), "")
diff --git a/server/routes_generate_test.go b/server/routes_generate_test.go
index 02f95dd2..5c0caff1 100644
--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -101,7 +101,7 @@ func TestGenerateChat(t *testing.T) {
 			"tokenizer.ggml.tokens": []string{""},
 			"tokenizer.ggml.scores": []float32{0},
 			"tokenizer.ggml.token_type": []int32{0},
-		}, []*llm.Tensor{
+		}, []llm.Tensor{
 			{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
@@ -149,7 +149,7 @@ func TestGenerateChat(t *testing.T) {
 		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 			"general.architecture": "bert",
 			"bert.pooling_type": uint32(0),
-		}, []*llm.Tensor{})),
+		}, []llm.Tensor{})),
 		Stream: &stream,
 	})
 
@@ -399,7 +399,7 @@ func TestGenerate(t *testing.T) {
 			"tokenizer.ggml.tokens": []string{""},
 			"tokenizer.ggml.scores": []float32{0},
 			"tokenizer.ggml.token_type": []int32{0},
-		}, []*llm.Tensor{
+		}, []llm.Tensor{
 			{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
@@ -447,7 +447,7 @@ func TestGenerate(t *testing.T) {
 		Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 			"general.architecture": "bert",
 			"bert.pooling_type": uint32(0),
-		}, []*llm.Tensor{})),
+		}, []llm.Tensor{})),
 		Stream: &stream,
 	})
 
diff --git a/server/sched_test.go b/server/sched_test.go
index f3c55514..80395714 100644
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -124,7 +124,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
 		"tokenizer.ggml.tokens": []string{" "},
 		"tokenizer.ggml.scores": []float32{0},
 		"tokenizer.ggml.token_type": []int32{0},
-	}, []*llm.Tensor{
+	}, []llm.Tensor{
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 	}))