Compare commits
1 Commits
main
...
bmizerany/
Author | SHA1 | Date | |
---|---|---|---|
|
acbffa59e9 |
@ -321,7 +321,7 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
|
|||||||
embedding := llm.KV().EmbeddingLength()
|
embedding := llm.KV().EmbeddingLength()
|
||||||
heads := llm.KV().HeadCount()
|
heads := llm.KV().HeadCount()
|
||||||
headsKV := llm.KV().HeadCountKV()
|
headsKV := llm.KV().HeadCountKV()
|
||||||
vocab := uint64(len(llm.KV()["tokenizer.ggml.tokens"].([]any)))
|
vocab := llm.KV()["tokenizer.ggml.tokens"].(*array).size
|
||||||
|
|
||||||
embeddingHeads := llm.KV().EmbeddingHeadCount()
|
embeddingHeads := llm.KV().EmbeddingHeadCount()
|
||||||
embeddingHeadsK := llm.KV().EmbeddingHeadCountK()
|
embeddingHeadsK := llm.KV().EmbeddingHeadCountK()
|
||||||
|
36
llm/gguf.go
36
llm/gguf.go
@ -316,7 +316,7 @@ func writeGGUFString(llm *gguf, w io.Writer, s string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
|
func readGGUFV1Array(llm *gguf, r io.Reader) (*array, error) {
|
||||||
t, err := readGGUF[uint32](llm, r)
|
t, err := readGGUF[uint32](llm, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -327,6 +327,8 @@ func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
a := &array{size: uint64(n)}
|
||||||
|
|
||||||
for i := 0; uint32(i) < n; i++ {
|
for i := 0; uint32(i) < n; i++ {
|
||||||
var e any
|
var e any
|
||||||
switch t {
|
switch t {
|
||||||
@ -361,13 +363,27 @@ func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
a = append(a, e)
|
if len(a.values) < arrayMaxSize {
|
||||||
|
a.values = append(a.values, e)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return a, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
const arrayMaxSize = 1000
|
||||||
|
|
||||||
|
type array struct {
|
||||||
|
size uint64
|
||||||
|
|
||||||
|
// values is the slice of values in the array.
|
||||||
|
//
|
||||||
|
// Its length may be less than size if the array is too big to reasonably
|
||||||
|
// fit in memory. The current limit is arrayMaxSize.
|
||||||
|
values []any
|
||||||
|
}
|
||||||
|
|
||||||
|
func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
|
||||||
if llm.Version == 1 {
|
if llm.Version == 1 {
|
||||||
return readGGUFV1Array(llm, r)
|
return readGGUFV1Array(llm, r)
|
||||||
}
|
}
|
||||||
@ -382,6 +398,8 @@ func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
a := &array{size: n}
|
||||||
|
|
||||||
for i := 0; uint64(i) < n; i++ {
|
for i := 0; uint64(i) < n; i++ {
|
||||||
var e any
|
var e any
|
||||||
switch t {
|
switch t {
|
||||||
@ -416,10 +434,16 @@ func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
a = append(a, e)
|
// TODO(bmizerany): We may want to only enforce this limit
|
||||||
|
// on certain fields, however, as of now, I (bmizerany) do
|
||||||
|
// not know of any array fields that are needed by Ollama that
|
||||||
|
// exceed this limit.
|
||||||
|
if len(a.values) < arrayMaxSize {
|
||||||
|
a.values = append(a.values, e)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return a, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
|
func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user