llama.go: Use dynamic buffer for TokenToPiece

The cgo binding for llama_token_to_piece uses a fixed 12-byte buffer,
which is usually but not always enough to hold a token. This increases
the buffer size if needed, similar to what llama.cpp does internally.

Author: Jesse Gross, 2024-08-19 17:54:57 -07:00 (committed by jmorganca)
parent ed19fad862
commit 523d84c563

@@ -260,15 +260,29 @@ type Model struct {
 }
 
 func (m *Model) TokenToPiece(token int) string {
-	buf := make([]byte, 12)
-	C.llama_token_to_piece(
+	tokenLen := 12
+	buf := make([]byte, tokenLen)
+	tokenLen = int(C.llama_token_to_piece(
 		m.c,
 		C.int32_t(token),
 		(*C.char)(unsafe.Pointer(&buf[0])),
-		C.int32_t(12),
+		C.int32_t(tokenLen),
 		C.int32_t(0),
 		C.bool(true),
-	)
+	))
+	if tokenLen < 0 {
+		tokenLen = -tokenLen
+		buf = make([]byte, tokenLen)
+		C.llama_token_to_piece(
+			m.c,
+			C.int32_t(token),
+			(*C.char)(unsafe.Pointer(&buf[0])),
+			C.int32_t(tokenLen),
+			C.int32_t(0),
+			C.bool(true),
+		)
+	}
 	return strings.TrimRight(string(buf), "\x00")
 }
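
For reference, the retry branch relies on llama_token_to_piece's return
convention: it returns the number of bytes written, or the negative of the
required length when the supplied buffer is too small, so the second call can
allocate exactly enough space. A minimal sketch of a call site built on top of
this method (the detokenize helper below is illustrative, not part of the
commit; it assumes the same package as Model):

// detokenize is a hypothetical helper showing how callers can lean on
// TokenToPiece to handle buffer growth internally.
func detokenize(m *Model, tokens []int) string {
	var sb strings.Builder
	for _, t := range tokens {
		// Each piece comes back as a complete string, so callers never
		// need to guess the maximum token length in advance.
		sb.WriteString(m.TokenToPiece(t))
	}
	return sb.String()
}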