From 523d84c56377737501910f7dee8d9dac5864a55c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 19 Aug 2024 17:54:57 -0700 Subject: [PATCH] llama.go: Use dynamic buffer for TokenToPiece The cgo binding for llama_token_to_piece uses a fixed 12 byte buffer, which is usually but not always enough to hold a token. This increases the buffer size if needed, similar to what llama.cpp does internally. --- llama/llama.go | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/llama/llama.go b/llama/llama.go index 35ac0d4e..98f86438 100644 --- a/llama/llama.go +++ b/llama/llama.go @@ -260,15 +260,29 @@ type Model struct { } func (m *Model) TokenToPiece(token int) string { - buf := make([]byte, 12) - C.llama_token_to_piece( + tokenLen := 12 + buf := make([]byte, tokenLen) + tokenLen = int(C.llama_token_to_piece( m.c, C.int32_t(token), (*C.char)(unsafe.Pointer(&buf[0])), - C.int32_t(12), + C.int32_t(tokenLen), C.int32_t(0), C.bool(true), - ) + )) + if tokenLen < 0 { + tokenLen = -tokenLen + + buf = make([]byte, tokenLen) + C.llama_token_to_piece( + m.c, + C.int32_t(token), + (*C.char)(unsafe.Pointer(&buf[0])), + C.int32_t(tokenLen), + C.int32_t(0), + C.bool(true), + ) + } return strings.TrimRight(string(buf), "\x00") }