llama.go: Use dynamic buffer for TokenToPiece

The cgo binding for llama_token_to_piece uses a fixed 12-byte buffer,
which is usually but not always enough to hold a token. This increases
the buffer size if needed, similar to what llama.cpp does internally.

Author: Jesse Gross, 2024-08-19 17:54:57 -07:00 (committed by jmorganca)
parent ed19fad862
commit 523d84c563

@@ -260,15 +260,29 @@ type Model struct {
 }
 
 func (m *Model) TokenToPiece(token int) string {
-	buf := make([]byte, 12)
-	C.llama_token_to_piece(
+	tokenLen := 12
+	buf := make([]byte, tokenLen)
+	tokenLen = int(C.llama_token_to_piece(
 		m.c,
 		C.int32_t(token),
 		(*C.char)(unsafe.Pointer(&buf[0])),
-		C.int32_t(12),
+		C.int32_t(tokenLen),
 		C.int32_t(0),
 		C.bool(true),
-	)
+	))
+	if tokenLen < 0 {
+		tokenLen = -tokenLen
+		buf = make([]byte, tokenLen)
+		C.llama_token_to_piece(
+			m.c,
+			C.int32_t(token),
+			(*C.char)(unsafe.Pointer(&buf[0])),
+			C.int32_t(tokenLen),
+			C.int32_t(0),
+			C.bool(true),
+		)
+	}
 	return strings.TrimRight(string(buf), "\x00")
 }
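
For reference, the retry branch relies on llama_token_to_piece's return
convention: it returns the number of bytes written, or the negative of the
required length when the supplied buffer is too small, so the second call can
allocate exactly enough space. A minimal sketch of a call site built on top of
this method (the detokenize helper below is illustrative, not part of the
commit; it assumes the same package as Model):

// detokenize is a hypothetical helper showing how callers can lean on
// TokenToPiece to handle buffer growth internally.
func detokenize(m *Model, tokens []int) string {
	var sb strings.Builder
	for _, t := range tokens {
		// Each piece comes back as a complete string, so callers never
		// need to guess the maximum token length in advance.
		sb.WriteString(m.TokenToPiece(t))
	}
	return sb.String()
}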