From 523d84c56377737501910f7dee8d9dac5864a55c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 19 Aug 2024 17:54:57 -0700 Subject: [PATCH] llama.go: Use dynamic buffer for TokenToPiece The cgo binding for llama_token_to_piece uses a fixed 12 byte buffer, which is usually but not always enough to hold a token. This increases the buffer size if needed, similar to what llama.cpp does internally. --- llama/llama.go | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/llama/llama.go b/llama/llama.go index 35ac0d4e..98f86438 100644 --- a/llama/llama.go +++ b/llama/llama.go @@ -260,15 +260,29 @@ type Model struct { } func (m *Model) TokenToPiece(token int) string { - buf := make([]byte, 12) - C.llama_token_to_piece( + tokenLen := 12 + buf := make([]byte, tokenLen) + tokenLen = int(C.llama_token_to_piece( m.c, C.int32_t(token), (*C.char)(unsafe.Pointer(&buf[0])), - C.int32_t(12), + C.int32_t(tokenLen), C.int32_t(0), C.bool(true), - ) + )) + if tokenLen < 0 { + tokenLen = -tokenLen + + buf = make([]byte, tokenLen) + C.llama_token_to_piece( + m.c, + C.int32_t(token), + (*C.char)(unsafe.Pointer(&buf[0])), + C.int32_t(tokenLen), + C.int32_t(0), + C.bool(true), + ) + } return strings.TrimRight(string(buf), "\x00") }