From ad0342257170fd6d63998ca524dd6ebcba445f8c Mon Sep 17 00:00:00 2001 From: Richard Lyons Date: Tue, 22 Oct 2024 16:58:21 +0200 Subject: [PATCH 1/2] Add tensors for bitnet/triLMs, Q4_x_x --- llm/filetype.go | 25 +++++++++++++++++++++++++ llm/ggml.go | 12 +++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/llm/filetype.go b/llm/filetype.go index 7a8e9f69..05a2f70f 100644 --- a/llm/filetype.go +++ b/llm/filetype.go @@ -37,6 +37,11 @@ const ( fileTypeIQ2_M fileTypeIQ1_M fileTypeBF16 + fileTypeQ4_0_4_4 + fileTypeQ4_0_4_8 + fileTypeQ4_0_8_8 + fileTypeTQ1_0 + fileTypeTQ2_0 fileTypeUnknown ) @@ -103,6 +108,16 @@ func ParseFileType(s string) (fileType, error) { return fileTypeIQ1_M, nil case "BF16": return fileTypeBF16, nil + case "Q4_0_4_4": + return fileTypeQ4_0_4_4, nil + case "Q4_0_4_8": + return fileTypeQ4_0_4_8, nil + case "Q4_0_8_8": + return fileTypeQ4_0_8_8, nil + case "TQ1_0": + return fileTypeTQ1_0, nil + case "TQ2_0": + return fileTypeTQ2_0, nil default: return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s) } @@ -170,6 +185,16 @@ func (t fileType) String() string { return "IQ1_M" case fileTypeBF16: return "BF16" + case fileTypeQ4_0_4_4: + return "Q4_0_4_4" + case fileTypeQ4_0_4_8: + return "Q4_0_4_8" + case fileTypeQ4_0_8_8: + return "Q4_0_8_8" + case fileTypeTQ1_0: + return "TQ1_0" + case fileTypeTQ2_0: + return "TQ2_0" default: return "unknown" } diff --git a/llm/ggml.go b/llm/ggml.go index e857d4b8..fe060ff8 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -188,7 +188,7 @@ func (t Tensor) blockSize() uint64 { switch t.Kind { case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16 return 1 - case 2, 3, 4, 5, 6, 7, 8, 9, 20: // Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, IQ4_NL + case 2, 3, 4, 5, 6, 7, 8, 9, 20, 31, 32, 33: // Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, IQ4_NL, Q4_0_4_4, Q4_0_4_8, Q4_0_8_8 return 32 default: // All others return 256 @@ -257,6 +257,16 @@ func (t Tensor) typeSize() uint64 { return blockSize/8 + blockSize/16 + blockSize/32 case 30: // BF16 return 2 + case 31: // Q4_0_4_4 + return 2 + blockSize/2 + case 32: // Q4_0_4_8 + return 2 + blockSize/2 + case 33: // Q4_0_8_8 + return 2 + blockSize/2 + case 34: // TQ1_0 + return 2 + blockSize/64 + (blockSize - 4*blockSize/64) / 5 + case 35: // TQ2_0 + return 2 + blockSize/4 default: return 0 } From c24c79c7d08ff0ec29608c1baaf2769772f1c7e2 Mon Sep 17 00:00:00 2001 From: Richard Lyons Date: Tue, 22 Oct 2024 18:10:48 +0200 Subject: [PATCH 2/2] Fix whitespace. --- llm/ggml.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llm/ggml.go b/llm/ggml.go index fe060ff8..52a15a31 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -264,7 +264,7 @@ func (t Tensor) typeSize() uint64 { case 33: // Q4_0_8_8 return 2 + blockSize/2 case 34: // TQ1_0 - return 2 + blockSize/64 + (blockSize - 4*blockSize/64) / 5 + return 2 + blockSize/64 + (blockSize-4*blockSize/64)/5 case 35: // TQ2_0 return 2 + blockSize/4 default: