diff --git a/llm/filetype.go b/llm/filetype.go index e5e9410d..cb442848 100644 --- a/llm/filetype.go +++ b/llm/filetype.go @@ -29,6 +29,11 @@ const ( fileTypeQ2_K_S fileTypeQ3_K_XS fileTypeIQ3_XXS + fileTypeIQ1_S + fileTypeIQ4_NL + fileTypeIQ3_S + fileTypeIQ2_S + fileTypeIQ4_XS fileTypeUnknown ) @@ -77,6 +82,16 @@ func ParseFileType(s string) (fileType, error) { return fileTypeQ2_K_S, nil case "Q3_K_XS": return fileTypeQ3_K_XS, nil + case "IQ1_S": + return fileTypeIQ1_S, nil + case "IQ4_NL": + return fileTypeIQ4_NL, nil + case "IQ3_S": + return fileTypeIQ3_S, nil + case "IQ2_S": + return fileTypeIQ2_S, nil + case "IQ4_XS": + return fileTypeIQ4_XS, nil case "IQ3_XXS": return fileTypeIQ3_XXS, nil default: @@ -128,6 +143,16 @@ func (t fileType) String() string { return "Q2_K_S" case fileTypeQ3_K_XS: return "Q3_K_XS" + case fileTypeIQ1_S: + return "IQ1_S" + case fileTypeIQ4_NL: + return "IQ4_NL" + case fileTypeIQ3_S: + return "IQ3_S" + case fileTypeIQ2_S: + return "IQ2_S" + case fileTypeIQ4_XS: + return "IQ4_XS" case fileTypeIQ3_XXS: return "IQ3_XXS" default: diff --git a/llm/ggml.go b/llm/ggml.go index a83bba8f..7093ca64 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -125,9 +125,9 @@ type Tensor struct { func (t Tensor) blockSize() uint64 { switch { - case t.Kind < 2: + case t.Kind < 2 || (t.Kind > 23 && t.Kind < 29): return 1 - case t.Kind < 10: + case t.Kind < 10 || t.Kind == 20: return 32 default: return 256 @@ -172,6 +172,16 @@ func (t Tensor) typeSize() uint64 { return 2 + 2*blockSize/8 + blockSize/32 case 18: // IQ3_XXS return 2 + 3*blockSize/8 + case 19: // IQ1_S + return 2 + blockSize/8 + blockSize/16 + case 20: // IQ4_NL + return 2 + blockSize/2 + case 21: // IQ3_S + return 2 + 2*blockSize/8 + blockSize/8 + blockSize/32 + 4 + case 22: // IQ2_S + return 2 + blockSize/4 + blockSize/16 + case 23: // IQ4_XS + return 4 + blockSize/2 + blockSize/64 default: return 0 }