From 6602e793c011805bec36d7d5b1f27537fe2f2353 Mon Sep 17 00:00:00 2001
From: Jeffrey Morgan
Date: Fri, 10 May 2024 13:06:13 -0700
Subject: [PATCH] Use `--quantize` flag and `quantize` api parameter (#4321)

* rename `--quantization` to `--quantize`

* backwards

* Update api/types.go

Co-authored-by: Michael Yang

---------

Co-authored-by: Michael Yang
---
 api/types.go     | 13 ++++++++-----
 cmd/cmd.go       |  6 +++---
 server/routes.go |  7 ++++++-
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/api/types.go b/api/types.go
index c210d419..fcab6fef 100644
--- a/api/types.go
+++ b/api/types.go
@@ -197,14 +197,17 @@ type EmbeddingResponse struct {
 
 // CreateRequest is the request passed to [Client.Create].
 type CreateRequest struct {
-	Model        string `json:"model"`
-	Path         string `json:"path"`
-	Modelfile    string `json:"modelfile"`
-	Stream       *bool  `json:"stream,omitempty"`
-	Quantization string `json:"quantization,omitempty"`
+	Model     string `json:"model"`
+	Path      string `json:"path"`
+	Modelfile string `json:"modelfile"`
+	Stream    *bool  `json:"stream,omitempty"`
+	Quantize  string `json:"quantize,omitempty"`
 
 	// Name is deprecated, see Model
 	Name string `json:"name"`
+
+	// Quantization is deprecated, see Quantize
+	Quantization string `json:"quantization,omitempty"`
 }
 
 // DeleteRequest is the request passed to [Client.Delete].
diff --git a/cmd/cmd.go b/cmd/cmd.go
index bf305d81..7814734a 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -142,9 +142,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return nil
 	}
 
-	quantization, _ := cmd.Flags().GetString("quantization")
+	quantize, _ := cmd.Flags().GetString("quantize")
 
-	request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantization: quantization}
+	request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantize: quantize}
 	if err := client.Create(cmd.Context(), &request, fn); err != nil {
 		return err
 	}
@@ -1051,7 +1051,7 @@ func NewCLI() *cobra.Command {
 	}
 
 	createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
-	createCmd.Flags().StringP("quantization", "q", "", "Quantization level.")
+	createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")
 
 	showCmd := &cobra.Command{
 		Use: "show MODEL",
diff --git a/server/routes.go b/server/routes.go
index ec9f0e76..600a30fa 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -554,7 +554,12 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
 		ctx, cancel := context.WithCancel(c.Request.Context())
 		defer cancel()
 
-		if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(req.Quantization), modelfile, fn); err != nil {
+		quantization := req.Quantization
+		if req.Quantize != "" {
+			quantization = req.Quantize
+		}
+
+		if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(quantization), modelfile, fn); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()