diff --git a/api/types.go b/api/types.go index afdeb10f..f40819a3 100644 --- a/api/types.go +++ b/api/types.go @@ -242,6 +242,7 @@ type Runner struct { UseMMap *bool `json:"use_mmap,omitempty"` UseMLock bool `json:"use_mlock,omitempty"` NumThread int `json:"num_thread,omitempty"` + Reranking bool `json:"reranking,omitempty"` } // EmbedRequest is the request passed to [Client.Embed]. diff --git a/llm/server.go b/llm/server.go index 0c732ea9..c5a1da64 100644 --- a/llm/server.go +++ b/llm/server.go @@ -189,6 +189,10 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter "--batch-size", strconv.Itoa(opts.NumBatch), } + if opts.Reranking { + params = append(params, "--reranking") + } + if opts.NumGPU >= 0 { params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU)) } diff --git a/server/routes.go b/server/routes.go index 33cdbbda..4d19a827 100644 --- a/server/routes.go +++ b/server/routes.go @@ -371,6 +371,10 @@ func (s *Server) RerankHandler(c *gin.Context) { return } + if req.Options == nil { + req.Options = make(map[string]any) + } + req.Options["reranking"] = true r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive) if err != nil { handleScheduleError(c, req.Model, err)