Pass the --reranking flag when starting the server for the rerank handler

Signed-off-by: Liu Yuan <namei.unix@gmail.com>
This commit is contained in:
Liu Yuan 2024-10-31 22:48:27 +08:00
parent 67818b5093
commit bfae776f34
3 changed files with 9 additions and 0 deletions

View File

@ -242,6 +242,7 @@ type Runner struct {
UseMMap *bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"`
NumThread int `json:"num_thread,omitempty"`
Reranking bool `json:"reranking,omitempty"`
}
// EmbedRequest is the request passed to [Client.Embed].

View File

@ -189,6 +189,10 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
"--batch-size", strconv.Itoa(opts.NumBatch),
}
if opts.Reranking {
params = append(params, "--reranking")
}
if opts.NumGPU >= 0 {
params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
}

View File

@ -371,6 +371,10 @@ func (s *Server) RerankHandler(c *gin.Context) {
return
}
if req.Options == nil {
req.Options = make(map[string]any)
}
req.Options["reranking"] = true
r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
if err != nil {
handleScheduleError(c, req.Model, err)