Enable the --reranking flag when starting the llama server for the rerank handler
Signed-off-by: Liu Yuan <namei.unix@gmail.com>
This commit is contained in:
parent
67818b5093
commit
bfae776f34
@ -242,6 +242,7 @@ type Runner struct {
|
|||||||
UseMMap *bool `json:"use_mmap,omitempty"`
|
UseMMap *bool `json:"use_mmap,omitempty"`
|
||||||
UseMLock bool `json:"use_mlock,omitempty"`
|
UseMLock bool `json:"use_mlock,omitempty"`
|
||||||
NumThread int `json:"num_thread,omitempty"`
|
NumThread int `json:"num_thread,omitempty"`
|
||||||
|
Reranking bool `json:"reranking,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// EmbedRequest is the request passed to [Client.Embed].
|
// EmbedRequest is the request passed to [Client.Embed].
|
||||||
|
@ -189,6 +189,10 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
|
|||||||
"--batch-size", strconv.Itoa(opts.NumBatch),
|
"--batch-size", strconv.Itoa(opts.NumBatch),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if opts.Reranking {
|
||||||
|
params = append(params, "--reranking")
|
||||||
|
}
|
||||||
|
|
||||||
if opts.NumGPU >= 0 {
|
if opts.NumGPU >= 0 {
|
||||||
params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
|
params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
|
||||||
}
|
}
|
||||||
|
@ -371,6 +371,10 @@ func (s *Server) RerankHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if req.Options == nil {
|
||||||
|
req.Options = make(map[string]any)
|
||||||
|
}
|
||||||
|
req.Options["reranking"] = true
|
||||||
r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
|
r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleScheduleError(c, req.Model, err)
|
handleScheduleError(c, req.Model, err)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user