Compare commits

...

1 Commits

Author SHA1 Message Date
Jeffrey Morgan
ca7c3f7e0f limit num_predict to num_ctx 2024-03-12 21:59:17 -07:00

View File

@ -172,6 +172,19 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
slog.Info(fmt.Sprintf("loaded %d images", len(predict.Images)))
}
// Limit the number of predictions to the maximum context length
// this will cause no more than two context shifts
// TODO: limit this further to num_ctx - len(prompt) to avoid
// any context shifts at all
if predict.Options.NumPredict > llm.options.NumCtx {
slog.Warn(fmt.Sprintf("requested num_predict is greater than the context length (%d > %d), using %d instead", predict.Options.NumPredict, llm.options.NumCtx, llm.options.NumCtx))
predict.Options.NumPredict = llm.options.NumCtx
}
if predict.Options.NumPredict == -1 {
predict.Options.NumPredict = llm.options.NumCtx
}
request := map[string]any{
"prompt": predict.Prompt,
"stream": true,