runner.go: Implement RepeatLastN to penalize repeated tokens

RepeatLastN is a user-facing parameter that is exposed through the
APIs but is not currently plumbed through to the sampling code.
This commit is contained in:
Jesse Gross 2024-08-20 11:21:19 -07:00 committed by jmorganca
parent eccd4dd8d2
commit 477f529d26
4 changed files with 5 additions and 0 deletions

View File

@ -390,6 +390,7 @@ type SamplingParams struct {
TfsZ float32
TypicalP float32
Temp float32
RepeatLastN int
PenaltyRepeat float32
PenaltyFreq float32
PenaltyPresent float32
@ -408,6 +409,7 @@ func NewSamplingContext(params SamplingParams) *SamplingContext {
// Copy each Go-side sampling parameter into the matching field of the C
// parameter struct, converting to the C type the field expects.
cparams.tfs_z = C.float(params.TfsZ)
cparams.typical_p = C.float(params.TypicalP)
cparams.temp = C.float(params.Temp)
cparams.penalty_last_n = C.int32_t(params.RepeatLastN)
cparams.penalty_repeat = C.float(params.PenaltyRepeat)
cparams.penalty_freq = C.float(params.PenaltyFreq)
// The presence penalty must come from PenaltyPresent; reading PenaltyFreq
// here would silently discard the caller's presence penalty.
cparams.penalty_present = C.float(params.PenaltyPresent)

View File

@ -402,6 +402,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
samplingParams.TfsZ = req.TFSZ
samplingParams.TypicalP = req.TypicalP
samplingParams.Temp = req.Temperature
samplingParams.RepeatLastN = req.RepeatLastN
samplingParams.PenaltyRepeat = req.RepeatPenalty
samplingParams.PenaltyFreq = req.FrequencyPenalty
samplingParams.PenaltyPresent = req.PresencePenalty

View File

@ -10,6 +10,7 @@ struct llama_sampling_context *llama_sampling_cinit(struct llama_sampling_cparam
sparams.tfs_z = params->tfs_z;
sparams.typical_p = params->typical_p;
sparams.temp = params->temp;
sparams.penalty_last_n = params->penalty_last_n;
sparams.penalty_repeat = params->penalty_repeat;
sparams.penalty_freq = params->penalty_freq;
sparams.penalty_present = params->penalty_present;

View File

@ -16,6 +16,7 @@ extern "C"
float tfs_z;
float typical_p;
float temp;
int32_t penalty_last_n;
float penalty_repeat;
float penalty_freq;
float penalty_present;