diff --git a/llama/runner/runner.go b/llama/runner/runner.go index 9aca6cfe..c78f2ee8 100644 --- a/llama/runner/runner.go +++ b/llama/runner/runner.go @@ -178,7 +178,7 @@ func (s *Server) run(ctx context.Context) { for j, t := range seq.tokens { // todo: make this n_batch - if j > s.batchSize { + if j >= s.batchSize { break } batch.Add(t, seq.nPast, []int{i}, !seq.prompt())