|
|
|
@ -131,14 +131,14 @@ class DynamicBatchManager:
|
|
|
|
|
self.stats_tool.count_prompt_tokens(new_batch)
|
|
|
|
|
self.running_batch = new_batch
|
|
|
|
|
yield from self._prefill_batch(self.running_batch)
|
|
|
|
|
self._filter_runing_batch()
|
|
|
|
|
self._filter_running_batch()
|
|
|
|
|
self.has_wait_tokens = 0
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
if self.has_wait_tokens < self.max_wait_tokens:
|
|
|
|
|
self.stats_tool.count_output_tokens(self.running_batch)
|
|
|
|
|
yield from self._decode_batch(self.running_batch)
|
|
|
|
|
self._filter_runing_batch()
|
|
|
|
|
self._filter_running_batch()
|
|
|
|
|
self.has_wait_tokens += 1
|
|
|
|
|
return
|
|
|
|
|
else:
|
|
|
|
@ -154,7 +154,7 @@ class DynamicBatchManager:
|
|
|
|
|
else:
|
|
|
|
|
self.stats_tool.count_output_tokens(self.running_batch)
|
|
|
|
|
yield from self._decode_batch(self.running_batch)
|
|
|
|
|
self._filter_runing_batch()
|
|
|
|
|
self._filter_running_batch()
|
|
|
|
|
self.has_wait_tokens += 1
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
@ -243,7 +243,7 @@ class DynamicBatchManager:
|
|
|
|
|
self._filter_batch(batch)
|
|
|
|
|
yield from self._output_process(finished_reqs)
|
|
|
|
|
|
|
|
|
|
def _filter_runing_batch(self):
|
|
|
|
|
def _filter_running_batch(self):
|
|
|
|
|
if self.running_batch is not None and self.running_batch.is_clear():
|
|
|
|
|
self.running_batch = None
|
|
|
|
|
|
|
|
|
|