mirror of https://github.com/InternLM/InternLM
feat: force save ckpt when loop exit
parent
91480c5b63
commit
d2e01dfed8
|
@@ -980,12 +980,26 @@ now step_count is {train_state.step_count}",
|
||||||
|
|
||||||
return now_break, now_save_ckpt, save_type
|
return now_break, now_save_ckpt, save_type
|
||||||
|
|
||||||
def is_now_to_save_ckpt(self, train_state) -> (bool, CheckpointSaveType, bool):
|
def is_now_to_save_ckpt(self, train_state, force=False) -> (bool, CheckpointSaveType, bool):
|
||||||
|
"""The function is used to determine whether to save ckpt now."""
|
||||||
save_ckpts, save_type, now_break = False, CheckpointSaveType.NORMAL_CHECKPOINT, False
|
save_ckpts, save_type, now_break = False, CheckpointSaveType.NORMAL_CHECKPOINT, False
|
||||||
if self.oss_snapshot_freq > 1 and train_state.step_count % self.oss_snapshot_freq == 0:
|
if force:
|
||||||
|
return True, CheckpointSaveType.NORMAL_CHECKPOINT, False
|
||||||
|
|
||||||
|
if (
|
||||||
|
self.oss_snapshot_freq > 1
|
||||||
|
and train_state.step_count > 0
|
||||||
|
and train_state.step_count % self.oss_snapshot_freq == 0
|
||||||
|
):
|
||||||
save_ckpts, save_type = True, CheckpointSaveType.SNAPSHOT_CHECKPOINT
|
save_ckpts, save_type = True, CheckpointSaveType.SNAPSHOT_CHECKPOINT
|
||||||
if train_state.step_count % self.checkpoint_every == 0 or train_state.step_count == train_state.total_steps:
|
|
||||||
|
if (
|
||||||
|
train_state.step_count > 0
|
||||||
|
and train_state.step_count % self.checkpoint_every == 0
|
||||||
|
or train_state.step_count == train_state.total_steps
|
||||||
|
):
|
||||||
save_ckpts, save_type = True, CheckpointSaveType.NORMAL_CHECKPOINT
|
save_ckpts, save_type = True, CheckpointSaveType.NORMAL_CHECKPOINT
|
||||||
|
|
||||||
now_break, singal_save_ckpts, singal_save_type = self.quit_signal_handler(train_state)
|
now_break, singal_save_ckpts, singal_save_type = self.quit_signal_handler(train_state)
|
||||||
if save_ckpts is False:
|
if save_ckpts is False:
|
||||||
save_ckpts = singal_save_ckpts
|
save_ckpts = singal_save_ckpts
|
||||||
|
|
Loading…
Reference in New Issue