mirror of https://github.com/InternLM/InternLM
fix(timeout): larger timeout (#495)
* larger initialize timeout
* unify time format
* update timeout thresholds

pull/512/head
parent eba2b859fc
commit f5aea7e08c
|
@@ -118,7 +118,7 @@ def filter_kwargs(func, kwargs):
|
|||
def launch_time():
|
||||
global CURRENT_TIME
|
||||
if not CURRENT_TIME:
|
||||
CURRENT_TIME = datetime.now().strftime("%m-%d:%H:%M:%S")
|
||||
CURRENT_TIME = datetime.now().strftime("%m-%d-%H:%M:%S")
|
||||
return CURRENT_TIME
|
||||
|
||||
|
||||
|
|
|
@@ -39,14 +39,14 @@ ENABLE_TIMEOUT = os.getenv("INTERNLM_ENABLE_TIMEOUT", None)
|
|||
|
||||
|
||||
timeout_threshold_dict = {
|
||||
"initialize_distributed_env": 120,
|
||||
"initialize_distributed_env": 240,
|
||||
"nopp_forward_backward_step": 360,
|
||||
"initialize_model": 10,
|
||||
"initialize_optimizer": 20,
|
||||
"optim_step": 30,
|
||||
"initialize_model": 60,
|
||||
"initialize_optimizer": 60,
|
||||
"optim_step": 60,
|
||||
"get_train_data_loader": 600,
|
||||
"get_validation_data_loader": 60,
|
||||
"load_new_batch": 10,
|
||||
"get_validation_data_loader": 120,
|
||||
"load_new_batch": 20,
|
||||
"record_current_batch_training_metrics": 10,
|
||||
"save_checkpoint": 1200,
|
||||
"interleaved_forward_backward_step": 600,
|
||||
|
|
Loading…
Reference in New Issue