fix(timeout): larger timeout (#495)

* larger initialize timeout

* unify time format

* update timeout thresholds
pull/512/head
jiaopenglong 2023-11-21 19:19:22 +08:00 committed by GitHub
parent eba2b859fc
commit f5aea7e08c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 7 additions and 7 deletions

View File

@@ -118,7 +118,7 @@ def filter_kwargs(func, kwargs):
def launch_time():
    """Return a timestamp string that is computed once and then reused.

    On the first call (while the module-level ``CURRENT_TIME`` is falsy) the
    current local time is formatted as ``MM-DD-HH:MM:SS`` and stored in
    ``CURRENT_TIME``. Every later call returns that stored value unchanged.

    Returns:
        str: the cached timestamp.
    """
    global CURRENT_TIME
    if not CURRENT_TIME:
        # A single assignment only: the previous "%m-%d:%H:%M:%S" value was
        # overwritten immediately and never observable by callers.
        CURRENT_TIME = datetime.now().strftime("%m-%d-%H:%M:%S")
    return CURRENT_TIME

View File

@@ -39,14 +39,14 @@ ENABLE_TIMEOUT = os.getenv("INTERNLM_ENABLE_TIMEOUT", None)
timeout_threshold_dict = {
"initialize_distributed_env": 120,
"initialize_distributed_env": 240,
"nopp_forward_backward_step": 360,
"initialize_model": 10,
"initialize_optimizer": 20,
"optim_step": 30,
"initialize_model": 60,
"initialize_optimizer": 60,
"optim_step": 60,
"get_train_data_loader": 600,
"get_validation_data_loader": 60,
"load_new_batch": 10,
"get_validation_data_loader": 120,
"load_new_batch": 20,
"record_current_batch_training_metrics": 10,
"save_checkpoint": 1200,
"interleaved_forward_backward_step": 600,