|
__init__.py
|
initial commit
|
2023-07-06 12:55:23 +08:00 |
|
checkpoint.py
|
initial commit
|
2023-07-06 12:55:23 +08:00 |
|
common.py
|
fix(timeout): larger timeout (#495)
|
2023-11-21 19:19:22 +08:00 |
|
megatron_timers.py
|
feat: add runtime diag (#297)
|
2023-09-08 17:56:46 +08:00 |
|
model_checkpoint.py
|
auto resume
|
2023-12-07 10:19:48 +08:00 |
|
parallel.py
|
feat(optimizer): zero gradient count (#449)
|
2023-10-27 16:26:55 +08:00 |
|
registry.py
|
Merge develop to main (#233)
|
2023-08-24 22:03:04 +08:00 |
|
storage_manager.py
|
auto resume
|
2023-12-07 10:19:48 +08:00 |
|
timeout.py
|
fix(timeout): larger timeout (#495)
|
2023-11-21 19:19:22 +08:00 |
|
writer.py
|
fix(train): unify the exp paths (#492)
|
2023-11-11 20:15:59 +08:00 |