diff --git a/internlm/initialize/initialize_trainer.py b/internlm/initialize/initialize_trainer.py index 2ed22c5..01f0d14 100644 --- a/internlm/initialize/initialize_trainer.py +++ b/internlm/initialize/initialize_trainer.py @@ -67,6 +67,7 @@ def initialize_trainer( handler = PipelineSharedModuleGradientHandler(model=model, optimizer=optimizer) gradient_handlers.append(handler) + scheduler = None scheduler = NonPipelineScheduler(gradient_accumulation_size=gpc.config.data.gradient_accumulation) engine = Engine(