From d10ee42f68d090db17a8b87cac46ab6d1c2c8ca2 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 29 Nov 2023 13:38:37 +0800
Subject: [PATCH] [format] applied code formatting on changed files in pull request 5088 (#5127)

Co-authored-by: github-actions
---
 .../booster/plugin/hybrid_parallel_plugin.py  | 14 ++++++++++----
 .../test_plugin/test_gemini_plugin.py         | 19 +++++++++++++------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/colossalai/booster/plugin/hybrid_parallel_plugin.py b/colossalai/booster/plugin/hybrid_parallel_plugin.py
index 59a0deaeb..ed3a61ded 100644
--- a/colossalai/booster/plugin/hybrid_parallel_plugin.py
+++ b/colossalai/booster/plugin/hybrid_parallel_plugin.py
@@ -28,8 +28,8 @@ from colossalai.shardformer import ShardConfig, ShardFormer
 from colossalai.shardformer.layer.utils import SeqParallelUtils
 from colossalai.shardformer.policies.base_policy import Policy
 from colossalai.tensor.d_tensor.api import is_distributed_tensor
-from colossalai.zero.low_level import LowLevelZeroOptimizer
 from colossalai.utils.device import get_current_device
+from colossalai.zero.low_level import LowLevelZeroOptimizer
 
 from .pp_plugin_base import PipelinePluginBase
 
@@ -385,7 +385,9 @@ class HybridParallelNaiveOptimizer(OptimizerWrapper):
 
                 total_norm_exponentiated += grad_norm_exponentiated
 
-            total_norm_exponentiated_cuda = torch.tensor([float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32)
+            total_norm_exponentiated_cuda = torch.tensor(
+                [float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32
+            )
             if self.tp_size > 1:
                 # compute norm in tp process group
                 dist.all_reduce(tensor=total_norm_exponentiated_cuda, op=dist.ReduceOp.SUM, group=self.tp_pg)
@@ -586,7 +588,9 @@ class HybridParallelAMPOptimizer(MixedPrecisionOptimizer):
 
                 total_norm_exponentiated += grad_norm_exponentiated
 
-            total_norm_exponentiated_cuda = torch.tensor([float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32)
+            total_norm_exponentiated_cuda = torch.tensor(
+                [float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32
+            )
             if self.tp_size > 1:
                 # compute norm in tp process group
                 dist.all_reduce(tensor=total_norm_exponentiated_cuda, op=dist.ReduceOp.SUM, group=self.tp_pg)
@@ -837,7 +841,9 @@ class HybridParallelZeroOptimizer(LowLevelZeroOptimizer):
 
                 total_norm_exponentiated += grad_norm_exponentiated
 
-            total_norm_exponentiated_cuda = torch.tensor([float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32)
+            total_norm_exponentiated_cuda = torch.tensor(
+                [float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32
+            )
             if dp_size > 1:
                 # compute norm in dp process group
                 dist.all_reduce(tensor=total_norm_exponentiated_cuda, op=dist.ReduceOp.SUM, group=self.dp_pg)
diff --git a/tests/test_booster/test_plugin/test_gemini_plugin.py b/tests/test_booster/test_plugin/test_gemini_plugin.py
index ddb4484ff..3c496ff64 100644
--- a/tests/test_booster/test_plugin/test_gemini_plugin.py
+++ b/tests/test_booster/test_plugin/test_gemini_plugin.py
@@ -1,7 +1,7 @@
 from contextlib import nullcontext
 from typing import Optional
 
-import pytest 
+import pytest
 import torch
 import torch.distributed as dist
 
@@ -11,8 +11,6 @@ from colossalai.booster.plugin import GeminiPlugin
 from colossalai.fx import is_compatible_with_meta
 from colossalai.lazy.lazy_init import LazyInitContext
 from colossalai.nn.optimizer import HybridAdam
-from colossalai.tensor.d_tensor.api import clear_layout_converter
-from colossalai.shardformer.layer.utils import Randomizer
 from colossalai.tensor.colo_parameter import ColoParameter
 from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
 from tests.kit.model_zoo import model_zoo
@@ -26,7 +24,13 @@ def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn, zero_size, t
             ctx = nullcontext()
         extra_dp_size = dist.get_world_size() // (zero_size * tp_size)
         enable_all_optimization = True if tp_size > 1 else False
-        plugin = GeminiPlugin(max_norm=1.0, initial_scale=2**5, tp_size=tp_size, extra_dp_size=extra_dp_size, enable_all_optimization=enable_all_optimization)
+        plugin = GeminiPlugin(
+            max_norm=1.0,
+            initial_scale=2**5,
+            tp_size=tp_size,
+            extra_dp_size=extra_dp_size,
+            enable_all_optimization=enable_all_optimization,
+        )
         booster = Booster(plugin=plugin)
         with ctx:
             model = model_fn()
@@ -66,7 +70,9 @@ def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn, zero_size, t
 @parameterize("init_method", ["none"])
 @parameterize("zero_size", [2])
 @parameterize("tp_size", [2])
-def check_gemini_plugin(subset: str, init_method: str = "none", early_stop: bool = True, zero_size: int = 1, tp_size: int = 1):
+def check_gemini_plugin(
+    subset: str, init_method: str = "none", early_stop: bool = True, zero_size: int = 1, tp_size: int = 1
+):
     """check gemini plugin over model zoo
 
     Args:
@@ -161,6 +167,7 @@ def run_dist(rank, world_size, port, early_stop: bool = True):
 def test_gemini_plugin(early_stop: bool = True):
     spawn(run_dist, 4, early_stop=early_stop)
 
+
 @pytest.mark.largedist
 @rerun_if_address_is_in_use()
 def test_gemini_plugin_3d(early_stop: bool = True):
@@ -168,4 +175,4 @@ def test_gemini_plugin_3d(early_stop: bool = True):
 
 
 if __name__ == "__main__":
-    test_gemini_plugin(early_stop=False)
\ No newline at end of file
+    test_gemini_plugin(early_stop=False)
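Note: the three reformatted hunks in hybrid_parallel_plugin.py wrap the same gradient-norm pattern: the locally accumulated, norm_type-exponentiated norm is placed in a one-element float32 tensor on the current device, summed across the relevant parallel process groups with dist.all_reduce, and only then raised to 1/norm_type. Below is a minimal standalone sketch of that pattern; the function name global_grad_norm, the process_groups argument, and the CPU fallback are illustrative assumptions, not ColossalAI's API.

# Illustrative sketch only -- not ColossalAI code; it mirrors the reduce-then-root
# gradient-norm pattern seen in the reformatted hunks above.
from typing import Iterable, Optional

import torch
import torch.distributed as dist


def global_grad_norm(
    grads: Iterable[torch.Tensor],
    norm_type: float = 2.0,
    process_groups: Optional[list] = None,
) -> float:
    # Accumulate the norm_type-exponentiated contribution of the local gradient shards.
    total_norm_exponentiated = 0.0
    for grad in grads:
        total_norm_exponentiated += grad.data.float().norm(norm_type).item() ** norm_type

    # Put the scalar on the accelerator so it can take part in collective ops,
    # mirroring torch.tensor(..., device=get_current_device(), dtype=torch.float32) in the patch.
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    total_norm_exponentiated_cuda = torch.tensor(
        [total_norm_exponentiated], device=device, dtype=torch.float32
    )

    # Sum the exponentiated norms over every process group the gradients are sharded
    # across (tp/pp for the naive and AMP optimizers, dp as well for the ZeRO optimizer),
    # then undo the exponentiation once at the end.
    for group in process_groups or []:
        dist.all_reduce(tensor=total_norm_exponentiated_cuda, op=dist.ReduceOp.SUM, group=group)
    return total_norm_exponentiated_cuda.item() ** (1.0 / norm_type)

Reducing the exponentiated sum rather than the finished norm is what makes the result independent of how the gradients are sharded: the per-rank partial sums are additive, so a single SUM all-reduce per group recovers the global norm exactly.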