[format] applied code formatting on changed files in pull request 5088 (#5127)

Co-authored-by: github-actions <github-actions@github.com>
pull/5133/head
github-actions[bot] 1 year ago committed by GitHub
parent 9110406a47
commit d10ee42f68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -28,8 +28,8 @@ from colossalai.shardformer import ShardConfig, ShardFormer
 from colossalai.shardformer.layer.utils import SeqParallelUtils
 from colossalai.shardformer.policies.base_policy import Policy
 from colossalai.tensor.d_tensor.api import is_distributed_tensor
-from colossalai.zero.low_level import LowLevelZeroOptimizer
 from colossalai.utils.device import get_current_device
+from colossalai.zero.low_level import LowLevelZeroOptimizer
 
 from .pp_plugin_base import PipelinePluginBase
@@ -385,7 +385,9 @@ class HybridParallelNaiveOptimizer(OptimizerWrapper):
                 total_norm_exponentiated += grad_norm_exponentiated
 
-            total_norm_exponentiated_cuda = torch.tensor([float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32)
+            total_norm_exponentiated_cuda = torch.tensor(
+                [float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32
+            )
             if self.tp_size > 1:
                 # compute norm in tp process group
                 dist.all_reduce(tensor=total_norm_exponentiated_cuda, op=dist.ReduceOp.SUM, group=self.tp_pg)
@@ -586,7 +588,9 @@ class HybridParallelAMPOptimizer(MixedPrecisionOptimizer):
                 total_norm_exponentiated += grad_norm_exponentiated
 
-            total_norm_exponentiated_cuda = torch.tensor([float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32)
+            total_norm_exponentiated_cuda = torch.tensor(
+                [float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32
+            )
             if self.tp_size > 1:
                 # compute norm in tp process group
                 dist.all_reduce(tensor=total_norm_exponentiated_cuda, op=dist.ReduceOp.SUM, group=self.tp_pg)
@@ -837,7 +841,9 @@ class HybridParallelZeroOptimizer(LowLevelZeroOptimizer):
                 total_norm_exponentiated += grad_norm_exponentiated
 
-            total_norm_exponentiated_cuda = torch.tensor([float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32)
+            total_norm_exponentiated_cuda = torch.tensor(
+                [float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32
+            )
             if dp_size > 1:
                 # compute norm in dp process group
                 dist.all_reduce(tensor=total_norm_exponentiated_cuda, op=dist.ReduceOp.SUM, group=self.dp_pg)
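Note: the three hunks above are the same black-style reformatting applied to the grad-norm computation in the three hybrid-parallel optimizer wrappers. For readers unfamiliar with the pattern, here is a minimal sketch of the distributed gradient-norm idea; the function name, the gradients/norm_type arguments, and the single pg process group are illustrative assumptions, not the exact code in this file.

import torch
import torch.distributed as dist

from colossalai.utils.device import get_current_device


def compute_grad_norm(gradients, norm_type=2.0, pg=None):
    # accumulate |g|^p over the locally held gradients
    total_norm_exponentiated = 0.0
    for grad in gradients:
        total_norm_exponentiated += grad.data.double().norm(norm_type) ** norm_type

    total_norm_exponentiated_cuda = torch.tensor(
        [float(total_norm_exponentiated)], device=get_current_device(), dtype=torch.float32
    )
    if pg is not None and dist.get_world_size(group=pg) > 1:
        # sum the exponentiated partial norms across ranks, then take the p-th root once
        dist.all_reduce(tensor=total_norm_exponentiated_cuda, op=dist.ReduceOp.SUM, group=pg)
    return total_norm_exponentiated_cuda.item() ** (1.0 / norm_type)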

@@ -1,7 +1,7 @@
 from contextlib import nullcontext
 from typing import Optional
-import pytest
 
+import pytest
 import torch
 import torch.distributed as dist
@@ -11,8 +11,6 @@ from colossalai.booster.plugin import GeminiPlugin
 from colossalai.fx import is_compatible_with_meta
 from colossalai.lazy.lazy_init import LazyInitContext
 from colossalai.nn.optimizer import HybridAdam
-from colossalai.tensor.d_tensor.api import clear_layout_converter
-from colossalai.shardformer.layer.utils import Randomizer
 from colossalai.tensor.colo_parameter import ColoParameter
 from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
 from tests.kit.model_zoo import model_zoo
@@ -26,7 +24,13 @@ def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn, zero_size, t
     ctx = nullcontext()
     extra_dp_size = dist.get_world_size() // (zero_size * tp_size)
     enable_all_optimization = True if tp_size > 1 else False
-    plugin = GeminiPlugin(max_norm=1.0, initial_scale=2**5, tp_size=tp_size, extra_dp_size=extra_dp_size, enable_all_optimization=enable_all_optimization)
+    plugin = GeminiPlugin(
+        max_norm=1.0,
+        initial_scale=2**5,
+        tp_size=tp_size,
+        extra_dp_size=extra_dp_size,
+        enable_all_optimization=enable_all_optimization,
+    )
     booster = Booster(plugin=plugin)
     with ctx:
         model = model_fn()
@@ -66,7 +70,9 @@ def run_fn(init_method, model_fn, data_gen_fn, output_transform_fn, zero_size, t
 @parameterize("init_method", ["none"])
 @parameterize("zero_size", [2])
 @parameterize("tp_size", [2])
-def check_gemini_plugin(subset: str, init_method: str = "none", early_stop: bool = True, zero_size: int = 1, tp_size: int = 1):
+def check_gemini_plugin(
+    subset: str, init_method: str = "none", early_stop: bool = True, zero_size: int = 1, tp_size: int = 1
+):
     """check gemini plugin over model zoo
 
     Args:
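The stacked @parameterize decorators above come from colossalai.testing and re-run the decorated check once per listed value. A small hedged illustration follows; the function name and values are made up for this note and are not part of the diff.

from colossalai.testing import parameterize


@parameterize("zero_size", [2])
@parameterize("tp_size", [1, 2])
def check_config(zero_size, tp_size, early_stop: bool = True):
    # invoked once per combination of the parameterized values
    print(f"zero_size={zero_size}, tp_size={tp_size}, early_stop={early_stop}")


check_config(early_stop=False)  # non-parameterized kwargs pass through unchanged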
@@ -161,6 +167,7 @@ def run_dist(rank, world_size, port, early_stop: bool = True):
 def test_gemini_plugin(early_stop: bool = True):
     spawn(run_dist, 4, early_stop=early_stop)
 
+
 @pytest.mark.largedist
 @rerun_if_address_is_in_use()
 def test_gemini_plugin_3d(early_stop: bool = True):
@@ -168,4 +175,4 @@ def test_gemini_plugin_3d(early_stop: bool = True):
 
 if __name__ == "__main__":
     test_gemini_plugin(early_stop=False)
