From 87d2defda63526978b71fe372c145063f8a73fa6 Mon Sep 17 00:00:00 2001
From: Ofey Chan
Date: Wed, 4 Jan 2023 10:19:46 +0800
Subject: [PATCH] [NFC] polish
 colossalai/auto_parallel/tensor_shard/deprecated/op_handler/layer_norm_handler.py
 code style (#2305)

---
 .../op_handler/layer_norm_handler.py          | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/layer_norm_handler.py b/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/layer_norm_handler.py
index c75fdbbb6..8062d0f4b 100644
--- a/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/layer_norm_handler.py
+++ b/colossalai/auto_parallel/tensor_shard/deprecated/op_handler/layer_norm_handler.py
@@ -2,10 +2,14 @@ import operator
 from functools import reduce
 
 import torch
-from colossalai.auto_parallel.tensor_shard.deprecated._utils import (enumerate_all_possible_1d_sharding,
-                                                                     enumerate_all_possible_2d_sharding,
-                                                                     generate_sharding_size, ignore_sharding_exception)
-from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import (ShardingStrategy, StrategiesVector)
+
+from colossalai.auto_parallel.tensor_shard.deprecated._utils import (
+    enumerate_all_possible_1d_sharding,
+    enumerate_all_possible_2d_sharding,
+    generate_sharding_size,
+    ignore_sharding_exception,
+)
+from colossalai.auto_parallel.tensor_shard.deprecated.sharding_strategy import ShardingStrategy, StrategiesVector
 
 from .operator_handler import OperatorHandler
 
@@ -63,19 +67,19 @@ class LayerNormHandler(OperatorHandler):
 
         Argument:
             sharding_size_forward(int): The forward activation will be divided
                 into sharding_size_forward number partions.
-            sharding_size_backward_activation(int): The backward activation will 
+            sharding_size_backward_activation(int): The backward activation will
                 be divided into sharding_size_backward_activation number partions.
             sharding_size_weight(int): The backward weight will be divided
                 into sharding_size_weight number partions.
 
         Return:
-            memory_cost(Tuple[float]): Memory cost per device with this 
+            memory_cost(Tuple[float]): Memory cost per device with this
                 specific strategy, the first element of this tuple is forward memory cost,
                 and the second element of this tuple is backward memory cost.
-                memory_cost_forward(float): Memory cost of forward activation per 
+                memory_cost_forward(float): Memory cost of forward activation per
                 device with this specific strategy.
-                memory_cost_backward_activation(float): Memory cost of backward activation 
+                memory_cost_backward_activation(float): Memory cost of backward activation
                 per device with this specific strategy.
         '''
         # compute the memory cost of this strategy
@@ -216,7 +220,7 @@ class LayerNormHandler(OperatorHandler):
        norm_handler.register_strategy()
        for strategy in norm_handler.strategies_vector:
            print(f'{strategy.name}, computation_cost: {strategy.compute_cost}, memory_cost: {strategy.memory_cost}')
-    
+
        Output:
            RS0 = RS0 x S0, computation_cost: 131072, memory_cost: 524288.0
            RS1 = RS1 x S1, computation_cost: 131072, memory_cost: 524288.0
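
Note on the docstring touched above: _generate_memory_cost describes how the per-device memory cost is derived from the three sharding sizes. The following is a minimal standalone sketch of that arithmetic, not code from this patch; the helper name estimate_layer_norm_memory_cost and the 4-byte element size are illustrative assumptions.

# Sketch of the per-device memory-cost arithmetic described in the docstring.
# Names and the 4-byte float assumption are illustrative; the real logic lives
# in LayerNormHandler._generate_memory_cost.
from typing import Tuple


def estimate_layer_norm_memory_cost(activation_numel: int,
                                    weight_numel: int,
                                    sharding_size_forward: int,
                                    sharding_size_backward_activation: int,
                                    sharding_size_weight: int,
                                    bytes_per_elem: int = 4) -> Tuple[float, float]:
    """Return (forward_cost, backward_cost) in bytes per device."""
    # Forward: the output activation is split into sharding_size_forward partitions.
    forward_cost = activation_numel * bytes_per_elem / sharding_size_forward
    # Backward: the activation gradient is split into
    # sharding_size_backward_activation partitions, and the weight gradient
    # into sharding_size_weight partitions.
    backward_activation_cost = activation_numel * bytes_per_elem / sharding_size_backward_activation
    backward_weight_cost = weight_numel * bytes_per_elem / sharding_size_weight
    return forward_cost, backward_activation_cost + backward_weight_cost


if __name__ == '__main__':
    # Example: a (4, 64, 1024) activation normalized with a 1024-element weight,
    # every tensor sharded 2-way.
    fwd, bwd = estimate_layer_norm_memory_cost(4 * 64 * 1024, 1024, 2, 2, 2)
    print(f'forward: {fwd}, backward: {bwd}')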