ColossalAI/colossalai/tensor/d_tensor/layout.py

import operator
from dataclasses import dataclass
from functools import reduce

import torch

from colossalai.device.device_mesh import DeviceMesh

from .misc import DuplicatedShardingDimensionError, LayoutException, ShardingNotDivisibleError
from .sharding_spec import ShardingSpec


class Layout:
    """Describes how a global tensor is laid out across a device mesh.

    Attributes:
        device_mesh: the device mesh on which the tensor is distributed.
        device_type: the device type given at construction, e.g. 'cpu' or 'cuda'.
        sharding_spec: the sharding specification; its ``dim_partition_dict`` maps a tensor
            dimension to the list of mesh axes that dimension is sharded over.
        entire_shape: the shape of the whole (unsharded) global tensor.
    """

    def __init__(self, device_mesh: DeviceMesh, device_type: torch.device, sharding_spec: ShardingSpec,
                 entire_shape: torch.Size):
        """Store the layout description and validate it immediately.

        Raises:
            DuplicatedShardingDimensionError: propagated from ``_sanity_check``.
            ShardingNotDivisibleError: propagated from ``_sanity_check``.
        """
        self.device_mesh = device_mesh
        self.device_type = device_type
        self.sharding_spec = sharding_spec
        self.entire_shape = entire_shape
        # reject inconsistent layouts at construction time
        self._sanity_check()

    def __hash__(self) -> int:
        """Hash the layout by the string form of its sharding spec only.

        The device mesh, device type and global shape do not contribute to the hash.
        """
        spec_text = f'{self.sharding_spec}'
        return hash(spec_text)

    def get_sharded_shape_per_device(self):
        """Compute the shape of the local shard described by this layout.

        Returns:
            A ``torch.Size`` equal to ``entire_shape`` with every sharded dimension divided by the
            product of the mesh sizes of the axes it is sharded over.

        Raises:
            AssertionError: if a sharded dimension is not evenly divisible by its partition count.
        """
        local_shape = list(self.entire_shape)
        for dim, mesh_axes in self.sharding_spec.dim_partition_dict.items():
            # number of pieces this tensor dimension is cut into
            shard_partitions = 1
            for mesh_dim in mesh_axes:
                shard_partitions *= self.device_mesh.mesh_shape[mesh_dim]
            assert local_shape[dim] % shard_partitions == 0, \
                f'Cannot shard dimension {dim} into {shard_partitions} partitions.'
            local_shape[dim] //= shard_partitions
        return torch.Size(local_shape)

    def _sanity_check(self):
        """Validate the sharding spec against the device mesh and the global shape.

        Raises:
            DuplicatedShardingDimensionError: if a mesh axis listed in ``dim_partition_dict`` is
                used more than once or is not an axis of the logical device mesh.
            ShardingNotDivisibleError: if a sharded tensor dimension is not divisible by the
                product of the mesh sizes it is sharded over.
        """
        # every axis of the logical mesh starts out available; using an axis consumes it
        free_axes = list(range(self.device_mesh.logical_mesh_id.dim()))
        partition_dict = self.sharding_spec.dim_partition_dict

        # first pass: each mesh axis may appear at most once across all tensor dimensions
        for dim, mesh_axes in partition_dict.items():
            for element in mesh_axes:
                if element not in free_axes:
                    raise DuplicatedShardingDimensionError(
                        f"find an invalid sharding axis {element} in dim_partition_dict in tensor dimension {dim}.")
                free_axes.remove(element)

        # second pass: each sharded dimension must split evenly over its devices
        for dim, mesh_axes in partition_dict.items():
            tensor_dim_size = self.entire_shape[dim]
            num_devices = reduce(operator.mul, (self.device_mesh.mesh_shape[element] for element in mesh_axes), 1)
            if tensor_dim_size % num_devices == 0:
                continue
            raise ShardingNotDivisibleError(
                f'The size of dimension at index {dim} is {tensor_dim_size}, it cannot be sharded over {num_devices} devices.'
            )
[DTensor] refactor sharding spec (#2987) * [autoparallel] refactor sharding spec * rename function name 2 years ago			`import operator`
[DTensor] implementation of dtensor (#2946) * [DTensor] implementation of dtensor * test layout convert * polish 2 years ago			`from dataclasses import dataclass`
[DTensor] refactor sharding spec (#2987) * [autoparallel] refactor sharding spec * rename function name 2 years ago			`from functools import reduce`
[DTensor] implementation of dtensor (#2946) * [DTensor] implementation of dtensor * test layout convert * polish 2 years ago
			`import torch`

			`from colossalai.device.device_mesh import DeviceMesh`
[DTensor] refactor sharding spec (#2987) * [autoparallel] refactor sharding spec * rename function name 2 years ago
			`from .misc import DuplicatedShardingDimensionError, LayoutException, ShardingNotDivisibleError`
			`from .sharding_spec import ShardingSpec`
[DTensor] implementation of dtensor (#2946) * [DTensor] implementation of dtensor * test layout convert * polish 2 years ago

			`class Layout:`
			`"""Layout of a tensor.`

			`Attributes:`
[doc] Fix typo under colossalai and doc(#3618) * Fixed several spelling errors under colossalai * Fix the spelling error in colossalai and docs directory * Cautious Changed the spelling error under the example folder * Update runtime_preparation_pass.py revert autograft to autograd * Update search_chunk.py utile to until * Update check_installation.py change misteach to mismatch in line 91 * Update 1D_tensor_parallel.md revert to perceptron * Update 2D_tensor_parallel.md revert to perceptron in line 73 * Update 2p5D_tensor_parallel.md revert to perceptron in line 71 * Update 3D_tensor_parallel.md revert to perceptron in line 80 * Update README.md revert to resnet in line 42 * Update reorder_graph.py revert to indice in line 7 * Update p2p.py revert to megatron in line 94 * Update initialize.py revert to torchrun in line 198 * Update routers.py change to detailed in line 63 * Update routers.py change to detailed in line 146 * Update README.md revert random number in line 402 2 years ago			`device_mesh: the device mesh to store the tensor distributed.`
[DTensor] implementation of dtensor (#2946) * [DTensor] implementation of dtensor * test layout convert * polish 2 years ago			`device_type: the type of the device mesh, e.g. 'cpu' or 'cuda'.`
			`sharding_spec: the sharding specification to describe how the tensor is sharded.`
			`entire_shape: the entire shape of the global tensor.`
			`"""`
[DTensor] refactor sharding spec (#2987) * [autoparallel] refactor sharding spec * rename function name 2 years ago
			`def __init__(self, device_mesh: DeviceMesh, device_type: torch.device, sharding_spec: ShardingSpec,`
			`entire_shape: torch.Size):`
			`self.device_mesh = device_mesh`
			`self.device_type = device_type`
			`self.sharding_spec = sharding_spec`
			`self.entire_shape = entire_shape`
			`self._sanity_check()`

			`def __hash__(self) -> int:`
			`return hash(f'{self.sharding_spec}')`

			`def get_sharded_shape_per_device(self):`
			`sharded_shape = list(self.entire_shape)`
			`for dim, shard_list in self.sharding_spec.dim_partition_dict.items():`
			`mesh_list = [self.device_mesh.mesh_shape[mesh_dim] for mesh_dim in shard_list]`
			`shard_partitions = reduce(operator.mul, mesh_list, 1)`
			`assert sharded_shape[`
			`dim] % shard_partitions == 0, f'Cannot shard dimension {dim} into {shard_partitions} partitions.'`
			`sharded_shape[dim] //= shard_partitions`
			`return torch.Size(sharded_shape)`

			`def _sanity_check(self):`
			`sharding_spec = self.sharding_spec`

			`# make sure all axes in logical device mesh only be used once`
			`dim_check_list = list(range(self.device_mesh.logical_mesh_id.dim()))`
			`for dim, shard_list in sharding_spec.dim_partition_dict.items():`
			`for element in shard_list:`
			`if element in dim_check_list:`
			`dim_check_list.remove(element)`
			`else:`
			`raise DuplicatedShardingDimensionError(`
			`f"find an invalid sharding axis {element} in dim_partition_dict in tensor dimension {dim}.")`

			`# make sure that the sharding for a dimension is divisible by the number of devices`
			`for dim, shard_list in sharding_spec.dim_partition_dict.items():`
			`tensor_dim_size = self.entire_shape[dim]`
			`num_devices = 1`

			`for element in shard_list:`
			`num_devices *= self.device_mesh.mesh_shape[element]`

			`if tensor_dim_size % num_devices != 0:`
			`raise ShardingNotDivisibleError(`
			`f'The size of dimension at index {dim} is {tensor_dim_size}, it cannot be sharded over {num_devices} devices.'`
			`)`