ColossalAI/colossalai/shardformer/shard/shardformer.py

from typing import Dict, List, Tuple

import torch.nn as nn
from torch import Tensor

from colossalai.cluster import DistCoordinator

from ..policies.base_policy import Policy
from .shard_config import ShardConfig
from .sharder import ModelSharder


class ShardFormer:
    """
    Entry point that shards a model according to a ``ShardConfig`` and a policy.

    The instance only stores configuration; the actual work is delegated to
    ``ModelSharder`` each time :meth:`optimize` is called.

    Example:

    ```python
    from colossalai.shardformer import ShardFormer, ShardConfig
    from transformers import BertForMaskedLM
    import colossalai
    import torch

    colossalai.launch_from_torch(config={})

    org_model = BertForMaskedLM.from_pretrained('bert-base-uncased')
    shard_config = ShardConfig()
    shard_former = ShardFormer(shard_config=shard_config)
    model, shared_params = shard_former.optimize(org_model)
    ```
    """

    def __init__(self, shard_config: ShardConfig):
        """
        Store the sharding configuration and create a ``DistCoordinator``.

        Args:
            shard_config (`ShardConfig`): configuration forwarded to
                ``ModelSharder`` on every :meth:`optimize` call.
        """
        # Created eagerly and kept on the instance; no method defined in this
        # class reads it.
        self.coordinator = DistCoordinator()
        self.shard_config = shard_config

    def optimize(self, model: nn.Module, policy: Policy = None) -> Tuple[nn.Module, List[Dict[int, Tensor]]]:
        r"""
        Shard ``model`` using the stored shard config and the given policy.

        Args:
            model (`torch.nn.Module`): the original model to be sharded
            policy (`Policy`): optional custom sharding policy; it is passed to
                ``ModelSharder`` unchanged, so ``None`` leaves the choice of
                policy to ``ModelSharder``

        Returns:
            A tuple ``(model, shared_params)``: the same ``model`` object that was
            passed in (presumably modified in place by ``ModelSharder.shard()``)
            and whatever ``ModelSharder.shard()`` returns as the shared parameters.
        """
        model_sharder = ModelSharder(policy=policy, shard_config=self.shard_config, model=model)
        # ``model`` is only a reference here, so building the tuple before or
        # after ``shard()`` runs yields the same object.
        return model, model_sharder.shard()
[pipeline] update shardformer docstring 1 year ago			`from typing import Dict, List, Tuple`

[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 1 year ago			`import torch.nn as nn`
[pipeline] update shardformer docstring 1 year ago			`from torch import Tensor`
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 1 year ago
[shardformer] integrate with data parallelism (#4103) 1 year ago			`from colossalai.cluster import DistCoordinator`
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 1 year ago
[shardformer] rename policy file name 1 year ago			`from ..policies.base_policy import Policy`
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 1 year ago			`from .shard_config import ShardConfig`
			`from .sharder import ModelSharder`


			`class ShardFormer:`
			`"""`
			`Parallelize model based on the given config and policy`

			`Example:`

			```python
			`from colossalai.shardformer import ShardFormer, ShardConfig`
			`from transformers import BertForMaskedLM`
			`import colossalai`
			`import torch`

			`colossalai.launch_from_torch(config={})`

			`org_model = BertForMaskedLM.from_pretrained('bert-base-uncased')`
[shardformer] refactored some doc and api (#4137) * [shardformer] refactored some doc and api * polish code 1 year ago			`shard_config = ShardConfig()`
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 1 year ago			`shard_former = ShardFormer(shard_config=shard_config)`
[pipeline] update shardformer docstring 1 year ago			`model, shared_params = shard_former.optimize(org_model)`
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 1 year ago			```
			`"""`

			`def __init__(self, shard_config: ShardConfig):`
			`self.coordinator = DistCoordinator()`
			`self.shard_config = shard_config`

[pipeline] update shardformer docstring 1 year ago			`def optimize(self, model: nn.Module, policy: Policy = None) -> Tuple[nn.Module, List[Dict[int, Tensor]]]:`
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 1 year ago			`r"""`
[shardformer] refactored some doc and api (#4137) * [shardformer] refactored some doc and api * polish code 1 year ago			`This method will optimize the model based on the given policy.`
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 1 year ago
			`Args:`
			model (`torch.nn.Model`): the origin huggingface model
			shard_config (`ShardConfig`): the config for distribute information
			policy (`Policy`): the custom policy for sharding
[pipeline] update shardformer docstring 1 year ago
			`Returns: the sharded model and the shared parameters`
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 1 year ago			`"""`
[shardformer] integrate with data parallelism (#4103) 1 year ago			`sharder = ModelSharder(model=model, shard_config=self.shard_config, policy=policy)`
[pipeline] update shardformer policy 1 year ago			`shared_params = sharder.shard()`
			`return model, shared_params`