ColossalAI/colossalai/shardformer/policies/auto_policy.py

import importlib
from dataclasses import dataclass

import torch.nn as nn

from .base_policy import Policy

__all__ = ["PolicyLocation", "get_autopolicy", "import_policy"]


@dataclass
class PolicyLocation:
    """
    PolicyLocation describes where a policy class can be found, so that the
    policy can be imported on demand instead of at module import time.

    Args:
        file_name (str): The module (file) name of the policy under colossalai.shardformer.policies,
            i.e. the last component of ``colossalai.shardformer.policies.<file_name>``
        class_name (str): The name of the policy class, looked up as an attribute of that module
    """
    file_name: str
    class_name: str


# Registry mapping a model's fully qualified class name (module path + class
# qualname, as computed by _fullname) to the location of its policy class.
#
# Policies are referenced by location rather than imported here, because each
# policy file imports its own model zoo library; only the policy file that is
# actually needed gets imported (see import_policy).
_POLICY_LIST = {
    # BERT
    "transformers.models.bert.modeling_bert.BertModel":
        PolicyLocation(file_name="bert", class_name="BertModelPolicy"),
    "transformers.models.bert.modeling_bert.BertForPreTraining":
        PolicyLocation(file_name="bert", class_name="BertForPreTrainingPolicy"),
    "transformers.models.bert.modeling_bert.BertLMHeadModel":
        PolicyLocation(file_name="bert", class_name="BertLMHeadModelPolicy"),
    "transformers.models.bert.modeling_bert.BertForMaskedLM":
        PolicyLocation(file_name="bert", class_name="BertForMaskedLMPolicy"),
    "transformers.models.bert.modeling_bert.BertForSequenceClassification":
        PolicyLocation(file_name="bert", class_name="BertForSequenceClassificationPolicy"),
    "transformers.models.bert.modeling_bert.BertForTokenClassification":
        PolicyLocation(file_name="bert", class_name="BertForTokenClassificationPolicy"),
    "transformers.models.bert.modeling_bert.BertForNextSentencePrediction":
        PolicyLocation(file_name="bert", class_name="BertForNextSentencePredictionPolicy"),
    "transformers.models.bert.modeling_bert.BertForMultipleChoice":
        PolicyLocation(file_name="bert", class_name="BertForMultipleChoicePolicy"),

    # LLaMA
    "transformers.models.llama.modeling_llama.LlamaModel":
        PolicyLocation(file_name="llama", class_name="LlamaPolicy"),
    "transformers.models.llama.modeling_llama.LlamaForCausalLM":
        PolicyLocation(file_name="llama", class_name="LlamaForCausalLMPolicy"),
    "transformers.models.llama.modeling_llama.LlamaForSequenceClassification":
        PolicyLocation(file_name="llama", class_name="LlamaForSequenceClassificationPolicy"),

    # T5
    "transformers.models.t5.modeling_t5.T5Model":
        PolicyLocation(file_name="t5", class_name="T5ModelPolicy"),
    "transformers.models.t5.modeling_t5.T5ForConditionalGeneration":
        PolicyLocation(file_name="t5", class_name="T5ForConditionalGenerationPolicy"),
    "transformers.models.t5.modeling_t5.T5EncoderModel":
        PolicyLocation(file_name="t5", class_name="T5EncoderPolicy"),

    # GPT2
    "transformers.models.gpt2.modeling_gpt2.GPT2Model":
        PolicyLocation(file_name="gpt2", class_name="GPT2ModelPolicy"),
    "transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel":
        PolicyLocation(file_name="gpt2", class_name="GPT2LMHeadModelPolicy"),
    "transformers.models.gpt2.modeling_gpt2.GPT2DoubleHeadsModel":
        PolicyLocation(file_name="gpt2", class_name="GPT2DoubleHeadsModelPolicy"),
    "transformers.models.gpt2.modeling_gpt2.GPT2ForTokenClassification":
        PolicyLocation(file_name="gpt2", class_name="GPT2ForTokenClassificationPolicy"),
    "transformers.models.gpt2.modeling_gpt2.GPT2ForSequenceClassification":
        PolicyLocation(file_name="gpt2", class_name="GPT2ForSequenceClassificationPolicy"),

    # OPT
    "transformers.models.opt.modeling_opt.OPTModel":
        PolicyLocation(file_name="opt", class_name="OPTModelPolicy"),
    "transformers.models.opt.modeling_opt.OPTForCausalLM":
        PolicyLocation(file_name="opt", class_name="OPTForCausalLMPolicy"),
    "transformers.models.opt.modeling_opt.OPTForSequenceClassification":
        PolicyLocation(file_name="opt", class_name="OPTForSequenceClassificationPolicy"),
    "transformers.models.opt.modeling_opt.OPTForQuestionAnswering":
        PolicyLocation(file_name="opt", class_name="OPTForQuestionAnsweringPolicy"),

    # Bloom
    "transformers.models.bloom.modeling_bloom.BloomModel":
        PolicyLocation(file_name="bloom", class_name="BloomModelPolicy"),
    "transformers.models.bloom.modeling_bloom.BloomForCausalLM":
        PolicyLocation(file_name="bloom", class_name="BloomForCausalLMPolicy"),
    "transformers.models.bloom.modeling_bloom.BloomForSequenceClassification":
        PolicyLocation(file_name="bloom", class_name="BloomForSequenceClassificationPolicy"),
    "transformers.models.bloom.modeling_bloom.BloomForTokenClassification":
        PolicyLocation(file_name="bloom", class_name="BloomForTokenClassificationPolicy"),
    "transformers.models.bloom.modeling_bloom.BloomForQuestionAnswering":
        PolicyLocation(file_name="bloom", class_name="BloomForQuestionAnsweringPolicy"),
}


def import_policy(policy_location: PolicyLocation) -> Policy:
    """
    Resolve a policy location to the object it points at.

    The module ``colossalai.shardformer.policies.<file_name>`` is imported on
    demand, and the attribute named ``class_name`` is then fetched from it.

    Args:
        policy_location (PolicyLocation): The file name and class name of the policy to load

    Return:
        The attribute called ``policy_location.class_name`` on the imported module
    """
    policy_module = importlib.import_module(f"colossalai.shardformer.policies.{policy_location.file_name}")
    policy_cls = getattr(policy_module, policy_location.class_name)
    return policy_cls


def _fullname(obj):
    """
    Build the dotted name of the class of ``obj``: ``<module>.<qualname>``.

    Classes living in ``builtins`` are reported by qualname only, so a string
    yields ``'str'`` rather than ``'builtins.str'``.
    """
    cls = obj.__class__
    mod_name = cls.__module__
    if mod_name != 'builtins':
        return mod_name + '.' + cls.__qualname__
    # builtin types are reported without the module prefix
    return cls.__qualname__


def get_autopolicy(model: nn.Module) -> Policy:
    r"""
    Return the auto policy for the model

    The policy is looked up by the fully qualified class name of ``model``
    (exact match against the keys of ``_POLICY_LIST``). The matching policy
    file is imported on demand and the policy class is instantiated without
    arguments.

    Args:
        model (:class:`nn.Module`): The model to get the auto policy

    Return:
        :class:`Policy`: The auto policy for the model

    Raises:
        NotImplementedError: If no policy is registered for the class of ``model``
    """
    full_name = _fullname(model)
    policy_location = _POLICY_LIST.get(full_name)

    if policy_location is None:
        # Report the fully qualified name: it is the key the lookup uses, so a
        # module-path mismatch is visible in the message.
        raise NotImplementedError(f"Auto policy for {full_name} is not implemented.\n"
                                  f"Supported models are {list(_POLICY_LIST)}")

    policy_cls = import_policy(policy_location)
    return policy_cls()
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`import importlib`
			`from dataclasses import dataclass`

[shardformer] init shardformer code structure (#3731) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example 2023-05-22 07:02:17 +00:00			`import torch.nn as nn`

[shardformer] rename policy file name 2023-07-05 07:13:00 +00:00			`from .base_policy import Policy`
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 2023-06-15 09:55:42 +00:00
[shardformer] import huggingface implicitly (#4101) 2023-06-30 02:56:29 +00:00			`__all__ = ["PolicyLocation", "get_autopolicy", "import_policy"]`

[shardformer]: Feature/shardformer, add some docstring and readme (#3816) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example * add share weight and train example * add train * add docstring and readme * add docstring for other files * pre-commit 2023-05-24 02:26:46 +00:00
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`@dataclass`
			`class PolicyLocation:`
[shardformer] init shardformer code structure (#3731) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example 2023-05-22 07:02:17 +00:00			`"""`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`PolicyLocation describes the location of a policy class.`
[Shardformer] Downstream bert (#3979) * add dist dropout in model * update docstring and bert policy with dropout * refactor basepolicy and sharded, update bert * update format * update gpt2 policy * update bert policy * remove unused code * update readme for new policy usage * add downstream model of bert * remove unused code 2023-06-15 09:56:51 +00:00
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`Args:`
			`file_name (str): The file name of the policy under colossalai.shardformer.policies`
			`class_name (str): The class name of the policy class`
			`"""`
			`file_name: str`
			`class_name: str`


			`# we don't want to import all policies here`
			`# as each policy file imports its own model zoo library`
			`# we will allow the user to only import the policy file needed`
			`_POLICY_LIST = {`
			`# BERT`
			`"transformers.models.bert.modeling_bert.BertModel":`
support kit use for bert/gpt test (#4055) * support kit use for bert test * support kit test for gpt2 2023-06-22 02:33:06 +00:00			`PolicyLocation(file_name="bert", class_name="BertModelPolicy"),`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`"transformers.models.bert.modeling_bert.BertForPreTraining":`
[pipeline] move bert related pipeline components to shardformer (#4187) * move bert related pipeline components to shardformer * fix bugs * revision * fix bert model tests * fix bert_lm_head model tests * fix tests * fix tests * done checks * skip bloom 2023-07-07 07:41:00 +00:00			`PolicyLocation(file_name="bert", class_name="BertForPreTrainingPolicy"),`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`"transformers.models.bert.modeling_bert.BertLMHeadModel":`
			`PolicyLocation(file_name="bert", class_name="BertLMHeadModelPolicy"),`
support kit use for bert/gpt test (#4055) * support kit use for bert test * support kit test for gpt2 2023-06-22 02:33:06 +00:00			`"transformers.models.bert.modeling_bert.BertForMaskedLM":`
			`PolicyLocation(file_name="bert", class_name="BertForMaskedLMPolicy"),`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`"transformers.models.bert.modeling_bert.BertForSequenceClassification":`
			`PolicyLocation(file_name="bert", class_name="BertForSequenceClassificationPolicy"),`
support kit use for bert/gpt test (#4055) * support kit use for bert test * support kit test for gpt2 2023-06-22 02:33:06 +00:00			`"transformers.models.bert.modeling_bert.BertForTokenClassification":`
			`PolicyLocation(file_name="bert", class_name="BertForTokenClassificationPolicy"),`
			`"transformers.models.bert.modeling_bert.BertForNextSentencePrediction":`
			`PolicyLocation(file_name="bert", class_name="BertForNextSentencePredictionPolicy"),`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`"transformers.models.bert.modeling_bert.BertForMultipleChoice":`
			`PolicyLocation(file_name="bert", class_name="BertForMultipleChoicePolicy"),`

			`# LLaMA`
			`"transformers.models.llama.modeling_llama.LlamaModel":`
			`PolicyLocation(file_name="llama", class_name="LlamaPolicy"),`
			`"transformers.models.llama.modeling_llama.LlamaForCausalLM":`
			`PolicyLocation(file_name="llama", class_name="LlamaForCausalLMPolicy"),`
			`"transformers.models.llama.modeling_llama.LlamaForSequenceClassification":`
			`PolicyLocation(file_name="llama", class_name="LlamaForSequenceClassificationPolicy"),`

			`# T5`
[shardformer] supported T5 and its variants (#4045) 2023-06-19 09:57:37 +00:00			`"transformers.models.t5.modeling_t5.T5Model":`
			`PolicyLocation(file_name="t5", class_name="T5ModelPolicy"),`
			`"transformers.models.t5.modeling_t5.T5ForConditionalGeneration":`
			`PolicyLocation(file_name="t5", class_name="T5ForConditionalGenerationPolicy"),`
			`"transformers.models.t5.modeling_t5.T5EncoderModel":`
			`PolicyLocation(file_name="t5", class_name="T5EncoderPolicy"),`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00
			`# GPT2`
[shardformer] add gpt2 test and layer class refactor (#4041) * add gpt2 test and layer class refactor * add dropout in gpt2 policy 2023-06-20 03:45:16 +00:00			`"transformers.models.gpt2.modeling_gpt2.GPT2Model":`
			`PolicyLocation(file_name="gpt2", class_name="GPT2ModelPolicy"),`
support kit use for bert/gpt test (#4055) * support kit use for bert test * support kit test for gpt2 2023-06-22 02:33:06 +00:00			`"transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel":`
			`PolicyLocation(file_name="gpt2", class_name="GPT2LMHeadModelPolicy"),`
			`"transformers.models.gpt2.modeling_gpt2.GPT2DoubleHeadsModel":`
			`PolicyLocation(file_name="gpt2", class_name="GPT2DoubleHeadsModelPolicy"),`
			`"transformers.models.gpt2.modeling_gpt2.GPT2ForTokenClassification":`
			`PolicyLocation(file_name="gpt2", class_name="GPT2ForTokenClassificationPolicy"),`
			`"transformers.models.gpt2.modeling_gpt2.GPT2ForSequenceClassification":`
			`PolicyLocation(file_name="gpt2", class_name="GPT2ForSequenceClassificationPolicy"),`
[shardformer] shardformer support opt models (#4091) * [shardformer] shardformer support opt models * [shardformer] shardformer support opt models, fix * [shardformer] shardformer support opt models, fix * [shardformer] shardformer support opt models, fix 2023-06-27 09:39:29 +00:00
			`# OPT`
			`"transformers.models.opt.modeling_opt.OPTModel":`
			`PolicyLocation(file_name="opt", class_name="OPTModelPolicy"),`
			`"transformers.models.opt.modeling_opt.OPTForCausalLM":`
			`PolicyLocation(file_name="opt", class_name="OPTForCausalLMPolicy"),`
			`"transformers.models.opt.modeling_opt.OPTForSequenceClassification":`
			`PolicyLocation(file_name="opt", class_name="OPTForSequenceClassificationPolicy"),`
			`"transformers.models.opt.modeling_opt.OPTForQuestionAnswering":`
			`PolicyLocation(file_name="opt", class_name="OPTForQuestionAnsweringPolicy"),`
[format] applied code formatting on changed files in pull request 4152 (#4157) Co-authored-by: github-actions <github-actions@github.com> 2023-07-04 08:07:47 +00:00
[shardformer] supported bloom model (#4098) 2023-06-28 07:04:35 +00:00			`# Bloom`
			`"transformers.models.bloom.modeling_bloom.BloomModel":`
			`PolicyLocation(file_name="bloom", class_name="BloomModelPolicy"),`
			`"transformers.models.bloom.modeling_bloom.BloomForCausalLM":`
			`PolicyLocation(file_name="bloom", class_name="BloomForCausalLMPolicy"),`
			`"transformers.models.bloom.modeling_bloom.BloomForSequenceClassification":`
			`PolicyLocation(file_name="bloom", class_name="BloomForSequenceClassificationPolicy"),`
			`"transformers.models.bloom.modeling_bloom.BloomForTokenClassification":`
			`PolicyLocation(file_name="bloom", class_name="BloomForTokenClassificationPolicy"),`
			`"transformers.models.bloom.modeling_bloom.BloomForQuestionAnswering":`
			`PolicyLocation(file_name="bloom", class_name="BloomForQuestionAnsweringPolicy"),`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`}`


			`def import_policy(policy_location: PolicyLocation) -> Policy:`
			`"""`
			`Dynamically import a Policy class based on the policy location.`
			`"""`
			`module_name = f"colossalai.shardformer.policies.{policy_location.file_name}"`
			`module = importlib.import_module(module_name)`
			`return getattr(module, policy_location.class_name)`
[shardformer] support llama model using shardformer (#3969) adjust layer attr 2023-06-13 06:44:40 +00:00
[shardformer] add gpt2 policy and modify shard and slicer to support (#3883) * add gpt2 policy and modify shard and slicer to support * remove unused code * polish code 2023-06-07 08:09:40 +00:00
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`def _fullname(obj):`
			`"""`
			`Return the full name of an object, including the module name.`
			`"""`
			`klass = obj.__class__`
			`module = klass.__module__`
			`if module == 'builtins':`
			`return klass.__qualname__ # avoid outputs like 'builtins.str'`
			`return module + '.' + klass.__qualname__`
[shardformer] init shardformer code structure (#3731) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example 2023-05-22 07:02:17 +00:00
[shardformer]: Feature/shardformer, add some docstring and readme (#3816) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example * add share weight and train example * add train * add docstring and readme * add docstring for other files * pre-commit 2023-05-24 02:26:46 +00:00
[shardformer] Refactor shardformer api (#4001) * fix an error in readme * simplify code * refactor shardformer * add todo * remove slicer * resolve code review 2023-06-15 09:55:42 +00:00			`def get_autopolicy(model: nn.Module) -> Policy:`
[shardformer]: Feature/shardformer, add some docstring and readme (#3816) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example * add share weight and train example * add train * add docstring and readme * add docstring for other files * pre-commit 2023-05-24 02:26:46 +00:00			`r"""`
[shardformer] init shardformer code structure (#3731) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example 2023-05-22 07:02:17 +00:00			`Return the auto policy for the model`

			`Args:`
[shardformer]: Feature/shardformer, add some docstring and readme (#3816) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example * add share weight and train example * add train * add docstring and readme * add docstring for other files * pre-commit 2023-05-24 02:26:46 +00:00			model (:class:`nn.Module`): The model to get the auto policy
[shardformer] init shardformer code structure (#3731) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example 2023-05-22 07:02:17 +00:00
			`Return:`
[shardformer]: Feature/shardformer, add some docstring and readme (#3816) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example * add share weight and train example * add train * add docstring and readme * add docstring for other files * pre-commit 2023-05-24 02:26:46 +00:00			:class:`Policy`: The auto policy for the model
[shardformer] init shardformer code structure (#3731) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example 2023-05-22 07:02:17 +00:00			`"""`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`full_name = _fullname(model)`
			`policy_location = _POLICY_LIST.get(full_name, None)`

			`if policy_location is None:`
[shardformer]: Feature/shardformer, add some docstring and readme (#3816) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example * add share weight and train example * add train * add docstring and readme * add docstring for other files * pre-commit 2023-05-24 02:26:46 +00:00			`raise NotImplementedError(`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`f"Auto policy for {model.__class__.__qualname__} is not implemented\n. Supported models are {list(_POLICY_LIST.keys())}"`
[shardformer]: Feature/shardformer, add some docstring and readme (#3816) * init shardformer code structure * add implement of sharder (inject and replace) * add implement of replace layer to colossal layer * separate different layer policy, add some notion * implement 1d and 2d slicer, can tell col or row * fix bug when slicing and inject model * fix some bug; add inference test example * add share weight and train example * add train * add docstring and readme * add docstring for other files * pre-commit 2023-05-24 02:26:46 +00:00			`)`
[shardformer] adapted llama to the new API (#4036) 2023-06-19 05:53:17 +00:00			`else:`
			`policy = import_policy(policy_location)`
			`return policy()`