ColossalAI/colossalai/nn/parallel/layers/module_utils.py

from typing import Dict
from colossalai.tensor import ColoParameter, ComputeSpec, TensorSpec
from . import ColoModule
import torch

# Module-level registry: maps a module class to the ColoModule handler that was
# registered for it through register_colo_module(). The lookup helpers in this
# file match a module against the keys with isinstance().
_COLOSSAL_MODULES: Dict[type, ColoModule] = {}


def register_colo_module(module_type: type, colo_module: ColoModule):
    """Record ``colo_module`` as the handler for ``module_type``.

    The pair is stored in the module-level ``_COLOSSAL_MODULES`` registry.
    Registering the same ``module_type`` again replaces the earlier handler.

    Args:
        module_type: Class used as the registry key.
        colo_module: Handler stored for that class.
    """
    # Item assignment mutates the existing dict, so no ``global`` is needed.
    _COLOSSAL_MODULES[module_type] = colo_module


def is_colo_module(module: torch.nn.Module):
    """Tell whether ``module`` is an instance of any registered module type.

    Args:
        module: Object to test against the keys of ``_COLOSSAL_MODULES``.

    Returns:
        ``True`` if ``isinstance(module, key)`` holds for at least one
        registered key, otherwise ``False``.
    """
    return any(isinstance(module, registered_type) for registered_type in _COLOSSAL_MODULES)


def get_colo_module(module: torch.nn.Module):
    """Look up the handler registered for the type of ``module``.

    Registry entries are scanned in insertion order and the handler of the
    first key that ``module`` is an instance of is returned.

    Args:
        module: Object whose handler is wanted.

    Returns:
        The matching handler from ``_COLOSSAL_MODULES``, or ``None`` when no
        registered type matches.
    """
    for registered_type, handler in _COLOSSAL_MODULES.items():
        if isinstance(module, registered_type):
            return handler
    return None


def check_colo_module(module: torch.nn.Module, recursive=True):
    """Validate the ColoParameter specs of ``module`` against its handler.

    For a module whose type is registered, three things are verified:

    1. every parameter named by the handler is a ``ColoParameter``;
    2. all parameters that carry a compute spec share one compute pattern;
    3. the parameters' dist specs match at least one of the layouts the
       handler allows for that compute pattern.

    Steps 2 and 3 only apply when some parameter yields a compute pattern.
    Modules whose type is not registered are not validated themselves.

    Args:
        module: Module to validate.
        recursive: When equal to ``True``, the children of ``module`` are
            validated as well.

    Raises:
        Exception: If any of the checks above fails.
    """
    if is_colo_module(module):
        handler = get_colo_module(module)

        # Pass 1: type-check the handled params and find their common pattern.
        compute_pattern = None
        for name in handler.get_param_names():
            param = module.get_parameter(name)
            if not isinstance(param, ColoParameter):
                raise Exception(f'Invalid ColoParameter spec: {param} in {module} is not a ColoParameter.')
            if not param.has_compute_spec():
                continue
            cur_compute_pattern = param.tensor_spec.compute_spec.compute_pattern
            if compute_pattern is None:
                compute_pattern = cur_compute_pattern
            elif cur_compute_pattern != compute_pattern:
                raise Exception(
                    f'Invalid ColoParameter spec: Params in {module} have different compute_pattern.')

        if compute_pattern is not None:
            handler.register(compute_pattern)
            if not handler.has_compute_pattern(compute_pattern):
                raise Exception(
                    f'Invalid ColoParameter spec: ComputePattern {compute_pattern} in {module} is not allowed.')

            # Pass 2: look for one allowed layout that every param agrees with.
            for _, layout in handler.get_dist_specs(compute_pattern).items():
                for name, expected_spec in layout.items():
                    param = module.get_parameter(name)
                    if param.has_compute_spec():
                        if expected_spec != param.tensor_spec.dist_spec:
                            break    # this layout disagrees with the param
                    elif expected_spec is not None:
                        break    # layout wants a spec the param does not have
                else:
                    # Inner loop finished without a mismatch: layout accepted.
                    break
            else:
                # No layout was accepted (or none were offered).
                raise Exception(f'Invalid ColoParameter spec: Params in {module} are incorrectly sharded.')

    # Kept as an equality test so only values equal to True enable recursion.
    if recursive == True:
        for child in module.children():
            check_colo_module(child, recursive=True)


def init_colo_module(module: torch.nn.Module, parallel_action: ComputeSpec, recursive=True, mode='default'):
    """Assign tensor specs to the parameters of ``module`` using its handler.

    For a module whose type is registered, the handler supplies a dist spec
    per parameter for the compute pattern of ``parallel_action`` under
    ``mode``. Every ``ColoParameter`` with a non-``None`` dist spec receives
    ``TensorSpec(dist_spec, parallel_action)``. Afterwards ``module`` and each
    module listed in ``shared_param_modules`` of an updated parameter is
    re-validated with ``check_colo_module`` (non-recursively).

    Args:
        module: Module to initialise.
        parallel_action: Compute spec applied to the parameters; its
            ``compute_pattern`` selects the handler's dist specs.
        recursive: When equal to ``True``, children of ``module`` are
            initialised with the same arguments.
        mode: Mode name forwarded to the handler's ``*_with_mode`` queries.

    Raises:
        NotImplementedError: If the handler does not have the compute pattern
            for the given mode.
        Exception: Propagated from ``check_colo_module`` when validation fails.
    """
    compute_pattern = parallel_action.compute_pattern

    if is_colo_module(module):
        handler = get_colo_module(module)
        handler.register(compute_pattern)
        if not handler.has_compute_pattern_with_mode(compute_pattern, mode=mode):
            raise NotImplementedError

        # Modules needing re-validation: this one, plus every module sharing a
        # parameter whose spec gets replaced below.
        modules_to_recheck = {module}
        specs_by_param = handler.get_dist_specs_with_mode(compute_pattern, mode=mode)
        for name, dist_spec in specs_by_param.items():
            if dist_spec is None:
                continue
            param = module.get_parameter(name)
            if not isinstance(param, ColoParameter):
                continue
            param.set_tensor_spec(TensorSpec(dist_spec, parallel_action))
            modules_to_recheck.update(param.shared_param_modules)

        for touched in modules_to_recheck:
            check_colo_module(touched, recursive=False)

    # Kept as an equality test so only values equal to True enable recursion.
    if recursive == True:
        for child in module.children():
            init_colo_module(child, parallel_action, recursive=True, mode=mode)
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`from typing import Dict`
[Tensor] remove ParallelAction, use ComputeSpec instread (#1166) 2 years ago			`from colossalai.tensor import ColoParameter, ComputeSpec, TensorSpec`
[refactory] add nn.parallel module (#1068) 3 years ago			`from . import ColoModule`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`import torch`

			`_COLOSSAL_MODULES: Dict[type, ColoModule] = {}`


			`def register_colo_module(module_type: type, colo_module: ColoModule):`
			`global _COLOSSAL_MODULES`
			`_COLOSSAL_MODULES[module_type] = colo_module`

reorgnize colotensor directory (#1062) * reorgnize colotensor directory * polish code 3 years ago
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`def is_colo_module(module: torch.nn.Module):`
			`global _COLOSSAL_MODULES`
[Tensor] add hybrid device demo and fix bugs (#1059) 3 years ago			`for module_type in _COLOSSAL_MODULES.keys():`
fix module utils bug (#1066) 3 years ago			`if isinstance(module, module_type):`
[Tensor] add hybrid device demo and fix bugs (#1059) 3 years ago			`return True`
			`return False`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago
reorgnize colotensor directory (#1062) * reorgnize colotensor directory * polish code 3 years ago
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`def get_colo_module(module: torch.nn.Module):`
			`global _COLOSSAL_MODULES`
			`if is_colo_module(module):`
[Tensor] add hybrid device demo and fix bugs (#1059) 3 years ago			`for module_type, colo_module in _COLOSSAL_MODULES.items():`
fix module utils bug (#1066) 3 years ago			`if isinstance(module, module_type):`
[Tensor] add hybrid device demo and fix bugs (#1059) 3 years ago			`return colo_module`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`else:`
			`return None`

reorgnize colotensor directory (#1062) * reorgnize colotensor directory * polish code 3 years ago
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`def check_colo_module(module: torch.nn.Module, recursive=True):`
			`if is_colo_module(module):`
			`colo_module = get_colo_module(module)`
			`param_names = colo_module.get_param_names()`
			`compute_pattern = None`
			`for param_name in param_names:`
			`param = module.get_parameter(param_name)`
			`if not isinstance(param, ColoParameter):`
			`raise Exception(f'Invalid ColoParameter spec: {param} in {module} is not a ColoParameter.')`
[ColoTensor] rename APIs and add output_replicate to ComputeSpec (#1168) 2 years ago			`if param.has_compute_spec():`
			`cur_compute_pattern = param.tensor_spec.compute_spec.compute_pattern`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`if compute_pattern is None:`
			`compute_pattern = cur_compute_pattern`
			`else:`
			`if cur_compute_pattern != compute_pattern:`
reorgnize colotensor directory (#1062) * reorgnize colotensor directory * polish code 3 years ago			`raise Exception(`
			`f'Invalid ColoParameter spec: Params in {module} have different compute_pattern.')`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`else:`
			`continue`
reorgnize colotensor directory (#1062) * reorgnize colotensor directory * polish code 3 years ago
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`if compute_pattern is not None:`
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`colo_module.register(compute_pattern)`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`if not colo_module.has_compute_pattern(compute_pattern):`
reorgnize colotensor directory (#1062) * reorgnize colotensor directory * polish code 3 years ago			`raise Exception(`
			`f'Invalid ColoParameter spec: ComputePattern {compute_pattern} in {module} is not allowed.')`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago
			`match_specs = False`
			`allowed_specs = colo_module.get_dist_specs(compute_pattern)`
			`for _, param_specs in allowed_specs.items():`
			`cur_match = True`
			`for param_name, dist_spec in param_specs.items():`
			`param = module.get_parameter(param_name)`
[ColoTensor] rename APIs and add output_replicate to ComputeSpec (#1168) 2 years ago			`if param.has_compute_spec():`
			`if dist_spec != param.tensor_spec.dist_spec:`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`cur_match = False`
			`break`
			`else:`
			`if dist_spec is not None:`
			`cur_match = False`
			`break`
			`if cur_match == True:`
			`match_specs = True`
			`break`
			`if match_specs == False:`
			`raise Exception(f'Invalid ColoParameter spec: Params in {module} are incorrectly sharded.')`
			`if recursive == True:`
			`for submodule in module.children():`
			`check_colo_module(submodule, recursive=True)`

reorgnize colotensor directory (#1062) * reorgnize colotensor directory * polish code 3 years ago
[Tensor] remove ParallelAction, use ComputeSpec instread (#1166) 2 years ago			`def init_colo_module(module: torch.nn.Module, parallel_action: ComputeSpec, recursive=True, mode='default'):`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`compute_pattern = parallel_action.compute_pattern`
			`if is_colo_module(module):`
			`# for each param`
[Tensor] remove ParallelAction, use ComputeSpec instread (#1166) 2 years ago			`# set DistSpec and ComputeSpec`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`colo_module = get_colo_module(module)`
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`colo_module.register(compute_pattern)`
			`if not colo_module.has_compute_pattern_with_mode(compute_pattern, mode=mode):`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`raise NotImplementedError`
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`# a set for modules which update at least one param in the init process.`
			`# these modules need to be checked whether all params still match one of the valid compute pattern.`
			`modules_update_param = {module}`
			`for param_name, dist_spec in colo_module.get_dist_specs_with_mode(compute_pattern, mode=mode).items():`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`if dist_spec is None:`
			`continue`
			`param = module.get_parameter(param_name)`
			`if isinstance(param, ColoParameter):`
			`spec = TensorSpec(dist_spec, parallel_action)`
[ColoTensor] rename APIs and add output_replicate to ComputeSpec (#1168) 2 years ago			`param.set_tensor_spec(spec)`
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`for mod in param.shared_param_modules:`
			`modules_update_param.add(mod)`
			`for mod in modules_update_param:`
			`check_colo_module(mod, recursive=False)`
[Tensor] add module handler for linear (#1021) * add module spec for linear * polish * polish * polish 3 years ago			`if recursive == True:`
			`for submodule in module.children():`
[Tensor] add module check and bert test (#1031) * add Embedding * Add bert test * polish * add check module test * polish * polish * polish * polish 3 years ago			`init_colo_module(submodule, parallel_action, recursive=True, mode=mode)`