ColossalAI/colossalai/nn/_ops/loss.py

import torch
import torch.nn.functional as F
from typing import Optional
from colossalai.tensor.op_wrapper import colo_op_impl
from colossalai.tensor import ColoTensor, ColoTensorSpec
from colossalai.nn.loss.loss_1d import VocabParallelCrossEntropyLoss1D
from ._utils import GeneralTensor, convert_to_colo_tensor


@colo_op_impl(F.cross_entropy)
def colo_cross_entropy(input_tensor: GeneralTensor,
                       target: GeneralTensor,
                       weight: Optional[GeneralTensor] = None,
                       size_average: Optional[bool] = None,
                       ignore_index: int = -100,
                       reduce: Optional[bool] = None,
                       reduction: str = "mean",
                       label_smoothing: float = 0.0):
    """ColoTensor implementation registered for ``F.cross_entropy``.

    At least one of ``input_tensor``, ``target`` and ``weight`` must be a
    ``ColoTensor``. The remaining arguments are passed through
    ``convert_to_colo_tensor`` with the process group of the first ColoTensor
    found (checked in the order ``input_tensor``, ``target``, ``weight``).

    Two layouts of ``input_tensor`` are handled:

    * replicated: the call is forwarded to ``F.cross_entropy`` with every
      keyword argument unchanged;
    * carrying a compute spec and sharded 1D-column:
      ``VocabParallelCrossEntropyLoss1D`` is invoked on the input's tensor
      parallel process group.

    NOTE: in the sharded branch only ``input_tensor`` and ``target`` are
    handed to the loss; ``size_average``, ``ignore_index``, ``reduce``,
    ``reduction`` and ``label_smoothing`` are not forwarded there.

    Args:
        input_tensor: predictions; a torch tensor or ColoTensor.
        target: targets; must be replicated in both supported branches.
        weight: optional weight tensor; must be ``None`` in the sharded
            branch and ``None`` or replicated in the replicated branch.
        size_average: forwarded to ``F.cross_entropy`` (replicated branch).
        ignore_index: forwarded to ``F.cross_entropy`` (replicated branch).
        reduce: forwarded to ``F.cross_entropy`` (replicated branch).
        reduction: forwarded to ``F.cross_entropy`` (replicated branch).
        label_smoothing: forwarded to ``F.cross_entropy`` (replicated branch).

    Returns:
        The loss wrapped by ``ColoTensor.from_torch_tensor`` with a
        ``ColoTensorSpec`` built from the selected process group.

    Raises:
        AssertionError: if no argument is a ColoTensor, or if ``target`` /
            ``weight`` violate the layout requirements of the chosen branch.
        NotImplementedError: for any other layout of ``input_tensor``.
    """
    assert isinstance(weight, ColoTensor) or isinstance(target, ColoTensor) or isinstance(input_tensor, ColoTensor), \
        "At least one of input_tensor, target and weight should be a ColoTensor"

    # Take the process group from the first ColoTensor argument. The previous
    # code fell back to the *boolean* ``isinstance(target, ColoTensor)`` when
    # ``input_tensor`` was a plain tensor, so a bool was used as the process
    # group in every conversion below.
    pg = next(tensor.get_process_group()
              for tensor in (input_tensor, target, weight)
              if isinstance(tensor, ColoTensor))

    weight = convert_to_colo_tensor(weight, pg)
    target = convert_to_colo_tensor(target, pg)
    input_tensor = convert_to_colo_tensor(input_tensor, pg)

    if input_tensor.is_replicate():    # Input is gathered
        assert target.is_replicate() and (weight is None or weight.is_replicate()), \
            "Target tensor and weight tensor both should be complete"
        output = F.cross_entropy(input_tensor,
                                 target,
                                 weight=weight,
                                 size_average=size_average,
                                 ignore_index=ignore_index,
                                 reduce=reduce,
                                 reduction=reduction,
                                 label_smoothing=label_smoothing)
        return ColoTensor.from_torch_tensor(output, ColoTensorSpec(pg))

    if input_tensor.has_compute_spec() and input_tensor.is_shard_1dcol():    # Single Model Parallel Applied
        assert weight is None, "Current TP cross entropy loss function doesn't support passing weight tensor in"
        assert target.is_replicate(), "Target tensor should be complete in TP cross entropy loss function"
        output = VocabParallelCrossEntropyLoss1D()(input_tensor,
                                                   target,
                                                   process_group=input_tensor.process_group.tp_process_group())
        return ColoTensor.from_torch_tensor(output, ColoTensorSpec(pg))

    # Any other distribution layout of the input is unsupported.
    raise NotImplementedError
[tensor] add cross_entrophy_loss (#868) 3 years ago			`import torch`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`import torch.nn.functional as F`
			`from typing import Optional`
[tensor] add cross_entrophy_loss (#868) 3 years ago			`from colossalai.tensor.op_wrapper import colo_op_impl`
[refactor] move process group from _DistSpec to ColoTensor. (#1203) 2 years ago			`from colossalai.tensor import ColoTensor, ColoTensorSpec`
[Tensor] add 1d vocab loss (#918) * add 1d vocab loss * polish 3 years ago			`from colossalai.nn.loss.loss_1d import VocabParallelCrossEntropyLoss1D`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`from ._utils import GeneralTensor, convert_to_colo_tensor`
[tensor] add cross_entrophy_loss (#868) 3 years ago
[tensor] derive compute pattern from dist spec (#971) * derive compute pattern from dist spec * polish code 3 years ago
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`@colo_op_impl(F.cross_entropy)`
			`def colo_cross_entropy(input_tensor: GeneralTensor,`
			`target: GeneralTensor,`
			`weight: Optional[GeneralTensor] = None,`
			`size_average: Optional[bool] = None,`
			`ignore_index: int = -100,`
			`reduce: Optional[bool] = None,`
			`reduction: str = "mean",`
			`label_smoothing: float = 0.0):`
[refactor] move process group from _DistSpec to ColoTensor. (#1203) 2 years ago			`assert isinstance(weight, ColoTensor) or isinstance(target, ColoTensor) or isinstance(input_tensor, ColoTensor)`
			`pg = input_tensor.get_process_group() if isinstance(input_tensor, ColoTensor) else isinstance(target, ColoTensor)`
			`weight = convert_to_colo_tensor(weight, pg)`
			`target = convert_to_colo_tensor(target, pg)`
			`input_tensor = convert_to_colo_tensor(input_tensor, pg)`
[tensor] add cross_entrophy_loss (#868) 3 years ago
[refactor] move process group from _DistSpec to ColoTensor. (#1203) 2 years ago			`if input_tensor.is_replicate(): # Input is gathered`
[tensor] fix a assertion in colo_tensor cross_entropy (#1232) 2 years ago			`assert target.is_replicate() and (weight is None or weight.is_replicate()), \`
[tensor] add unitest for colo_tensor 1DTP cross_entropy (#1230) 2 years ago			`"Target tensor and weight tensor both should be complete"`
[tensor] refactor colo-tensor (#992) * refactor colo-tensor and update linear op * polish code * polish code * update ops and unit tests * update unit tests * polish code * rename dist_spec module * polish code * polish code * remove unneeded import * fix pipelinable 3 years ago			`output = F.cross_entropy(input_tensor,`
			`target,`
			`weight=weight,`
			`size_average=size_average,`
			`ignore_index=ignore_index,`
			`reduce=reduce,`
			`reduction=reduction,`
			`label_smoothing=label_smoothing)`
[tensor] add unitest for colo_tensor 1DTP cross_entropy (#1230) 2 years ago			`return ColoTensor.from_torch_tensor(output, ColoTensorSpec(pg))`
[ColoTensor] rename APIs and add output_replicate to ComputeSpec (#1168) 2 years ago			`elif input_tensor.has_compute_spec(): # Single Model Parallel Applied`
[refactor] move process group from _DistSpec to ColoTensor. (#1203) 2 years ago			`if input_tensor.is_shard_1dcol():`
[tensor] add unitest for colo_tensor 1DTP cross_entropy (#1230) 2 years ago			`assert weight is None, "Current TP cross entropy loss function doesn't support passing weight tensor in"`
			`assert target.is_replicate(), "Target tensor should be complete in TP cross entropy loss function"`
			`output = VocabParallelCrossEntropyLoss1D()(input_tensor,`
			`target,`
			`process_group=input_tensor.process_group.tp_process_group())`
			`return ColoTensor.from_torch_tensor(output, ColoTensorSpec(pg))`
[Tensor] add 1d vocab loss (#918) * add 1d vocab loss * polish 3 years ago			`else:`
			`raise NotImplementedError`
			`else:`
			`raise NotImplementedError`