|
|
@@ -3,13 +3,18 @@ import warnings
 from typing import List, Optional, Tuple, Union
 
 import torch
-import torch.nn.functional as F
 import torch.utils.checkpoint
 from torch import nn
 from torch.nn import CrossEntropyLoss
 from transformers.cache_utils import Cache, DynamicCache
 from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
-from transformers.models.cohere.modeling_cohere import CohereForCausalLM, CohereModel, StaticCache, apply_rotary_pos_emb, repeat_kv
+from transformers.models.cohere.modeling_cohere import (
+    CohereForCausalLM,
+    CohereModel,
+    StaticCache,
+    apply_rotary_pos_emb,
+    repeat_kv,
+)
 from transformers.utils import logging
 
 from colossalai.pipeline.stage_manager import PipelineStageManager
@@ -584,6 +589,7 @@ def get_command_flash_attention_model_forward(shard_config, sp_mode=None, sp_siz
     return forward
 
 
 def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig):
     from transformers import CohereForCausalLM
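For context only: the first hunk imports apply_rotary_pos_emb and repeat_kv from transformers.models.cohere.modeling_cohere. Below is a minimal usage sketch of those two helpers as they are typically called inside an attention forward; the tensor names and shapes are hypothetical placeholders and this code is not part of the diff above.

import torch
from transformers.models.cohere.modeling_cohere import apply_rotary_pos_emb, repeat_kv

# Hypothetical shapes chosen for illustration only.
bsz, q_len, num_heads, num_kv_heads, head_dim = 2, 16, 8, 2, 64

query = torch.randn(bsz, num_heads, q_len, head_dim)
key = torch.randn(bsz, num_kv_heads, q_len, head_dim)
value = torch.randn(bsz, num_kv_heads, q_len, head_dim)
cos = torch.randn(bsz, q_len, head_dim)
sin = torch.randn(bsz, q_len, head_dim)

# Rotate query/key states with the rotary position embedding.
query, key = apply_rotary_pos_emb(query, key, cos, sin)

# Expand grouped key/value heads so they match the number of query heads.
key = repeat_kv(key, num_heads // num_kv_heads)
value = repeat_kv(value, num_heads // num_kv_heads)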