fix typo change dosen't to doesn't (#5308)

pull/5338/head^2
digger yu 10 months ago committed by GitHub
parent 6a3086a505
commit 71321a07cf

@@ -49,7 +49,7 @@ class FalconPolicy(Policy):
         if not self.model.config.new_decoder_architecture and self.model.config.multi_query:
             warnings.warn(
-                "Falcon dosen't support tensor parallelism when (not new_decoder_architecture and multi_query) is True, will ignore the tensor parallelism flag."
+                "Falcon doesn't support tensor parallelism when (not new_decoder_architecture and multi_query) is True, will ignore the tensor parallelism flag."
             )
             self.shard_config.enable_tensor_parallelism = False

@@ -46,7 +46,7 @@ class LlamaPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("Llama dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("Llama doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             decoder_attribute_replacement = {

@@ -35,7 +35,7 @@ class MistralPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             warnings.warn(
-                "Mistral dosen't support sequence parallelism now, will ignore the sequence parallelism flag."
+                "Mistral doesn't support sequence parallelism now, will ignore the sequence parallelism flag."
             )
         if self.shard_config.enable_tensor_parallelism:
@@ -136,7 +136,7 @@ class MistralModelPolicy(MistralPolicy):
     def module_policy(self):
         if self.pipeline_stage_manager:
-            warnings.warn("Mistral dosen't support pipeline parallelism now.")
+            warnings.warn("Mistral doesn't support pipeline parallelism now.")
         return super().module_policy()
@@ -160,7 +160,7 @@ class MistralForCausalLMPolicy(MistralPolicy):
             }
             if self.pipeline_stage_manager:
-                warnings.warn("Mistral dosen't support pipeline parallelism now.")
+                warnings.warn("Mistral doesn't support pipeline parallelism now.")
             policy.update(new_item)
@@ -186,7 +186,7 @@ class MistralForSequenceClassificationPolicy(MistralPolicy):
             }
             if self.pipeline_stage_manager:
-                warnings.warn("Mistral dosen't support pipeline parallelism now.")
+                warnings.warn("Mistral doesn't support pipeline parallelism now.")
             policy.update(new_item)
         return policy

@@ -59,7 +59,7 @@ class OPTPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("OPT dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("OPT doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[OPTDecoder] = ModulePolicyDescription(

@@ -66,7 +66,7 @@ class T5BasePolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("T5 dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("T5 doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[T5Stack] = ModulePolicyDescription(
@@ -263,7 +263,7 @@ class T5BasePolicy(Policy):
         if num_decoder_layers == 0:
             return Policy.distribute_layers(num_encoder_layers, num_stages), num_stages
-        # the number of stages distributed between encoder and decoder is optmized in this way:
+        # the number of stages distributed between encoder and decoder is optimized in this way:
         # num_encoder_stages = argmin(abs(num_encoder_layers / encoder_stages - num_decoder_layers / decoder_stages))
         # s.t. num_encoder_stages + num_decoder_stages = num_stages, num_encoder_stages >= 1, num_decoder_stages >= 1
         def objective(num_encoder_stages):
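The comment in the T5 hunk above describes how pipeline stages are split between encoder and decoder: pick the encoder stage count that minimizes the imbalance in layers per stage between the two sides, with each side getting at least one stage. Below is a minimal standalone sketch of that objective; the function and its name are illustrative only (the real logic lives in T5BasePolicy.distribute_layers and the objective helper shown above).

def split_stages(num_encoder_layers: int, num_decoder_layers: int, num_stages: int):
    """Return (num_encoder_stages, num_decoder_stages) per the argmin objective above."""

    def objective(num_encoder_stages: int) -> float:
        # imbalance between layers-per-stage on the encoder and decoder sides
        num_decoder_stages = num_stages - num_encoder_stages
        return abs(num_encoder_layers / num_encoder_stages - num_decoder_layers / num_decoder_stages)

    # brute-force argmin over the feasible range 1 .. num_stages - 1
    num_encoder_stages = min(range(1, num_stages), key=objective)
    return num_encoder_stages, num_stages - num_encoder_stages

print(split_stages(12, 12, 4))  # -> (2, 2): a symmetric model splits stages evenly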

@@ -33,7 +33,7 @@ class ViTPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("Vit dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("Vit doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[ViTEmbeddings] = ModulePolicyDescription(

@@ -69,13 +69,13 @@ class WhisperPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             warnings.warn(
-                "Whisper dosen't support sequence parallelism now, will ignore the sequence parallelism flag."
+                "Whisper doesn't support sequence parallelism now, will ignore the sequence parallelism flag."
             )
         # TODO using the jit fused add_and_dropout affect the accuracy
         if self.shard_config.enable_jit_fused:
             self.shard_config.enable_jit_fused = False
-            warnings.warn("Whisper dosen't support jit fused operator now, will ignore the jit fused operator flag.")
+            warnings.warn("Whisper doesn't support jit fused operator now, will ignore the jit fused operator flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[WhisperEncoderLayer] = ModulePolicyDescription(
@@ -302,7 +302,7 @@ class WhisperPolicy(Policy):
         if num_decoder_layers == 0:
             return Policy.distribute_layers(num_encoder_layers, num_stages), num_stages
-        # the number of stages distributed between encoder and decoder is optmized in this way:
+        # the number of stages distributed between encoder and decoder is optimized in this way:
         # num_encoder_stages = argmin(abs(num_encoder_layers / encoder_stages - num_decoder_layers / decoder_stages))
        # s.t. num_encoder_stages + num_decoder_stages = num_stages, num_encoder_stages >= 1, num_decoder_stages >= 1
         def objective(num_encoder_stages):
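The Whisper hunk above carries the same stage-split comment as the T5 one, and the same argmin governs its encoder/decoder split. Reusing the illustrative split_stages sketch from the T5 section (hypothetical layer counts, not Whisper's actual configs), asymmetric cases show why the search is needed:

print(split_stages(24, 12, 3))  # -> (2, 1): 12 layers per stage on each side
print(split_stages(32, 8, 5))   # -> (4, 1): 8 layers per stage on each side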

@@ -43,7 +43,7 @@ class OpenMoePolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             raise NotImplementedError(
-                "openmoe dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+                "openmoe doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             raise NotImplementedError("Tensor parallelism is not supported for openmoe model now.")
