mirror of https://github.com/hpcaitech/ColossalAI
[nfc] fix typo colossalai/ applications/ (#3831)
* fix typo colossalai/autochunk auto_parallel amp * fix typo colossalai/auto_parallel nn utils etc. * fix typo colossalai/auto_parallel autochunk fx/passes etc. * fix typo docs/ * change placememt_policy to placement_policy in docs/ and examples/ * fix typo colossalai/ applications/pull/3846/head^2
parent
a64df3fa97
commit
e2d81eba0d
|
@ -34,7 +34,7 @@ class DetachedReplayBuffer:
|
|||
'''
|
||||
Workers in the same tp group share this buffer and need same sample for one step.
|
||||
Therefore a held_sample should be returned tp_world_size times before it could be dropped.
|
||||
worker_state records wheter a worker got the held_sample
|
||||
worker_state records whether a worker got the held_sample
|
||||
'''
|
||||
self.tp_world_size = tp_world_size
|
||||
self.worker_state = [False] * self.tp_world_size
|
||||
|
|
|
@ -22,7 +22,7 @@ from .utils import is_rank_0, get_strategy_from_args, set_dist_env
|
|||
class ExperienceMakerHolder:
|
||||
'''
|
||||
Args:
|
||||
detached_trainer_name_list: str list to get ray actor handleskkk
|
||||
detached_trainer_name_list: str list to get ray actor handles
|
||||
strategy:
|
||||
experience_batch_size: batch size of generated experience
|
||||
kl_coef: the coefficient of kl divergence loss
|
||||
|
|
|
@ -26,7 +26,7 @@ rpc_is_initialized = _is_current_rpc_agent_set
|
|||
class PipelineModel(torch.nn.Module):
|
||||
'''
|
||||
Actor has 2 kinds of jobs: forward and generate.
|
||||
better to just pipelinize the inner model
|
||||
better to just pipeline the inner model
|
||||
'''
|
||||
def __init__(self,
|
||||
model: torch.nn.Module,
|
||||
|
|
|
@ -119,7 +119,7 @@ class Evaluator(object):
|
|||
jdump(all_evaluations,
|
||||
os.path.join(evaluation_results_save_path, f"{model_name_list[0]}_evaluation_results.json"))
|
||||
|
||||
# Start to calculate scores and save statictics.
|
||||
# Start to calculate scores and save statistics.
|
||||
evaluation_statistics_save_path = os.path.join(base_save_path, "evaluation_statistics")
|
||||
gpt_evaluate.save_gpt35_evaluation_statistics(model_name_list[0], all_evaluations,
|
||||
evaluation_statistics_save_path)
|
||||
|
|
|
@ -111,7 +111,7 @@ def calculate_precision_recall_f1(preds: list, targets: list) -> dict:
|
|||
The calculation of precision, recall and f1-score is realized by counting
|
||||
the number f overlaps between the preds and target. The comparison length
|
||||
limited by the shorter one of preds and targets. This design is mainly
|
||||
considered for classifiction and extraction categories.
|
||||
considered for classification and extraction categories.
|
||||
"""
|
||||
precision_recall_f1 = {"precision": 0, "recall": 0, "f1_score": 0}
|
||||
precision_scores = []
|
||||
|
@ -138,7 +138,7 @@ def calculate_precision_recall_f1(preds: list, targets: list) -> dict:
|
|||
|
||||
def precision(preds: list, targets: list) -> dict:
|
||||
"""Calculate Precision Metric
|
||||
(design for classifiction and extraction categories)
|
||||
(design for classification and extraction categories)
|
||||
|
||||
Calculating precision by counting the number of overlaps between the preds and target.
|
||||
"""
|
||||
|
@ -149,7 +149,7 @@ def precision(preds: list, targets: list) -> dict:
|
|||
|
||||
def recall(preds: list, targets: list) -> dict:
|
||||
"""Calculate Recall Metric
|
||||
(design for classifiction and extraction categories)
|
||||
(design for classification and extraction categories)
|
||||
|
||||
Calculating recall by counting the number of overlaps between the preds and target.
|
||||
"""
|
||||
|
@ -160,7 +160,7 @@ def recall(preds: list, targets: list) -> dict:
|
|||
|
||||
def F1_score(preds: list, targets: list) -> dict:
|
||||
"""Calculate F1-score Metric
|
||||
(design for classifiction and extraction categories)
|
||||
(design for classification and extraction categories)
|
||||
|
||||
Calculating f1-score by counting the number of overlaps between the preds and target.
|
||||
"""
|
||||
|
|
|
@ -206,7 +206,7 @@ class Broadcaster(BmmTransform):
|
|||
# e.g. [1, 2, 4] x [4, 4, 8] -> [4, 2, 8]
|
||||
# the dim 0 of [1, 2, 4] is multiplied to 4
|
||||
tensor_shape[dim_idx] = 1
|
||||
elif broadcast_type == BroadcastType.PADDDING:
|
||||
elif broadcast_type == BroadcastType.PADDING:
|
||||
# if the dim is padded
|
||||
# we remove its sharding
|
||||
tensor_shape[dim_idx] = None
|
||||
|
|
|
@ -21,7 +21,7 @@ __all__ = [
|
|||
|
||||
class BroadcastType(Enum):
|
||||
EQUAL = auto()
|
||||
PADDDING = auto()
|
||||
PADDING = auto()
|
||||
MULTIPLE = auto()
|
||||
|
||||
|
||||
|
@ -69,18 +69,18 @@ def get_broadcast_dim_info(logical_shape, physical_shape):
|
|||
for i in range(logical_num_dims):
|
||||
# get the trailing dim size
|
||||
logical_dim_idx = logical_num_dims - i - 1
|
||||
phyiscal_dim_idx = physical_num_dims - i - 1
|
||||
physical_dim_idx = physical_num_dims - i - 1
|
||||
logical_dim_size = logical_shape[logical_dim_idx]
|
||||
|
||||
if phyiscal_dim_idx >= 0:
|
||||
physical_dim_size = physical_shape[phyiscal_dim_idx]
|
||||
if physical_dim_idx >= 0:
|
||||
physical_dim_size = physical_shape[physical_dim_idx]
|
||||
|
||||
if physical_dim_size == logical_dim_size:
|
||||
logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.EQUAL
|
||||
elif physical_dim_size == 1 and physical_dim_size != logical_dim_size:
|
||||
logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.MULTIPLE
|
||||
else:
|
||||
logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.PADDDING
|
||||
logical_dim_broadcast_info[logical_dim_idx] = BroadcastType.PADDING
|
||||
|
||||
return logical_dim_broadcast_info
|
||||
|
||||
|
@ -117,7 +117,7 @@ def recover_sharding_spec_for_broadcast_shape(logical_sharding_spec: ShardingSpe
|
|||
for shape_dim, mesh_dim in logical_dim_partition.items():
|
||||
logical_broadcast_type = logical_dim_broadcast_info[shape_dim]
|
||||
|
||||
if logical_broadcast_type == BroadcastType.PADDDING or logical_broadcast_type == BroadcastType.MULTIPLE:
|
||||
if logical_broadcast_type == BroadcastType.PADDING or logical_broadcast_type == BroadcastType.MULTIPLE:
|
||||
removed_dims.extend(mesh_dim)
|
||||
else:
|
||||
# get the corresponding physical dim
|
||||
|
|
Loading…
Reference in New Issue