fix typo colossalai/auto_parallel autochunk fx/passes etc. (#3808)

pull/3825/head
digger yu 2023-05-24 09:01:50 +08:00 committed by GitHub
parent 725365f297
commit 7f8203af69
19 changed files with 31 additions and 31 deletions

View File

@@ -14,7 +14,7 @@
 - [Compatibility Test on Dispatch](#compatibility-test-on-dispatch)
 - [Release](#release)
 - [User Friendliness](#user-friendliness)
-- [Commmunity](#commmunity)
+- [Community](#community)
 - [Configuration](#configuration)
 - [Progress Log](#progress-log)
@@ -97,7 +97,7 @@ This workflow is triggered by manually dispatching the workflow. It has the foll
 | `Synchronize submodule` | `submodule.yml` | This workflow will check if any git submodule is updated. If so, it will create a PR to update the submodule pointers. |
 | `Close inactive issues` | `close_inactive.yml` | This workflow will close issues which are stale for 14 days. |
-### Commmunity
+### Community
 | Workflow Name | File name | Description |
 | -------------------------------------------- | -------------------------------- | -------------------------------------------------------------------------------- |

View File

@@ -148,7 +148,7 @@ class MetaInfoProp:
 graph_info.fwd_tmp = buffer_tensors
 graph_info.fwd_out = output_tensors
-# fetch other memory informations
+# fetch other memory information
 memory_cost = meta_info.memory_cost
 graph_info.fwd_mem_tmp = memory_cost.fwd.temp
 graph_info.fwd_mem_out = memory_cost.fwd.activation

View File

@@ -44,7 +44,7 @@ class BatchNormStrategyGenerator(StrategyGenerator):
 '''
 Compute the computation cost per device with this specific strategy.
-Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
 '''
 # TODO: a constant coefficient need to be added.
 # 1D: (L) * N * Cin

View File

@@ -38,9 +38,9 @@ class ConvStrategyGenerator(StrategyGenerator):
 '''
 Compute the computation cost per device with this specific strategy.
-Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
 '''
-# TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+# TODO: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
 # 1D: (L) * N * Cout * Cin * kernel
 # 2D: (H * W) * N * Cout * Cin * kernel
 # 3D: (H * W * D) * N * Cout * Cin * kernel

View File

@@ -34,9 +34,9 @@ class LayerNormGenerator(StrategyGenerator):
 '''
 Compute the computation cost per device with this specific strategy.
-Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
 '''
-# TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+# TODO: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
 # TODO: a constant coefficient need to be added.
 sharded_input_shape = strategy.sharding_specs[self.op_data['input']].get_sharded_shape_per_device()

View File

@@ -17,7 +17,7 @@ class NormalPoolStrategyGenerator(StrategyGenerator):
 """
 NormalPoolStrategyGenerator is a generic class to generate strategies for pool operation like MaxPoolxd.
 The reason we call this normal pool is AvgPoolxd and MaxPoolxd are taking the kernel size element from image,
-and reduce them depening on the operation type.
+and reduce them depending on the operation type.
 """
 def validate(self) -> bool:
@@ -35,9 +35,9 @@ class NormalPoolStrategyGenerator(StrategyGenerator):
 '''
 Compute the computation cost per device with this specific strategy.
-Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
 '''
-# TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+# TODO: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
 # 1D: (Lout) * N * C * kernel
 # 2D: (H * W) * N * Cout * Cin * kernel
 # 3D: (H * W * D) * N * Cout * Cin * kernel

View File

@@ -366,8 +366,8 @@ class TraceFlow(object):
 # find non chunk inputs
 chunk_info = self._get_non_chunk_inputs(chunk_info, start_idx, end_idx)
-# reassgin reshape size, some size may have changed due to chunk
-chunk_info = self._reassgin_reshape_size(chunk_info)
+# reassign reshape size, some size may have changed due to chunk
+chunk_info = self._reassign_reshape_size(chunk_info)
 return chunk_info
@@ -428,10 +428,10 @@ class TraceFlow(object):
 chunk_info["outputs_dim"].append(output_dim)
 return True
-def _reassgin_reshape_size(self, chunk_info):
+def _reassign_reshape_size(self, chunk_info):
 """
 Some shape args in reshape may have changed due to chunk
-reassgin those changed shape
+reassign those changed shape
 """
 chunk_region = chunk_info["region"]
 reshape_size = {}

View File

@@ -397,7 +397,7 @@ class TraceIndice(object):
 input_node = node.args[0]
 assert len(get_node_shape(input_node)) == 4
-# assgin index
+# assign index
 self._assign_indice_as_input(node, node_idx, input_node)
 self._del_dim(node_idx, 1)
 self._add_dim(node_idx, 1)
@@ -415,7 +415,7 @@
 assert node.kwargs['size'] is None
 assert len(get_node_shape(node)) == 4
-# assgin index
+# assign index
 self._assign_indice_as_input(node, node_idx)
 self._mark_computation(node, node_idx, [-1, -2])

View File

@@ -179,7 +179,7 @@ class GeminiPlugin(DPPluginBase):
 Users can provide this argument to speed up searching.
 If users do not know this argument before training, it is ok. We will use a default value 1024.
 min_chunk_size_mb (float, optional): the minimum chunk size in MegaByte.
-If the aggregate size of parameters is still samller than the minimum chunk size,
+If the aggregate size of parameters is still smaller than the minimum chunk size,
 all parameters will be compacted into one small chunk.
 memstats (MemStats, optional) the memory statistics collector by a runtime memory tracer.
 gpu_margin_mem_ratio (float, optional): The ratio of GPU remaining memory (after the first forward-backward)

View File

@@ -181,7 +181,7 @@ class DistCoordinator(metaclass=SingletonMeta):
 """
 is_master = self.is_master(process_group)
-# define an inner functiuon
+# define an inner function
 def decorator(func):
 @functools.wraps(func)

View File

@@ -381,7 +381,7 @@ class AlphaBetaProfiler:
 first_latency, first_bandwidth = _extract_alpha_beta(first_axis, first_axis_process_group)
 second_latency, second_bandwidth = _extract_alpha_beta(second_axis, second_axis_process_group)
 mesh_alpha = [first_latency, second_latency]
-# The beta values have been enlarged by 1e10 times temporarilly because the computation cost
+# The beta values have been enlarged by 1e10 times temporarily because the computation cost
 # is still estimated in the unit of TFLOPs instead of time. We will remove this factor in future.
 mesh_beta = [1e10 / first_bandwidth, 1e10 / second_bandwidth]

View File

@@ -152,9 +152,9 @@ class PipelineSchedule(BaseSchedule):
 raise TypeError(f"Expected data to be of type torch.Tensor, list, tuple, or dict, but got {type(data)}")
 def load_micro_batch(self):
-mciro_batch_data = self._get_data_slice(self.batch_data, self.microbatch_offset)
+micro_batch_data = self._get_data_slice(self.batch_data, self.microbatch_offset)
 self.microbatch_offset += self.microbatch_size
-return self._move_to_device(mciro_batch_data)
+return self._move_to_device(micro_batch_data)
 def pre_processing(self, engine):
 from colossalai.zero.legacy import ShardedModelV2

View File

@@ -84,7 +84,7 @@ class PipelineScheduleV2(PipelineSchedule):
 'The argument \'return_loss\' has to be True when \'forward_only\' is False, but got False.'
 self.load_batch(data_iter)
-# num_warmup_microbatches is the step when not all the processers are working
+# num_warmup_microbatches is the step when not all the processes are working
 num_warmup_microbatches = \
 (gpc.get_world_size(ParallelMode.PIPELINE)
 - gpc.get_local_rank(ParallelMode.PIPELINE) - 1)

View File

@@ -523,7 +523,7 @@ def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_node_func,
 # append code text to body
 for idx, node in enumerate(node_list):
 # if this is the first node of the ckpt region
-# append the ckpt function defition
+# append the ckpt function definition
 if idx in start_idx:
 label = start_idx.index(idx)
 ckpt_fn_def = _gen_ckpt_fn_def(label, input_vars[label])

View File

@@ -206,7 +206,7 @@ def avgcompute_split_pass(gm: torch.fx.GraphModule, pp_size: int):
 def avgnode_split_pass(gm: torch.fx.GraphModule, pp_size: int):
 """
-In avgnode_split_pass, simpliy split graph by node number.
+In avgnode_split_pass, simply split graph by node number.
 """
 mod_graph = gm.graph
 avg_num_node = len(mod_graph.nodes) // pp_size

View File

@@ -16,7 +16,7 @@ def apply(*args, **kwargs):
 return shape_consistency_manager.apply(*args, **kwargs)
-def solution_annotatation_pass(gm: torch.fx.GraphModule, solution: List[int], device_mesh):
+def solution_annotation_pass(gm: torch.fx.GraphModule, solution: List[int], device_mesh):
 mod_graph = gm.graph
 nodes = tuple(mod_graph.nodes)

View File

@@ -31,7 +31,7 @@ class TensorMetadata(NamedTuple):
 numel: int
 is_tensor: bool
 # TODO: we can add a list of sharding spec here, and record the sharding
-# behaviour by appending sharding spec into list.
+# behavior by appending sharding spec into list.
 def _extract_tensor_metadata(result: torch.Tensor) -> TensorMetadata:

View File

@@ -230,7 +230,7 @@ def split_module_for_gpt2_test(
 use_partition.partitions_dependent_on.setdefault(def_partition_name)
 node_process_list = list(m.graph.nodes)
-# split nodes into parititons
+# split nodes into partitions
 while node_process_list:
 node = node_process_list.pop(0)
 orig_nodes[node.name] = node
@@ -277,7 +277,7 @@ def split_module_for_gpt2_test(
 if len(sorted_partitions) != len(partitions):
 raise RuntimeError("cycle exists between partitions!")
-# add placeholders to parititons
+# add placeholders to partitions
 for partition_name in sorted_partitions:
 partition = partitions[partition_name]
 for input in partition.inputs:

View File

@@ -29,8 +29,8 @@ class Partition:
 f" nodes: {self.node_names},\n" \
 f" inputs: {self.inputs},\n" \
 f" outputs: {self.outputs},\n" \
-f" partitions depenent on: {self.partitions_dependent_on},\n" \
-f" parition dependents: {self.partition_dependents}"
+f" partitions dependent on: {self.partitions_dependent_on},\n" \
+f" partition dependents: {self.partition_dependents}"
 # Creates subgraphs out of main graph