mirror of https://github.com/hpcaitech/ColossalAI
[hotfix] quick fixes to make legacy tutorials runnable (#5559)
Co-authored-by: Edenzzzz <wtan45@wisc.edu>

parent 8e412a548e
commit 15055f9a36
@@ -237,7 +237,7 @@ class ColoTracer(Tracer):
         # override the tracer to support custom modules and checkpointing
         if self.trace_act_ckpt:
             orig_ckpt_func_apply = torch.utils.checkpoint.CheckpointFunction.apply
-            orig_ckpt_func_without_reentrant = torch.utils.checkpoint._checkpoint_without_reentrant
+            orig_ckpt_func_without_reentrant = torch.utils.checkpoint._checkpoint_without_reentrant_generator

             def checkpoint(run_function, preserve_rng_state=False, *args):
                 self.ckpt_regions.append(self.ckpt_idx)
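The change above tracks a PyTorch rename: the private helper `_checkpoint_without_reentrant` became `_checkpoint_without_reentrant_generator` in newer releases. A minimal sketch, not part of this commit, of a version-tolerant lookup that assumes only one of the two names is present:

import torch.utils.checkpoint as _ckpt

# Prefer the newer private name and fall back to the older one; both are
# internal PyTorch helpers, so use getattr instead of a hard attribute access.
orig_ckpt_func_without_reentrant = getattr(
    _ckpt,
    "_checkpoint_without_reentrant_generator",
    getattr(_ckpt, "_checkpoint_without_reentrant", None),
)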
@@ -8,6 +8,14 @@ import torch.nn as nn

+from colossalai.kernel.kernel_loader import ScaledMaskedSoftmaxLoader, ScaledUpperTriangleMaskedSoftmaxLoader
+
+# NOTE: These kernels are compiled on specific GPU arch and not widely applicable.
+# try:
+#     from colossalai._C import scaled_masked_softmax as scaled_masked_softmax, scaled_upper_triangle_masked_softmax_cuda as scaled_upper_triang_masked_softmax
+# except ImportError:
+scaled_masked_softmax = None
+scaled_upper_triang_masked_softmax = None


 class AttnMaskType(enum.Enum):
     padding = 1
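The NOTE above replaces an eager CUDA-extension import with loader classes plus None placeholders. A hedged sketch of the try/except fallback the commented-out lines describe (the colossalai._C path is taken from that comment and may not exist in builds without a matching GPU arch):

# Try the prebuilt kernel first; otherwise leave it as None so callers can
# check availability at runtime before dispatching to the fused softmax.
try:
    from colossalai._C import scaled_masked_softmax
except ImportError:
    scaled_masked_softmax = None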
@@ -17,7 +17,7 @@ def synthesize_data():


 def main():
-    colossalai.launch_from_torch(config="./config.py")
+    colossalai.legacy.launch_from_torch(config="./config.py")

     logger = get_dist_logger()
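This and the following hunks apply the same fix to the tutorial entry points: the parser and launch helpers are now reached through the colossalai.legacy namespace. A minimal, hypothetical driver assuming that namespace mirrors the old top-level API (the config path is illustrative):

import colossalai
from colossalai.logging import get_dist_logger


def main():
    # Legacy tutorials initialize the distributed environment via colossalai.legacy.
    colossalai.legacy.launch_from_torch(config="./config.py")
    logger = get_dist_logger()
    logger.info("distributed environment initialized", ranks=[0])


if __name__ == "__main__":
    main()  # usually started with `torchrun --nproc_per_node <N> train.py`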
@@ -41,9 +41,9 @@ class DummyDataloader:
 def main():
     # launch from torch
-    parser = colossalai.get_default_parser()
+    parser = colossalai.legacy.get_default_parser()
     args = parser.parse_args()
-    colossalai.launch_from_torch(config=args.config)
+    colossalai.legacy.launch_from_torch(config=args.config)

     # get logger
     logger = get_dist_logger()
@@ -37,14 +37,14 @@ class DummyDataloader:
 def main():
     # initialize distributed setting
-    parser = colossalai.get_default_parser()
+    parser = colossalai.legacy.get_default_parser()
     parser.add_argument(
         "--optimizer", choices=["lars", "lamb"], help="Choose your large-batch optimizer", required=True
     )
     args = parser.parse_args()

     # launch from torch
-    colossalai.launch_from_torch(config=args.config)
+    colossalai.legacy.launch_from_torch(config=args.config)

     # get logger
     logger = get_dist_logger()
@@ -73,7 +73,7 @@ def main():
     )

     # initialize
-    engine, train_dataloader, test_dataloader, _ = colossalai.initialize(
+    engine, train_dataloader, test_dataloader, _ = colossalai.legacy.initialize(
         model=model,
         optimizer=optimizer,
         criterion=criterion,
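colossalai.legacy.initialize returns an engine wrapping the model, optimizer, and criterion. A sketch of the training step that typically follows in these tutorials; the loop body is illustrative, not taken from this commit, and assumes the engine and train_dataloader produced by the call above:

# Illustrative legacy-engine training step.
engine.train()
for img, label in train_dataloader:
    img, label = img.cuda(), label.cuda()
    engine.zero_grad()
    output = engine(img)
    loss = engine.criterion(output, label)
    engine.backward(loss)
    engine.step()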
@@ -72,7 +72,7 @@ def get_time_stamp():


 def parse_args():
-    parser = colossalai.get_default_parser()
+    parser = colossalai.legacy.get_default_parser()
     parser.add_argument("-s", "--synthetic", action="store_true")
     parser.add_argument(
         "--dataset_name",
@@ -289,7 +289,7 @@ class DummyDataloader:
 def main():
     args = parse_args()
     disable_existing_loggers()
-    colossalai.launch_from_torch(config=dict())
+    colossalai.legacy.launch_from_torch(config=dict())
     logger = get_dist_logger()
     is_main_process = dist.get_rank() == 0
@@ -1,9 +1,9 @@
 import torch
 import torch.nn as nn

-from colossalai.kernel.cuda_native import LayerNorm
 from colossalai.kernel.jit import bias_dropout_add_fused_inference, bias_dropout_add_fused_train
 from colossalai.legacy.nn.layer.parallel_sequence import TransformerSelfAttentionRing
+from colossalai.nn.layer.layernorm import MixedFusedLayerNorm as LayerNorm

 from .dropout import get_bias_dropout_add
 from .mlp import TransformerMLP
@@ -48,7 +48,7 @@ def pipeline_data_process_func(stage_output, micro_batch_data):
 def main():
     # initialize
     parse_args()
-    colossalai.launch_from_torch(config="./config.py", seed=1234, backend="nccl")
+    colossalai.legacy.launch_from_torch(config="./config.py", seed=1234, backend="nccl")

     logger = get_dist_logger()
@@ -136,7 +136,7 @@ def main():
     logger.info(f"LR Scheduler is built with {warmup_steps} warmup steps and {gpc.config.DECAY_ITERS} decay steps")

     # # init
-    engine, *dummy = colossalai.initialize(model, optimizer, criterion, verbose=True)
+    engine, *dummy = colossalai.legacy.initialize(model, optimizer, criterion, verbose=True)

     # build timer
     timer = MultiTimer()