ColossalAI/colossalai/cli/benchmark/benchmark.py

import colossalai
import click
import torch.multiprocessing as mp
from functools import partial
from typing import List, Dict
from colossalai.context import Config
from colossalai.context.random import reset_seeds
from colossalai.core import global_context as gpc
from colossalai.logging import disable_existing_loggers, get_dist_logger
from colossalai.utils import free_port, MultiTimer
from colossalai.cli.benchmark.utils import find_all_configs, profile_model, get_batch_data
from .models import MLP

def run_benchmark(args: Config) -> None:
    """
    Run benchmarking with torch.multiprocessing.
    """

    # sanity checks
    if args.gpus is None:
        click.echo("Error: --gpus is not given")
        exit()
    if args.gpus <= 1:
        click.echo("Warning: tensor parallel is only activated with 2 or more devices.")

    click.echo("=== Benchmarking Parameters ===")
    for k, v in args.items():
        click.echo(f'{k}: {v}')
    click.echo('')

    config_list = find_all_configs(args.gpus)

    # profile each parallel configuration in a fresh process group on its own free port
    avail_ports = [free_port() for _ in range(len(config_list))]
    run_func = partial(run_dist_profiling,
                       world_size=args.gpus,
                       port_list=avail_ports,
                       config_list=config_list,
                       hyperparams=args)
    mp.spawn(run_func, nprocs=args.gpus)
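
# A minimal sketch (not part of the original file) of the per-run config shape that
# `find_all_configs` is expected to yield, inferred from the attribute accesses in
# `run_dist_profiling` below (`config.parallel.tensor.mode`, `.size`, `.items()`);
# the actual construction lives in colossalai.cli.benchmark.utils:
#
#     Config({'parallel': {'tensor': {'mode': '1d', 'size': 2}}})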


def run_dist_profiling(rank: int, world_size: int, port_list: List[int], config_list: List[Dict],
                       hyperparams: Config) -> None:
    """
    Run profiling in a worker process; this function should be spawned by torch.multiprocessing.

    Args:
        rank (int): rank of the process
        world_size (int): the number of processes
        port_list (List[int]): a list of free ports for initializing distributed process groups
        config_list (List[Dict]): a list of parallel configurations to profile
        hyperparams (Config): the hyperparameters given by the user
    """

    # disable existing loggers for clean output
    disable_existing_loggers()
    logger = get_dist_logger()
    logger.set_level('WARNING')

    for config, port in zip(config_list, port_list):
        colossalai.launch(config=config, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
        timer = MultiTimer()

        # Skip 1D tensor parallelism when the hidden dimension is not divisible by the
        # parallel size (e.g. dim 1024 splits evenly over size 4, but not over size 3),
        # since the linear layers' in_features/out_features cannot be partitioned evenly.
        if config.parallel.tensor.mode == '1d' and hyperparams.dimension % config.parallel.tensor.size != 0:
            click.echo("1D parallel will be skipped because in_features/out_features is not divisible by the 1D parallel size.")
            continue
        if hyperparams.model == 'mlp':
            model = MLP(dim=hyperparams.dimension, layers=hyperparams.layers)
        else:
            # only rank 0 prints the error, but every rank must exit,
            # otherwise the other ranks would hit a NameError on `model` below
            if gpc.get_global_rank() == 0:
                click.echo("Error: Invalid argument for --model")
            exit()

        # build a batch generator matched to the current tensor parallel mode
        data_func = partial(get_batch_data,
                            dim=hyperparams.dimension,
                            batch_size=hyperparams.batch_size,
                            seq_length=hyperparams.seq_len,
                            mode=config.parallel.tensor.mode)
        # measure average forward/backward latency and peak GPU memory over the profiling steps
        fwd_time, bwd_time, max_allocated, max_cached = profile_model(model=model,
                                                                      warmup_steps=hyperparams.warmup_steps,
                                                                      profile_steps=hyperparams.profile_steps,
                                                                      data_func=data_func,
                                                                      timer=timer)

        # tear down the current distributed context before profiling the next config
        gpc.destroy()
        reset_seeds()

        if gpc.get_global_rank() == 0:
            config_str = ', '.join([f'{k}: {v}' for k, v in config.parallel.tensor.items()])
            click.echo(f"=== {config_str} ===")
            click.echo(f"Average forward time: {fwd_time}")
            click.echo(f"Average backward time: {bwd_time}")
            click.echo(f"Max allocated GPU memory: {max_allocated}")
            click.echo(f"Max cached GPU memory: {max_cached}\n")