ColossalAI/tests/test_shardformer/test_model/test_shard_blip2.py

import pytest
import torch

import colossalai
from colossalai.logging import disable_existing_loggers
from colossalai.testing import (
    assert_hf_output_close,
    clear_cache_before_run,
    parameterize,
    rerun_if_address_is_in_use,
    spawn,
)
from tests.kit.model_zoo import model_zoo
from tests.test_shardformer.test_model._utils import build_model, check_grad, run_forward


def check_forward_backward(org_model, sharded_model, data_gen_fn, output_transform_fn, loss_fn):
    """Compare an original model and its sharded counterpart on one step.

    Both models are run through ``run_forward``; their outputs are compared
    with ``assert_hf_output_close`` (ignoring ``past_key_values``), both losses
    are back-propagated, the losses are asserted to be close, and finally the
    gradients of a fixed set of layers are compared with ``check_grad``.

    Args:
        org_model: The reference (unsharded) model.
        sharded_model: The sharded model to validate against ``org_model``.
        data_gen_fn: Passed through to ``run_forward``.
        output_transform_fn: Passed through to ``run_forward``.
        loss_fn: Passed through to ``run_forward``.

    Raises:
        AssertionError: If the two losses are not within ``atol=1e-5`` of each
            other. The output and gradient helpers presumably raise on
            mismatch as well -- confirm against their implementations.
    """
    # check forward
    org_output, org_loss, shard_output, shard_loss = run_forward(
        org_model, sharded_model, data_gen_fn, output_transform_fn, loss_fn
    )
    assert_hf_output_close(org_output, shard_output, ignore_keys=["past_key_values"])

    # do backward
    org_loss.backward()
    shard_loss.backward()

    assert torch.allclose(
        org_loss, shard_loss, atol=1e-5
    ), f"shard model loss is not equal to origin model loss\n{org_loss}\n{shard_loss}"

    # check grad
    # NOTE(review): the list names suggest column-/row-sharded linear layers and
    # that ``dim`` selects the sharded dimension -- confirm against ``check_grad``.
    col_layer_for_check = [
        "vision_model.encoder.layers[0].self_attn.qkv",
        "qformer.encoder.layer[0].attention.attention.query",
        "language_model.model.decoder.layers[0].self_attn.k_proj",
    ]
    row_layer_for_check = [
        "vision_model.encoder.layers[0].self_attn.projection",
        "qformer.encoder.layer[0].attention.output.dense",
        "language_model.model.decoder.layers[0].self_attn.out_proj",
    ]
    # Same tolerances for both groups; only the layer list and ``dim`` differ.
    for layers_for_check, dim in ((col_layer_for_check, 0), (row_layer_for_check, 1)):
        check_grad(
            org_model,
            sharded_model,
            layers_for_check,
            atol=1e-6,
            rtol=1e-5,
            dim=dim,
            verbose=False,
        )


@parameterize("enable_fused_normalization", [True, False])
@parameterize("enable_tensor_parallelism", [True, False])
@parameterize("enable_flash_attention", [True, False])
@parameterize("enable_jit_fused", [True, False])
def run_blip2_test(
    enable_fused_normalization,
    enable_tensor_parallelism,
    enable_flash_attention,
    enable_jit_fused,
):
    """Check every model registered under ``transformers_blip2``.

    For each registry entry, an original/sharded model pair is built by
    ``build_model`` with the four flags and ``dtype=torch.float``, then handed
    to ``check_forward_backward``. ``torch.cuda.empty_cache()`` is called once
    after all entries have been processed.

    Args:
        enable_fused_normalization: Forwarded positionally to ``build_model``.
        enable_tensor_parallelism: Forwarded positionally to ``build_model``.
        enable_flash_attention: Forwarded positionally to ``build_model``.
        enable_jit_fused: Forwarded positionally to ``build_model``.
    """
    # Kept in the exact positional order ``build_model`` is called with.
    shard_flags = (
        enable_fused_normalization,
        enable_tensor_parallelism,
        enable_flash_attention,
        enable_jit_fused,
    )
    blip2_registry = model_zoo.get_sub_registry("transformers_blip2")

    # The registry keys are not needed, only the entries themselves.
    for entry in blip2_registry.values():
        # The fifth element of each entry is unused by this test.
        model_fn, data_gen_fn, output_transform_fn, loss_fn, _ = entry
        org_model, sharded_model = build_model(model_fn, *shard_flags, dtype=torch.float)
        check_forward_backward(org_model, sharded_model, data_gen_fn, output_transform_fn, loss_fn)

    torch.cuda.empty_cache()


def check_blip2(rank, world_size, port):
    """Initialise ColossalAI for this process and run the BLIP-2 checks.

    Args:
        rank: Passed to ``colossalai.launch`` as ``rank``.
        world_size: Passed to ``colossalai.launch`` as ``world_size``.
        port: Passed to ``colossalai.launch`` as ``port``; the host is fixed
            to ``"localhost"`` and the backend to ``"nccl"``.
    """
    disable_existing_loggers()

    launch_options = {
        "config": {},
        "rank": rank,
        "world_size": world_size,
        "host": "localhost",
        "port": port,
        "backend": "nccl",
    }
    colossalai.launch(**launch_options)

    run_blip2_test()


@pytest.mark.dist
@rerun_if_address_is_in_use()
@clear_cache_before_run()
def test_blip2():
    """Pytest entry point: hand ``check_blip2`` to ``spawn``."""
    # Second positional argument of ``spawn``; presumably the number of
    # worker processes -- confirm against ``colossalai.testing.spawn``.
    num_workers = 2
    spawn(check_blip2, num_workers)


# Allow running this module directly as a script instead of through pytest.
if __name__ == "__main__":
    test_blip2()
[shardformer] support Blip2 (#4243) * support base blip2 * add support for downstream blip2 model * update readme * add forward injection * skip not compatible models test * fix test for gemini and low_level_zero_pugin 2023-07-25 06:29:10 +00:00			`import pytest`
			`import torch`

			`import colossalai`
			`from colossalai.logging import disable_existing_loggers`
			`from colossalai.testing import (`
			`assert_hf_output_close,`
			`clear_cache_before_run,`
			`parameterize,`
			`rerun_if_address_is_in_use,`
			`spawn,`
			`)`
			`from tests.kit.model_zoo import model_zoo`
[test] Hotfix/fix some model test and refactor check util api (#4369) * fix llama test * fix test bug of bert, blip2, bloom, gpt2 * fix llama test * fix opt test * fix sam test * fix sam test * fix t5 test * fix vit test * fix whisper test * fix whisper test * polish code * adjust allclose parameter * Add mistakenly deleted code * addjust allclose * change loss function for some base model 2023-08-03 06:51:36 +00:00			`from tests.test_shardformer.test_model._utils import build_model, check_grad, run_forward`
[shardformer] support Blip2 (#4243) * support base blip2 * add support for downstream blip2 model * update readme * add forward injection * skip not compatible models test * fix test for gemini and low_level_zero_pugin 2023-07-25 06:29:10 +00:00

			`def check_forward_backward(org_model, sharded_model, data_gen_fn, output_transform_fn, loss_fn):`
			`# check forward`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`org_output, org_loss, shard_output, shard_loss = run_forward(`
			`org_model, sharded_model, data_gen_fn, output_transform_fn, loss_fn`
			`)`
			`assert_hf_output_close(org_output, shard_output, ignore_keys=["past_key_values"])`
[shardformer] support Blip2 (#4243) * support base blip2 * add support for downstream blip2 model * update readme * add forward injection * skip not compatible models test * fix test for gemini and low_level_zero_pugin 2023-07-25 06:29:10 +00:00
			`# do backward`
			`org_loss.backward()`
			`shard_loss.backward()`

[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`assert torch.allclose(`
			`org_loss, shard_loss, atol=1e-5`
			`), f"shard model loss is not equal to orgin model loss\n{org_loss}\n{shard_loss}"`
[shardformer] support Blip2 (#4243) * support base blip2 * add support for downstream blip2 model * update readme * add forward injection * skip not compatible models test * fix test for gemini and low_level_zero_pugin 2023-07-25 06:29:10 +00:00
			`# check grad`

			`blip2 = org_model`
			`sharded_blip2 = sharded_model`

[test] Hotfix/fix some model test and refactor check util api (#4369) * fix llama test * fix test bug of bert, blip2, bloom, gpt2 * fix llama test * fix opt test * fix sam test * fix sam test * fix t5 test * fix vit test * fix whisper test * fix whisper test * polish code * adjust allclose parameter * Add mistakenly deleted code * addjust allclose * change loss function for some base model 2023-08-03 06:51:36 +00:00			`# check grad`
			`col_layer_for_check = [`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`"vision_model.encoder.layers[0].self_attn.qkv",`
			`"qformer.encoder.layer[0].attention.attention.query",`
			`"language_model.model.decoder.layers[0].self_attn.k_proj",`
[test] Hotfix/fix some model test and refactor check util api (#4369) * fix llama test * fix test bug of bert, blip2, bloom, gpt2 * fix llama test * fix opt test * fix sam test * fix sam test * fix t5 test * fix vit test * fix whisper test * fix whisper test * polish code * adjust allclose parameter * Add mistakenly deleted code * addjust allclose * change loss function for some base model 2023-08-03 06:51:36 +00:00			`]`
			`row_layer_for_check = [`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`"vision_model.encoder.layers[0].self_attn.projection",`
			`"qformer.encoder.layer[0].attention.output.dense",`
			`"language_model.model.decoder.layers[0].self_attn.out_proj",`
[test] Hotfix/fix some model test and refactor check util api (#4369) * fix llama test * fix test bug of bert, blip2, bloom, gpt2 * fix llama test * fix opt test * fix sam test * fix sam test * fix t5 test * fix vit test * fix whisper test * fix whisper test * polish code * adjust allclose parameter * Add mistakenly deleted code * addjust allclose * change loss function for some base model 2023-08-03 06:51:36 +00:00			`]`
[shardformer] update colo attention to support custom mask (#5510) * [feature] refactor colo attention (#5462) * [extension] update api * [feature] add colo attention * [feature] update sdpa * [feature] update npu attention * [feature] update flash-attn * [test] add flash attn test * [test] update flash attn test * [shardformer] update modeling to fit colo attention (#5465) * [misc] refactor folder structure * [shardformer] update llama flash-attn * [shardformer] fix llama policy * [devops] update tensornvme install * [test] update llama test * [shardformer] update colo attn kernel dispatch * [shardformer] update blip2 * [shardformer] update chatglm * [shardformer] update gpt2 * [shardformer] update gptj * [shardformer] update opt * [shardformer] update vit * [shardformer] update colo attention mask prep * [shardformer] update whisper * [test] fix shardformer tests (#5514) * [test] fix shardformer tests * [test] fix shardformer tests 2024-03-27 03:19:32 +00:00			`check_grad(`
			`blip2,`
			`sharded_blip2,`
			`col_layer_for_check,`
			`atol=1e-6,`
			`rtol=1e-5,`
			`dim=0,`
			`verbose=False,`
			`)`
			`check_grad(`
			`blip2,`
			`sharded_blip2,`
			`row_layer_for_check,`
			`atol=1e-6,`
			`rtol=1e-5,`
			`dim=1,`
			`verbose=False,`
			`)`
[shardformer] support Blip2 (#4243) * support base blip2 * add support for downstream blip2 model * update readme * add forward injection * skip not compatible models test * fix test for gemini and low_level_zero_pugin 2023-07-25 06:29:10 +00:00

[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`@parameterize("enable_fused_normalization", [True, False])`
			`@parameterize("enable_tensor_parallelism", [True, False])`
			`@parameterize("enable_flash_attention", [True, False])`
			`@parameterize("enable_jit_fused", [True, False])`
[shardformer] update colo attention to support custom mask (#5510) * [feature] refactor colo attention (#5462) * [extension] update api * [feature] add colo attention * [feature] update sdpa * [feature] update npu attention * [feature] update flash-attn * [test] add flash attn test * [test] update flash attn test * [shardformer] update modeling to fit colo attention (#5465) * [misc] refactor folder structure * [shardformer] update llama flash-attn * [shardformer] fix llama policy * [devops] update tensornvme install * [test] update llama test * [shardformer] update colo attn kernel dispatch * [shardformer] update blip2 * [shardformer] update chatglm * [shardformer] update gpt2 * [shardformer] update gptj * [shardformer] update opt * [shardformer] update vit * [shardformer] update colo attention mask prep * [shardformer] update whisper * [test] fix shardformer tests (#5514) * [test] fix shardformer tests * [test] fix shardformer tests 2024-03-27 03:19:32 +00:00			`def run_blip2_test(`
			`enable_fused_normalization,`
			`enable_tensor_parallelism,`
			`enable_flash_attention,`
			`enable_jit_fused,`
			`):`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`sub_model_zoo = model_zoo.get_sub_registry("transformers_blip2")`
[shardformer] update colo attention to support custom mask (#5510) * [feature] refactor colo attention (#5462) * [extension] update api * [feature] add colo attention * [feature] update sdpa * [feature] update npu attention * [feature] update flash-attn * [test] add flash attn test * [test] update flash attn test * [shardformer] update modeling to fit colo attention (#5465) * [misc] refactor folder structure * [shardformer] update llama flash-attn * [shardformer] fix llama policy * [devops] update tensornvme install * [test] update llama test * [shardformer] update colo attn kernel dispatch * [shardformer] update blip2 * [shardformer] update chatglm * [shardformer] update gpt2 * [shardformer] update gptj * [shardformer] update opt * [shardformer] update vit * [shardformer] update colo attention mask prep * [shardformer] update whisper * [test] fix shardformer tests (#5514) * [test] fix shardformer tests * [test] fix shardformer tests 2024-03-27 03:19:32 +00:00			`for name, (`
			`model_fn,`
			`data_gen_fn,`
			`output_transform_fn,`
			`loss_fn,`
			`_,`
			`) in sub_model_zoo.items():`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`org_model, sharded_model = build_model(`
[shardformer] update colo attention to support custom mask (#5510) * [feature] refactor colo attention (#5462) * [extension] update api * [feature] add colo attention * [feature] update sdpa * [feature] update npu attention * [feature] update flash-attn * [test] add flash attn test * [test] update flash attn test * [shardformer] update modeling to fit colo attention (#5465) * [misc] refactor folder structure * [shardformer] update llama flash-attn * [shardformer] fix llama policy * [devops] update tensornvme install * [test] update llama test * [shardformer] update colo attn kernel dispatch * [shardformer] update blip2 * [shardformer] update chatglm * [shardformer] update gpt2 * [shardformer] update gptj * [shardformer] update opt * [shardformer] update vit * [shardformer] update colo attention mask prep * [shardformer] update whisper * [test] fix shardformer tests (#5514) * [test] fix shardformer tests * [test] fix shardformer tests 2024-03-27 03:19:32 +00:00			`model_fn,`
			`enable_fused_normalization,`
			`enable_tensor_parallelism,`
			`enable_flash_attention,`
			`enable_jit_fused,`
			`dtype=torch.float,`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`)`
[shardformer] support Blip2 (#4243) * support base blip2 * add support for downstream blip2 model * update readme * add forward injection * skip not compatible models test * fix test for gemini and low_level_zero_pugin 2023-07-25 06:29:10 +00:00			`check_forward_backward(org_model, sharded_model, data_gen_fn, output_transform_fn, loss_fn)`

			`torch.cuda.empty_cache()`


			`def check_blip2(rank, world_size, port):`
			`disable_existing_loggers()`
[shardformer] update colo attention to support custom mask (#5510) * [feature] refactor colo attention (#5462) * [extension] update api * [feature] add colo attention * [feature] update sdpa * [feature] update npu attention * [feature] update flash-attn * [test] add flash attn test * [test] update flash attn test * [shardformer] update modeling to fit colo attention (#5465) * [misc] refactor folder structure * [shardformer] update llama flash-attn * [shardformer] fix llama policy * [devops] update tensornvme install * [test] update llama test * [shardformer] update colo attn kernel dispatch * [shardformer] update blip2 * [shardformer] update chatglm * [shardformer] update gpt2 * [shardformer] update gptj * [shardformer] update opt * [shardformer] update vit * [shardformer] update colo attention mask prep * [shardformer] update whisper * [test] fix shardformer tests (#5514) * [test] fix shardformer tests * [test] fix shardformer tests 2024-03-27 03:19:32 +00:00			`colossalai.launch(`
			`config={},`
			`rank=rank,`
			`world_size=world_size,`
			`host="localhost",`
			`port=port,`
			`backend="nccl",`
			`)`
[shardformer] support Blip2 (#4243) * support base blip2 * add support for downstream blip2 model * update readme * add forward injection * skip not compatible models test * fix test for gemini and low_level_zero_pugin 2023-07-25 06:29:10 +00:00			`run_blip2_test()`


			`@pytest.mark.dist`
			`@rerun_if_address_is_in_use()`
			`@clear_cache_before_run()`
			`def test_blip2():`
			`spawn(check_blip2, 2)`


			`if __name__ == "__main__":`
			`test_blip2()`