import pytest
import torch

from colossalai.elixir.tracer.memory_tracer import cuda_memory_profiling
from colossalai.testing import run_on_environment_flag
from tests.test_elixir.utils import TEST_MODELS, to_cuda


def one_step(model, inp):
    """Run a single forward and backward pass.

    Args:
        model: callable invoked as ``model(**inp)``; the object it returns
            must provide a ``backward()`` method.
        inp: mapping whose items are passed to ``model`` as keyword arguments.

    Returns:
        The object returned by ``model(**inp)``, after ``backward()`` has been
        called on it.
    """
    result = model(**inp)
    result.backward()
    return result


def try_one_model(model_fn, data_fn):
    """Compare ``cuda_memory_profiling`` against PyTorch's allocator counters.

    The model is stepped once as a warm-up, then a second step is measured
    with ``torch.cuda`` statistics (peak allocation minus the allocation right
    before the step).  That figure must equal the ``'activation_occ'`` entry
    reported by ``cuda_memory_profiling``, and the profiling call itself must
    leave ``torch.cuda.memory_allocated()`` unchanged.

    Args:
        model_fn: zero-argument callable returning a model that has ``.cuda()``.
        data_fn: zero-argument callable returning the model inputs; the result
            is moved with ``to_cuda`` and passed to the model as keyword
            arguments by ``one_step``.

    Raises:
        AssertionError: if either of the two checks fails.
    """
    model = model_fn().cuda()
    data = to_cuda(data_fn())
    # Warm-up step: gradients exist before the baseline below is sampled.
    # The return value is intentionally not kept, so it cannot pin memory.
    one_step(model, data)

    # Reference measurement using the CUDA allocator statistics.
    baseline_bytes = torch.cuda.memory_allocated()
    torch.cuda.reset_peak_memory_stats()
    one_step(model, data)
    expected_activation = torch.cuda.max_memory_allocated() - baseline_bytes
    # Drop the gradients before profiling.
    # NOTE(review): presumably the profiler expects a model without grads -- confirm.
    model.zero_grad(set_to_none=True)
    print('normal', expected_activation)

    # Measurement under test, bracketed by allocation snapshots.
    allocated_pre = torch.cuda.memory_allocated()
    report = cuda_memory_profiling(model, data, one_step)
    allocated_post = torch.cuda.memory_allocated()
    print('profiling', report)

    # Profiling must not change the amount of allocated CUDA memory ...
    assert allocated_pre == allocated_post
    # ... and must report the same activation footprint as the reference run.
    assert expected_activation == report['activation_occ']
    print('Check is ok.')


@run_on_environment_flag('ELX')
def test_cuda_profiler():
    """Run the profiler consistency check on each selected test model.

    For every model name, the ``(model_fn, data_fn)`` pair is looked up in
    ``TEST_MODELS`` and handed to ``try_one_model``.

    NOTE(review): execution is presumably gated on the ``ELX`` environment
    flag by ``run_on_environment_flag`` -- confirm against that helper.
    """
    for model_name in ('resnet', 'gpt2_micro'):
        build_model, build_data = TEST_MODELS.get(model_name)
        try_one_model(build_model, build_data)


# Allow running this check directly as a script instead of through pytest.
if __name__ == '__main__':
    test_cuda_profiler()