diff --git a/.github/workflows/daily_tests.yaml b/.github/workflows/daily_tests.yaml
index 0c7062e..ff3570a 100644
--- a/.github/workflows/daily_tests.yaml
+++ b/.github/workflows/daily_tests.yaml
@@ -36,12 +36,20 @@ jobs:
           conda create -n internlm-model-latest --clone ${CONDA_BASE_ENV}
           source activate internlm-model-latest
           pip install transformers
+      - name: install torch
+        run: |
+          source activate internlm-model-latest
+          pip install /mnt/petrelfs/qa-caif-cicd/resource/flash_attn-2.5.8+cu118torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+          pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118
+          pip install sentencepiece auto-gptq==0.6.0 beautifulsoup4 decord lxml
+          export LMDEPLOY_VERSION=0.5.0
+          export PYTHON_VERSION=310
+          pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118
+          python3 -m pip list
+          conda deactivate
       - name: run_test
         run: |
           source activate internlm-model-latest
-          pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118
-          pip install /mnt/petrelfs/qa-caif-cicd/resource/flash_attn-2.5.8+cu118torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
-          pip install sentencepiece auto-gptq==0.6.0 lmdeploy[all]
           srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 pytest -s -v --color=yes ./tests/test_hf_model.py
           conda deactivate
       - name: remove_env
diff --git a/tests/test_hf_model.py b/tests/test_hf_model.py
index dc9ed16..126111f 100644
--- a/tests/test_hf_model.py
+++ b/tests/test_hf_model.py
@@ -1,6 +1,7 @@
 import pytest
 import torch
 from auto_gptq.modeling import BaseGPTQForCausalLM
+from bs4 import BeautifulSoup
 from lmdeploy import TurbomindEngineConfig, pipeline
 from PIL import Image
 from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
@@ -22,6 +23,7 @@ class TestChat:
         'model_name',
         [
             'internlm/internlm2_5-7b-chat', 'internlm/internlm2_5-7b-chat-1m',
+            'internlm/internlm2_5-20b-chat',
             'internlm/internlm2_5-1_8b-chat', 'internlm/internlm2-chat-7b',
             'internlm/internlm2-chat-7b-sft', 'internlm/internlm2-chat-20b',
             'internlm/internlm2-chat-20b-sft', 'internlm/internlm2-chat-1_8b',
             'internlm/internlm2-chat-1_8b-sft'
@@ -61,23 +63,6 @@ class TestChat:
         assert_model(response)
 
 
-class TestChatAwq:
-    """Test cases for chat model."""
-
-    @pytest.mark.parametrize(
-        'model_name',
-        ['internlm/internlm2-chat-20b-4bits'],
-    )
-    def test_demo_default(self, model_name):
-        engine_config = TurbomindEngineConfig(model_format='awq')
-        pipe = pipeline('internlm/internlm2-chat-20b-4bits',
-                        backend_config=engine_config)
-        responses = pipe(['Hi, pls intro yourself', 'Shanghai is'])
-        print(responses)
-        for response in responses:
-            assert_model(response.text)
-
-
 class TestBase:
     """Test cases for base model."""
@@ -86,7 +71,8 @@ class TestBase:
         [
             'internlm/internlm2_5-7b', 'internlm/internlm2-7b',
             'internlm/internlm2-base-7b', 'internlm/internlm2-20b',
-            'internlm/internlm2-base-20b', 'internlm/internlm2-1_8b'
+            'internlm/internlm2-base-20b', 'internlm/internlm2-1_8b',
+            'internlm/internlm2_5-20b',
         ],
     )
     @pytest.mark.parametrize(
@@ -279,3 +265,373 @@ class InternLMXComposer2QForCausalLM(BaseGPTQForCausalLM):
         ['feed_forward.w1.linear', 'feed_forward.w3.linear'],
         ['feed_forward.w2.linear'],
     ]
+
+
+class TestReward:
+    """Test cases for reward model."""
+
+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm2-1_8b-reward', 'internlm/internlm2-7b-reward',
+            'internlm/internlm2-20b-reward'
+        ],
+    )
+    @pytest.mark.parametrize(
+        'usefast',
+        [
+            True,
+            False,
+        ],
+    )
+    def test_demo_default(self, model_name, usefast):
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True,
+                                                  use_fast=usefast)
+        model = AutoModel.from_pretrained(
+            model_name,
+            device_map='cuda',
+            torch_dtype=torch.float16,
+            trust_remote_code=True,
+        )
+
+        chat_1 = [{
+            'role': 'user',
+            'content': "Hello! What's your name?"
+        }, {
+            'role':
+            'assistant',
+            'content':
+            'I am InternLM2! A helpful AI assistant. What can I do for you?'
+        }]
+        chat_2 = [{
+            'role': 'user',
+            'content': "Hello! What's your name?"
+        }, {
+            'role': 'assistant',
+            'content': 'I have no idea.'
+        }]
+
+        # get reward score for a single chat
+        score1 = model.get_score(tokenizer, chat_1)
+        score2 = model.get_score(tokenizer, chat_2)
+        print('score1: ', score1)
+        print('score2: ', score2)
+        assert score1 > 0
+        assert score2 < 0
+
+        # batch inference, get multiple scores at once
+        scores = model.get_scores(tokenizer, [chat_1, chat_2])
+        print('scores: ', scores)
+        assert scores[0] > 0
+        assert scores[1] < 0
+
+        # compare whether chat_1 is better than chat_2
+        compare_res = model.compare(tokenizer, chat_1, chat_2)
+        print('compare_res: ', compare_res)
+        assert compare_res
+        # >>> compare_res: True
+
+        # rank multiple chats; it returns the ranking index of each chat,
+        # and the chat with the highest score has ranking index 0
+        rank_res = model.rank(tokenizer, [chat_1, chat_2])
+        print('rank_res: ', rank_res)  # lower index means higher score
+        # >>> rank_res: [0, 1]
+        assert rank_res[0] == 0
+        assert rank_res[1] == 1
+
+
+class TestXcomposer2d5Model:
+    """Test cases for the InternLM-XComposer-2.5 model."""
+
+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm-xcomposer2d5-7b',
+        ],
+    )
+    def test_video_understanding(self, model_name):
+        torch.set_grad_enabled(False)
+
+        # init model and tokenizer
+        model = AutoModel.from_pretrained(
+            model_name, torch_dtype=torch.bfloat16,
+            trust_remote_code=True).cuda().eval().half()
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True)
+        model.tokenizer = tokenizer
+
+        query = 'Here are some frames of a video. Describe this video in detail'  # noqa: F401, E501
+        image = [
+            '/mnt/petrelfs/qa-caif-cicd/github_runner/examples/liuxiang.mp4',
+        ]
+
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response, his = model.chat(tokenizer,
+                                       query,
+                                       image,
+                                       do_sample=False,
+                                       num_beams=3,
+                                       use_meta=True)
+        print(response)
+        assert len(response) > 100
+        assert 'athlete' in response.lower()
+
+        query = 'tell me the athlete code of Liu Xiang'
+        image = [
+            '/mnt/petrelfs/qa-caif-cicd/github_runner/examples/liuxiang.mp4',
+        ]
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response, _ = model.chat(tokenizer,
+                                     query,
+                                     image,
+                                     history=his,
+                                     do_sample=False,
+                                     num_beams=3,
+                                     use_meta=True)
+        print(response)
+        assert len(response) > 10
+        assert '1363' in response.lower()
+
+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm-xcomposer2d5-7b',
+        ],
+    )
+    def test_multi_image_understanding(self, model_name):
+        torch.set_grad_enabled(False)
+
+        # init model and tokenizer
+        model = AutoModel.from_pretrained(
+            model_name, torch_dtype=torch.bfloat16,
+            trust_remote_code=True).cuda().eval().half()
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True)
+        model.tokenizer = tokenizer
+
+        query = 'Image1 <ImageHere>; Image2 <ImageHere>; Image3 <ImageHere>; I want to buy a car from the three given cars, analyze their advantages and weaknesses one by one'  # noqa: F401, E501
+        image = [
+            '/mnt/petrelfs/qa-caif-cicd/github_runner/examples/cars1.jpg',
+            '/mnt/petrelfs/qa-caif-cicd/github_runner/examples/cars2.jpg',
+            '/mnt/petrelfs/qa-caif-cicd/github_runner/examples/cars3.jpg',
+        ]
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response, his = model.chat(tokenizer,
+                                       query,
+                                       image,
+                                       do_sample=False,
+                                       num_beams=3,
+                                       use_meta=True)
+        print(response)
+        assert len(response) > 100
+        assert 'car' in response.lower()
+
+        query = 'Image4 <ImageHere>; How about the car in Image4'
+        image.append(
+            '/mnt/petrelfs/qa-caif-cicd/github_runner/examples/cars4.jpg')
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response, _ = model.chat(tokenizer,
+                                     query,
+                                     image,
+                                     do_sample=False,
+                                     num_beams=3,
+                                     history=his,
+                                     use_meta=True)
+        print(response)
+        assert len(response) > 10
+        assert 'ferrari' in response.lower()
+
+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm-xcomposer2d5-7b',
+        ],
+    )
+    def test_high_resolution_default(self, model_name):
+        torch.set_grad_enabled(False)
+
+        # init model and tokenizer
+        model = AutoModel.from_pretrained(
+            model_name, torch_dtype=torch.bfloat16,
+            trust_remote_code=True).cuda().eval().half()
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True)
+        model.tokenizer = tokenizer
+
+        query = 'Analyze the given image in a detail manner'
+        image = ['/mnt/petrelfs/qa-caif-cicd/github_runner/examples/dubai.png']
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response, _ = model.chat(tokenizer,
+                                     query,
+                                     image,
+                                     do_sample=False,
+                                     num_beams=3,
+                                     use_meta=True)
+        print(response)
+        assert len(response) > 100
+        assert 'dubai' in response.lower()
+
+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm-xcomposer2d5-7b',
+        ],
+    )
+    def test_introduce_web_default(self, model_name):
+        torch.set_grad_enabled(False)
+        # init model and tokenizer
+        model = AutoModel.from_pretrained(
+            model_name, torch_dtype=torch.bfloat16,
+            trust_remote_code=True).cuda().eval()
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True)
+        model.tokenizer = tokenizer
+
+        query = '''A website for Research institutions. The name is Shanghai
+        AI lab. Top Navigation Bar is blue.Below left, an image shows the
+        logo of the lab. In the right, there is a passage of text below that
+        describes the mission of the laboratory.There are several images to
+        show the research projects of Shanghai AI lab.'''
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response = model.write_webpage(
+                query,
+                seed=202,
+                task='Instruction-aware Webpage Generation',
+                repetition_penalty=3.0)
+        print(response)
+        assert len(response) > 100
+        assert is_html_code(response)
+        assert 'href' in response.lower()
+
+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm-xcomposer2d5-7b',
+        ],
+    )
+    def test_resume_to_webset_default(self, model_name):
+        torch.set_grad_enabled(False)
+
+        # init model and tokenizer
+        model = AutoModel.from_pretrained(
+            model_name, torch_dtype=torch.bfloat16,
+            trust_remote_code=True).cuda().eval()
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True)
+        model.tokenizer = tokenizer
+
+        # the input should be a resume in markdown format
+        query = '/mnt/petrelfs/qa-caif-cicd/github_runner/examples/resume.md'
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response = model.resume_2_webpage(query,
+                                              seed=202,
+                                              repetition_penalty=3.0)
+        print(response)
+        assert len(response) > 100
+        assert is_html_code(response)
+        assert 'href' in response.lower()
+
+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm-xcomposer2d5-7b',
+        ],
+    )
+    def test_screen_to_webset_default(self, model_name):
+        torch.set_grad_enabled(False)
+
+        # init model and tokenizer
+        model = AutoModel.from_pretrained(
+            model_name, torch_dtype=torch.bfloat16,
+            trust_remote_code=True).cuda().eval()
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True)
+        model.tokenizer = tokenizer
+
+        query = 'Generate the HTML code of this web image with Tailwind CSS.'
+        image = [
+            '/mnt/petrelfs/qa-caif-cicd/github_runner/examples/screenshot.jpg'
+        ]
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response = model.screen_2_webpage(query,
+                                              image,
+                                              seed=202,
+                                              repetition_penalty=3.0)
+        print(response)
+        assert len(response) > 100
+        assert is_html_code(response)
+        assert 'href' in response.lower()
+
+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm-xcomposer2d5-7b',
+        ],
+    )
+    def test_write_artical_default(self, model_name):
+        torch.set_grad_enabled(False)
+
+        # init model and tokenizer
+        model = AutoModel.from_pretrained(
+            'internlm/internlm-xcomposer2d5-7b',
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True).cuda().eval()
+        tokenizer = AutoTokenizer.from_pretrained(
+            'internlm/internlm-xcomposer2d5-7b', trust_remote_code=True)
+        model.tokenizer = tokenizer
+
+        query = '''阅读下面的材料,根据要求写作。 电影《长安三万里》的出现让人感慨,影片并未将重点全落在大唐风华上,
+        也展现了恢弘气象的阴暗面,即旧门阀的资源垄断、朝政的日益衰败与青年才俊的壮志难酬。高适仕进无门,只能回乡沉潜修行。
+        李白虽得玉真公主举荐,擢入翰林,但他只是成为唐玄宗的御用文人,不能真正实现有益于朝政的志意。然而,片中高潮部分《将进酒》一节,
+        人至中年、挂着肚腩的李白引众人乘仙鹤上天,一路从水面、瀑布飞升至银河进入仙宫,李白狂奔着与仙人们碰杯,最后大家纵身飞向漩涡般的九重天。
+        肉身的微贱、世路的坎坷,拘不住精神的高蹈。“天生我材必有用,千金散尽还复来。” 古往今来,身处闲顿、遭受挫折、被病痛折磨,
+        很多人都曾经历了人生的“失意”,却反而成就了他们“诗意”的人生。对正在追求人生价值的当代青年来说,如何对待人生中的缺憾和困顿?诗意人生中又
+        有怎样的自我坚守和自我认同?请结合“失意”与“诗意”这两个关键词写一篇文章。 要求:选准角度,确定立意,明确文体,自拟标题;不要套作,不得抄
+        袭;不得泄露个人信息;不少于 800 字。'''
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response = model.write_artical(query, seed=8192)
+        print(response)
+        assert len(response) > 100
+        assert '。' in response and '诗' in response
+
+        query = '''Please write a blog based on the title: French Pastries:
+        A Sweet Indulgence'''
+        with torch.autocast(device_type='cuda', dtype=torch.float16):
+            response = model.write_artical(query, seed=8192)
+        print(response)
+        assert len(response) > 100
+        assert ' ' in response and 'a' in response
+
+
+def is_html_code(html_code):
+    try:
+        soup = BeautifulSoup(html_code, 'lxml')
+        if soup.find('html'):
+            print('HTML appears to be well-formed.')
+            return True
+        else:
+            print('There was an issue with the HTML structure.')
+            return False
+    except Exception as e:
+        print('Error parsing HTML:', str(e))
+        return False
+
+
+class TestChatAwq:
+    """Test cases for chat model."""
+
+    @pytest.mark.parametrize(
+        'model_name',
+        ['internlm/internlm2-chat-20b-4bits'],
+    )
+    def test_demo_default(self, model_name):
+        engine_config = TurbomindEngineConfig(model_format='awq')
+        pipe = pipeline('internlm/internlm2-chat-20b-4bits',
+                        backend_config=engine_config)
+        responses = pipe(['Hi, pls intro yourself', 'Shanghai is'])
+        print(responses)
+        for response in responses:
+            assert_model(response.text)