# InternLM/tests/test_hf_model.py


import pytest
import torch
from auto_gptq.modeling import BaseGPTQForCausalLM
from bs4 import BeautifulSoup
from lmdeploy import TurbomindEngineConfig, pipeline
from PIL import Image
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
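# NOTE: these tests assume a CUDA-capable GPU, network access to the
# Hugging Face Hub (all checkpoints are loaded with trust_remote_code=True),
# and the local image/video assets referenced below. A typical invocation
# (assumed here, not prescribed by the repo) would be:
#     pytest tests/test_hf_model.py -k TestChat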
prompts = ['你好', "what's your name"]
def assert_model(response):
assert len(response) != 0
assert 'UNUSED_TOKEN' not in response
assert 'Mynameis' not in response
assert 'Iama' not in response
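# assert_model() is a lightweight sanity check: the reply must be non-empty,
# must not leak reserved placeholder tokens such as 'UNUSED_TOKEN', and must
# not contain space-less artifacts like 'Mynameis' or 'Iama', which would
# indicate broken detokenization.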
class TestChat:
"""Test cases for chat model."""
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm2_5-7b-chat', 'internlm/internlm2_5-7b-chat-1m',
'internlm/internlm2_5-20b-chat', 'internlm/internlm2_5-1_8b-chat',
'internlm/internlm2-chat-7b', 'internlm/internlm2-chat-7b-sft',
'internlm/internlm2-chat-20b', 'internlm/internlm2-chat-20b-sft',
'internlm/internlm2-chat-1_8b', 'internlm/internlm2-chat-1_8b-sft'
],
)
@pytest.mark.parametrize(
'usefast',
[
True,
False,
],
)
def test_demo_default(self, model_name, usefast):
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True,
use_fast=usefast)
# Set `torch_dtype=torch.float16` to load model in float16, otherwise
# it will be loaded as float32 and might cause OOM Error.
model = AutoModelForCausalLM.from_pretrained(
model_name, torch_dtype=torch.float16,
trust_remote_code=True).cuda()
model = model.eval()
history = []
for prompt in prompts:
response, history = model.chat(tokenizer, prompt, history=history)
print(response)
assert_model(response)
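# stream_chat() yields the cumulative response at every step, so each
# iteration below prints only the newly generated suffix (response[length:]).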
history = []
for prompt in prompts:
length = 0
for response, history in model.stream_chat(tokenizer,
prompt,
history=[]):
print(response[length:], flush=True, end='')
length = len(response)
assert_model(response)
class TestBase:
"""Test cases for base model."""
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm2_5-7b', 'internlm/internlm2-7b',
'internlm/internlm2-base-7b', 'internlm/internlm2-20b',
'internlm/internlm2-base-20b', 'internlm/internlm2-1_8b',
'internlm/internlm2_5-20b',
],
)
@pytest.mark.parametrize(
'usefast',
[
True,
False,
],
)
def test_demo_default(self, model_name, usefast):
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True,
use_fast=usefast)
# Set `torch_dtype=torch.float16` to load model in float16, otherwise
# it will be loaded as float32 and might cause OOM Error.
model = AutoModelForCausalLM.from_pretrained(
model_name, torch_dtype=torch.float16,
trust_remote_code=True).cuda()
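# Base (non-chat) checkpoints are exercised with plain generate() on raw
# prompts instead of the chat() helper used for the chat models above.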
for prompt in prompts:
inputs = tokenizer(prompt, return_tensors='pt')
for k, v in inputs.items():
inputs[k] = v.cuda()
gen_kwargs = {
'max_length': 128,
# top_p must lie in (0, 1] when do_sample=True; 0.8 is a typical value.
'top_p': 0.8,
'temperature': 1.0,
'do_sample': True,
'repetition_penalty': 1.0,
}
output = model.generate(**inputs, **gen_kwargs)
output = tokenizer.decode(output[0].tolist(),
skip_special_tokens=True)
print(output)
assert_model(output)
class TestMath:
"""Test cases for base model."""
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm2-math-7b', 'internlm/internlm2-math-base-7b',
'internlm/internlm2-math-plus-1_8b',
'internlm/internlm2-math-plus-7b'
],
)
@pytest.mark.parametrize(
'usefast',
[
True,
False,
],
)
def test_demo_default(self, model_name, usefast):
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True,
use_fast=usefast)
# Set `torch_dtype=torch.float16` to load model in float16, otherwise
# it will be loaded as float32 and might cause OOM Error.
model = AutoModelForCausalLM.from_pretrained(
model_name, trust_remote_code=True,
torch_dtype=torch.float16).cuda()
model = model.eval()
response, history = model.chat(tokenizer,
'1+1=',
history=[],
meta_instruction='')
print(response)
assert_model(response)
assert '2' in response
class TestMMModel:
"""Test cases for base model."""
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm-xcomposer2-7b',
'internlm/internlm-xcomposer2-7b-4bit'
],
)
def test_demo_default(self, model_name):
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True)
# The non-quantized checkpoint is loaded in float32; the chat calls below
# run under `torch.cuda.amp.autocast()` for mixed-precision inference.
if '4bit' in model_name:
model = InternLMXComposer2QForCausalLM.from_quantized(
model_name, trust_remote_code=True, device='cuda:0').eval()
else:
model = AutoModelForCausalLM.from_pretrained(
model_name, torch_dtype=torch.float32,
trust_remote_code=True).cuda()
model = model.eval()
img_path_list = [
'tests/panda.jpg',
'tests/bamboo.jpeg',
]
images = []
for img_path in img_path_list:
image = Image.open(img_path).convert('RGB')
image = model.vis_processor(image)
images.append(image)
image = torch.stack(images)
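# vis_processor turns each PIL image into a tensor; stacking them gives a
# (2, C, H, W) batch matching the two <ImageHere> placeholders in the query.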
query = '<ImageHere> <ImageHere>please write an article ' \
+ 'based on the images. Title: my favorite animal.'
with torch.cuda.amp.autocast():
response, history = model.chat(tokenizer,
query=query,
image=image,
history=[],
do_sample=False)
print(response)
assert len(response) != 0
assert ' panda' in response
query = '<ImageHere> <ImageHere>请根据图片写一篇作文:我最喜欢的小动物。' \
+ '要求:选准角度,确定立意,明确文体,自拟标题。'
with torch.cuda.amp.autocast():
response, history = model.chat(tokenizer,
query=query,
image=image,
history=[],
do_sample=False)
print(response)
assert len(response) != 0
assert '熊猫' in response
class TestMMVlModel:
"""Test cases for base model."""
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm-xcomposer2-vl-7b',
'internlm/internlm-xcomposer2-vl-7b-4bit'
],
)
def test_demo_default(self, model_name):
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True)
torch.set_grad_enabled(False)
# init model and tokenizer
if '4bit' in model_name:
model = InternLMXComposer2QForCausalLM.from_quantized(
model_name, trust_remote_code=True, device='cuda:0').eval()
else:
model = AutoModel.from_pretrained(
model_name, trust_remote_code=True).cuda().eval()
query = '<ImageHere>Please describe this image in detail.'
image = 'tests/image.webp'
with torch.cuda.amp.autocast():
response, _ = model.chat(tokenizer,
query=query,
image=image,
history=[],
do_sample=False)
print(response)
assert len(response) != 0
assert 'Oscar Wilde' in response
assert 'Live life with no excuses, travel with no regret' in response
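# Minimal auto_gptq wrapper for the 4-bit XComposer2 checkpoints used above:
# layers_block_name names the repeated decoder blocks to quantize,
# outside_layer_modules (vision tower, projector, embeddings, final norm,
# output head) are left unquantized, and inside_layer_modules lists the
# per-block linear layers that GPTQ quantizes, in order.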
class InternLMXComposer2QForCausalLM(BaseGPTQForCausalLM):
layers_block_name = 'model.layers'
outside_layer_modules = [
'vit',
'vision_proj',
'model.tok_embeddings',
'model.norm',
'output',
]
inside_layer_modules = [
['attention.wqkv.linear'],
['attention.wo.linear'],
['feed_forward.w1.linear', 'feed_forward.w3.linear'],
['feed_forward.w2.linear'],
]
class TestReward:
"""Test cases for base model."""
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm2-1_8b-reward', 'internlm/internlm2-7b-reward',
'internlm/internlm2-20b-reward'
],
)
@pytest.mark.parametrize(
'usefast',
[
True,
False,
],
)
def test_demo_default(self, model_name, usefast):
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True,
use_fast=usefast)
model = AutoModel.from_pretrained(
model_name,
device_map='cuda',
torch_dtype=torch.float16,
trust_remote_code=True,
)
chat_1 = [{
'role': 'user',
'content': "Hello! What's your name?"
}, {
'role':
'assistant',
'content':
'I am InternLM2! A helpful AI assistant. What can I do for you?'
}]
chat_2 = [{
'role': 'user',
'content': "Hello! What's your name?"
}, {
'role': 'assistant',
'content': 'I have no idea.'
}]
# get reward score for a single chat
score1 = model.get_score(tokenizer, chat_1)
score2 = model.get_score(tokenizer, chat_2)
print('score1: ', score1)
print('score2: ', score2)
assert score1 > 0
assert score2 < 0
# batch inference, get multiple scores at once
scores = model.get_scores(tokenizer, [chat_1, chat_2])
print('scores: ', scores)
assert scores[0] > 0
assert scores[1] < 0
# compare whether chat_1 is better than chat_2
compare_res = model.compare(tokenizer, chat_1, chat_2)
print('compare_res: ', compare_res)
assert compare_res
# >>> compare_res: True
# rank multiple chats, it will return the ranking index of each chat
# the chat with the highest score will have ranking index as 0
rank_res = model.rank(tokenizer, [chat_1, chat_2])
print('rank_res: ', rank_res) # lower index means higher score
# >>> rank_res: [0, 1]
assert rank_res[0] == 0
assert rank_res[1] == 1
class TestXcomposer2d5Model:
"""Test cases for base model."""
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm-xcomposer2d5-7b',
],
)
def test_video_understanding(self, model_name):
torch.set_grad_enabled(False)
# init model and tokenizer
model = AutoModel.from_pretrained(
model_name, torch_dtype=torch.bfloat16,
trust_remote_code=True).cuda().eval().half()
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True)
model.tokenizer = tokenizer
query = 'Here are some frames of a video. Describe this video in detail' # noqa: F401, E501
image = [
'/mnt/petrelfs/qa-caif-cicd/github_runner/examples/liuxiang.mp4',
]
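# do_sample=False with num_beams=3 makes decoding deterministic beam search;
# use_meta=True asks the remote chat() implementation to prepend its built-in
# meta / system prompt.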
with torch.autocast(device_type='cuda', dtype=torch.float16):
response, his = model.chat(tokenizer,
query,
image,
do_sample=False,
num_beams=3,
use_meta=True)
print(response)
assert len(response) > 100
assert 'athlete' in response.lower()
query = 'tell me the athlete code of Liu Xiang'
image = [
'/mnt/petrelfs/qa-caif-cicd/github_runner/examples/liuxiang.mp4',
]
with torch.autocast(device_type='cuda', dtype=torch.float16):
response, _ = model.chat(tokenizer,
query,
image,
history=his,
do_sample=False,
num_beams=3,
use_meta=True)
print(response)
assert len(response) > 10
assert '1363' in response.lower()
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm-xcomposer2d5-7b',
],
)
def test_multi_image_understanding(self, model_name):
torch.set_grad_enabled(False)
# init model and tokenizer
model = AutoModel.from_pretrained(
model_name, torch_dtype=torch.bfloat16,
trust_remote_code=True).cuda().eval().half()
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True)
model.tokenizer = tokenizer
query = 'Image1 <ImageHere>; Image2 <ImageHere>; Image3 <ImageHere>; I want to buy a car from the three given cars, analyze their advantages and weaknesses one by one' # noqa: F401, E501
image = [
'/mnt/petrelfs/qa-caif-cicd/github_runner/examples/cars1.jpg',
'/mnt/petrelfs/qa-caif-cicd/github_runner/examples/cars2.jpg',
'/mnt/petrelfs/qa-caif-cicd/github_runner/examples/cars3.jpg',
]
with torch.autocast(device_type='cuda', dtype=torch.float16):
response, his = model.chat(tokenizer,
query,
image,
do_sample=False,
num_beams=3,
use_meta=True)
print(response)
assert len(response) > 100
assert 'car' in response.lower()
query = 'Image4 <ImageHere>; How about the car in Image4'
image.append(
'/mnt/petrelfs/qa-caif-cicd/github_runner/examples/cars4.jpg')
with torch.autocast(device_type='cuda', dtype=torch.float16):
response, _ = model.chat(tokenizer,
query,
image,
do_sample=False,
num_beams=3,
history=his,
use_meta=True)
print(response)
assert len(response) > 10
assert 'ferrari' in response.lower()
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm-xcomposer2d5-7b',
],
)
def test_high_resolution_default(self, model_name):
torch.set_grad_enabled(False)
# init model and tokenizer
model = AutoModel.from_pretrained(
model_name, torch_dtype=torch.bfloat16,
trust_remote_code=True).cuda().eval().half()
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True)
model.tokenizer = tokenizer
query = 'Analyze the given image in a detail manner'
image = ['/mnt/petrelfs/qa-caif-cicd/github_runner/examples/dubai.png']
with torch.autocast(device_type='cuda', dtype=torch.float16):
response, _ = model.chat(tokenizer,
query,
image,
do_sample=False,
num_beams=3,
use_meta=True)
print(response)
assert len(response) > 100
assert 'dubai' in response.lower()
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm-xcomposer2d5-7b',
],
)
def test_introduce_web_default(self, model_name):
torch.set_grad_enabled(False)
# init model and tokenizer
model = AutoModel.from_pretrained(
model_name, torch_dtype=torch.bfloat16,
trust_remote_code=True).cuda().eval()
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True)
model.tokenizer = tokenizer
query = '''A website for Research institutions. The name is Shanghai
AI lab. Top Navigation Bar is blue.Below left, an image shows the
logo of the lab. In the right, there is a passage of text below that
describes the mission of the laboratory.There are several images to
show the research projects of Shanghai AI lab.'''
with torch.autocast(device_type='cuda', dtype=torch.float16):
response = model.write_webpage(
query,
seed=202,
task='Instruction-aware Webpage Generation',
repetition_penalty=3.0)
print(response)
assert len(response) > 100
assert is_html_code(response)
assert 'href' in response.lower()
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm-xcomposer2d5-7b',
],
)
def test_resume_to_webset_default(self, model_name):
torch.set_grad_enabled(False)
# init model and tokenizer
model = AutoModel.from_pretrained(
model_name, torch_dtype=torch.bfloat16,
trust_remote_code=True).cuda().eval()
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True)
model.tokenizer = tokenizer
# the input should be a resume in markdown format
query = '/mnt/petrelfs/qa-caif-cicd/github_runner/examples/resume.md'
with torch.autocast(device_type='cuda', dtype=torch.float16):
response = model.resume_2_webpage(query,
seed=202,
repetition_penalty=3.0)
print(response)
assert len(response) > 100
assert is_html_code(response)
assert 'href' in response.lower()
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm-xcomposer2d5-7b',
],
)
def test_screen_to_webset_default(self, model_name):
torch.set_grad_enabled(False)
# init model and tokenizer
model = AutoModel.from_pretrained(
model_name, torch_dtype=torch.bfloat16,
trust_remote_code=True).cuda().eval()
tokenizer = AutoTokenizer.from_pretrained(model_name,
trust_remote_code=True)
model.tokenizer = tokenizer
query = 'Generate the HTML code of this web image with Tailwind CSS.'
image = [
'/mnt/petrelfs/qa-caif-cicd/github_runner/examples/screenshot.jpg'
]
with torch.autocast(device_type='cuda', dtype=torch.float16):
response = model.screen_2_webpage(query,
image,
seed=202,
repetition_penalty=3.0)
print(response)
assert len(response) > 100
assert is_html_code(response)
assert 'href' in response.lower()
@pytest.mark.parametrize(
'model_name',
[
'internlm/internlm-xcomposer2d5-7b',
],
)
def test_write_artical_default(self, model_name):
torch.set_grad_enabled(False)
# init model and tokenizer
model = AutoModel.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
trust_remote_code=True).cuda().eval()
tokenizer = AutoTokenizer.from_pretrained(
model_name, trust_remote_code=True)
model.tokenizer = tokenizer
query = '''阅读下面的材料,根据要求写作。 电影《长安三万里》的出现让人感慨,影片并未将重点全落在大唐风华上,
也展现了恢弘气象的阴暗面即旧门阀的资源垄断朝政的日益衰败与青年才俊的壮志难酬高适仕进无门只能回乡沉潜修行
李白虽得玉真公主举荐擢入翰林但他只是成为唐玄宗的御用文人不能真正实现有益于朝政的志意然而片中高潮部分将进酒一节
人至中年挂着肚腩的李白引众人乘仙鹤上天一路从水面瀑布飞升至银河进入仙>李白狂奔着与仙人们碰杯最后大家纵身飞向漩涡般的九重天
肉身的微贱世路的天生我材必有用坎坷拘不住精神的高蹈天生我材必有用千金散尽还复来 古往今来身处闲顿遭受挫折被病痛折磨
很多人都曾经历了人生的失意却反而成就了他们诗意的人生对正在追求人生价值的当代青年来说如何对待人生中的缺憾和困顿?诗意人生中又
有怎样的自我坚守和自我认同?请结合失意诗意这两个关键词写一篇文章 要求:选准角度确定立意明确文体自拟标题;不要套作不得抄
;不得泄露个人信息;不少于 800 '''
with torch.autocast(device_type='cuda', dtype=torch.float16):
response = model.write_artical(query, seed=8192)
print(response)
assert len(response) > 100
assert '' in response and '' in response
query = '''Please write a blog based on the title: French Pastries:
A Sweet Indulgence'''
with torch.autocast(device_type='cuda', dtype=torch.float16):
response = model.write_artical(query, seed=8192)
print(response)
assert len(response) > 100
assert ' ' in response and 'a' in response
def is_html_code(html_code):
try:
soup = BeautifulSoup(html_code, 'lxml')
if soup.find('html'):
print('HTML appears to be well-formed.')
return True
else:
print('There was an issue with the HTML structure.')
return False
except Exception as e:
print('Error parsing HTML:', str(e))
return False
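# is_html_code() only checks that BeautifulSoup can find an <html> element; it
# is not a full validator. It assumes the 'lxml' parser is installed;
# otherwise BeautifulSoup raises bs4.FeatureNotFound, which is caught above
# and reported as a parse failure.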
class TestChatAwq:
"""Test cases for chat model."""
@pytest.mark.parametrize(
'model_name',
['internlm/internlm2-chat-20b-4bits'],
)
def test_demo_default(self, model_name):
engine_config = TurbomindEngineConfig(model_format='awq')
pipe = pipeline(model_name, backend_config=engine_config)
responses = pipe(['Hi, pls intro yourself', 'Shanghai is'])
print(responses)
for response in responses:
assert_model(response.text)