import pytest
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

prompts = ["你好", "what's your name"]


def assert_model(response):
    assert len(response) != 0
    assert "UNUSED_TOKEN" not in response


class TestChat:
    """
    Test cases for chat models.
    """

    @pytest.mark.parametrize(
        "model_name",
        [
            "internlm/internlm2-chat-7b",
            "internlm/internlm2-chat-7b-sft",
        ],
    )
    def test_demo_default(self, model_name):
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        # Set `torch_dtype=torch.float16` to load the model in float16; otherwise
        # it is loaded in float32 and may cause an OOM error.
        model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.float16, trust_remote_code=True
        ).cuda()
        model = model.eval()

        # Non-streaming chat: one full response per prompt.
        for prompt in prompts:
            response, history = model.chat(tokenizer, prompt, history=[])
            print(response)
            assert_model(response)

        # Streaming chat: each iteration yields the response so far, so only
        # print the newly generated suffix.
        for prompt in prompts:
            length = 0
            for response, history in model.stream_chat(tokenizer, prompt, history=[]):
                print(response[length:], flush=True, end="")
                length = len(response)
            assert_model(response)


class TestBase:
    """
    Test cases for base models.
    """

    @pytest.mark.parametrize(
        "model_name",
        [
            "internlm/internlm2-7b",
            "internlm/internlm2-base-7b",
        ],
    )
    def test_demo_default(self, model_name):
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        # Set `torch_dtype=torch.float16` to load the model in float16; otherwise
        # it is loaded in float32 and may cause an OOM error.
        model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.float16, trust_remote_code=True
        ).cuda()

        for prompt in prompts:
            inputs = tokenizer(prompt, return_tensors="pt")
            inputs = {k: v.cuda() for k, v in inputs.items()}
            gen_kwargs = {
                "max_length": 128,
                # top_p must lie in (0, 1]; the previous value of 10 is invalid
                # for nucleus sampling, so a typical demo value is used here.
                "top_p": 0.8,
                "temperature": 1.0,
                "do_sample": True,
                "repetition_penalty": 1.0,
            }
            output = model.generate(**inputs, **gen_kwargs)
            output = tokenizer.decode(output[0].tolist(), skip_special_tokens=True)
            print(output)
            assert_model(output)