Update test_hf_model.py

2024-07-04 19:24:05 +08:00 · 2024-07-04 19:24:05 +08:00 · ed26d1d76c
parent 091774d928
commit ed26d1d76c
1 changed files with 0 additions and 158 deletions
--- a/tests/test_hf_model.py
+++ b/tests/test_hf_model.py
@@ -161,164 +161,6 @@ class TestMath:
        assert '2' in response
 class TestReward:
    """Test cases for the reward models."""
    @pytest.mark.parametrize(
        'model_name',
        [
            'internlm/internlm-reward-1_8b', 'internlm/internlm-reward-7b',
            'internlm/internlm-reward-20b'
        ],
    )
    @pytest.mark.parametrize(
        'usefast',
        [
            True,
            False,
        ],
    )
    def test_demo_default(self, model_name, usefast):
        """Check scoring, comparing and ranking on two fixed chats.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                reward model and its tokenizer.
            usefast: Forwarded as ``use_fast`` when loading the tokenizer.
        """
        # Load the tokenizer exactly once. A second load without ``use_fast``
        # would replace this one and make the ``usefast`` parametrization a
        # no-op.
        tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                  trust_remote_code=True,
                                                  use_fast=usefast)
        model = AutoModel.from_pretrained(
            model_name,
            device_map='cuda',
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        # chat_1 ends with a helpful answer, chat_2 with an unhelpful one.
        chat_1 = [{
            'role': 'user',
            'content': "Hello! What's your name?"
        }, {
            'role':
            'assistant',
            'content':
            'I am InternLM2! A helpful AI assistant. What can I do for you?'
        }]
        chat_2 = [{
            'role': 'user',
            'content': "Hello! What's your name?"
        }, {
            'role': 'assistant',
            'content': 'I have no idea.'
        }]
        # get reward score for a single chat
        score1 = model.get_score(tokenizer, chat_1)
        score2 = model.get_score(tokenizer, chat_2)
        print('score1: ', score1)
        print('score2: ', score2)
        assert score1 > 0
        assert score2 < 0
        # batch inference, get multiple scores at once
        scores = model.get_scores(tokenizer, [chat_1, chat_2])
        print('scores: ', scores)
        assert scores[0] > 0
        assert scores[1] < 0
        # compare whether chat_1 is better than chat_2
        compare_res = model.compare(tokenizer, chat_1, chat_2)
        print('compare_res: ', compare_res)
        assert compare_res
        # >>> compare_res:  True
        # rank multiple chats, it will return the ranking index of each chat
        # the chat with the highest score will have ranking index as 0
        rank_res = model.rank(tokenizer, [chat_1, chat_2])
        print('rank_res: ', rank_res)  # lower index means higher score
        # >>> rank_res:  [0, 1]
        assert rank_res[0] == 0
        assert rank_res[1] == 1
    @pytest.mark.parametrize(
        'model_name',
        [
            'internlm/internlm-reward-1_8b', 'internlm/internlm-reward-7b',
            'internlm/internlm-reward-20b'
        ],
    )
    @pytest.mark.parametrize(
        'usefast',
        [
            True,
            False,
        ],
    )
    def test_demo_topn(self, model_name, usefast):
        """Generate N candidate replies and rank them with the reward model.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for the
                reward model and its tokenizer.
            usefast: Forwarded as ``use_fast`` when loading the reward
                tokenizer, mirroring ``test_demo_default``.
        """
        # prepare the llm model and tokenizer
        llm = AutoModel.from_pretrained(
            'internlm/internlm2-chat-7b',
            device_map='cuda',
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        llm_tokenizer = AutoTokenizer.from_pretrained(
            'internlm/internlm2-chat-7b', trust_remote_code=True)
        # prepare the reward model and tokenizer
        reward = AutoModel.from_pretrained(
            model_name,
            device_map='cuda',
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        # Honour the ``usefast`` parametrization; without it both parametrized
        # runs would exercise the identical tokenizer configuration.
        reward_tokenizer = AutoTokenizer.from_pretrained(
            model_name, trust_remote_code=True, use_fast=usefast)
        # prepare the chat prompt
        prompt = 'Write an short bedtime story.'
        messages = [{
            'role': 'system',
            'content': 'You are a helpful assistant.'
        }, {
            'role': 'user',
            'content': prompt
        }]
        text = llm_tokenizer.apply_chat_template(messages,
                                                 tokenize=False,
                                                 add_generation_prompt=True)
        model_inputs = llm_tokenizer([text], return_tensors='pt').to('cuda')
        # generate best of N candidates
        num_candidates = 3  # N=3
        candidates = []
        outputs = llm.generate(
            **model_inputs,
            max_new_tokens=512,
            num_return_sequences=num_candidates,
            pad_token_id=llm_tokenizer.eos_token_id,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.8,
        )
        # drop the prompt tokens so only the newly generated part is decoded
        outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
        for i in range(num_candidates):
            candidate = llm_tokenizer.decode(outputs[i],
                                             skip_special_tokens=True)
            candidates.append(messages + [{
                'role': 'assistant',
                'content': candidate
            }])
        rank_indices = reward.rank(reward_tokenizer, candidates)
        # order candidates by their ranking index (lowest index first)
        sorted_candidates = sorted(zip(rank_indices, candidates),
                                   key=lambda x: x[0])
        # print the best response
        best_response = sorted_candidates[0][1][-1]['content']
        print(sorted_candidates)
        print(best_response)
        # every generated candidate must survive ranking
        assert len(sorted_candidates) == num_candidates
 class TestMMModel:
    """Test cases for base model."""