update

2024-07-01 12:04:03 +08:00 · 2024-07-01 12:04:03 +08:00 · e8a22a7b0d
parent a99d681d63
commit e8a22a7b0d
1 changed files with 81 additions and 0 deletions
--- a/tests/test_hf_model.py
+++ b/tests/test_hf_model.py
@ -233,6 +233,87 @@ class TestReward:
        # >>> rank_res:  [0, 1]
        assert rank_res[0] == 0 & rank_res[1] == 1

+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm-reward-1_8b', 'internlm/internlm-reward-7b',
+            'internlm/internlm-reward-20b'
+        ],
+    )
+    @pytest.mark.parametrize(
+        'usefast',
+        [
+            True,
+            False,
+        ],
+    )
+    def test_demo_topn(self, model_name, usefast):
+        # prepare the llm model and tokenizer
+        llm = AutoModel.from_pretrained(
+            'internlm/internlm2-chat-7b',
+            device_map='cuda',
+            torch_dtype=torch.float16,
+            trust_remote_code=True,
+        )
+        llm_tokenizer = AutoTokenizer.from_pretrained(
+            'internlm/internlm2-chat-7b', trust_remote_code=True)
+
+        # prepare the reward model and tokenizer
+        reward = AutoModel.from_pretrained(
+            model_name,
+            device_map='cuda',
+            torch_dtype=torch.float16,
+            trust_remote_code=True,
+        )
+        reward_tokenizer = AutoTokenizer.from_pretrained(
+            model_name, trust_remote_code=True)
+
+        # prepare the chat prompt
+        prompt = 'Write an article about artificial intelligence revolution.'
+        messages = [{
+            'role': 'system',
+            'content': 'You are a helpful assistant.'
+        }, {
+            'role': 'user',
+            'content': prompt
+        }]
+        text = llm_tokenizer.apply_chat_template(messages,
+                                                 tokenize=False,
+                                                 add_generation_prompt=True)
+        model_inputs = llm_tokenizer([text], return_tensors='pt').to('cuda')
+
+        # generate best of N candidates
+        num_candidates = 3  # N=3
+        candidates = []
+
+        outputs = llm.generate(
+            **model_inputs,
+            max_new_tokens=512,
+            num_return_sequences=num_candidates,
+            pad_token_id=llm_tokenizer.eos_token_id,
+            do_sample=True,
+            top_k=50,
+            top_p=0.95,
+            temperature=0.8,
+        )
+        outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
+        for i in range(num_candidates):
+            candidate = llm_tokenizer.decode(outputs[i],
+                                             skip_special_tokens=True)
+            candidates.append(messages + [{
+                'role': 'assistant',
+                'content': candidate
+            }])
+
+        rank_indices = reward.rank(reward_tokenizer, candidates)
+        sorted_candidates = sorted(zip(rank_indices, candidates),
+                                   key=lambda x: x[0])
+
+        # print the best response
+        best_response = sorted_candidates[0][1][-1]['content']
+        print(best_response)
+        assert len(sorted_candidates) == 3
+

 class TestMMModel:
    """Test cases for base model."""