From f36805270c1541a5bb96a23683918980121d7ec5 Mon Sep 17 00:00:00 2001
From: zhulinJulia24 <145004780+zhulinJulia24@users.noreply.github.com>
Date: Mon, 1 Jul 2024 11:53:12 +0800
Subject: [PATCH] Update test_hf_model.py

---
 tests/test_hf_model.py | 58 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tests/test_hf_model.py b/tests/test_hf_model.py
index dc9ed16..596f1d1 100644
--- a/tests/test_hf_model.py
+++ b/tests/test_hf_model.py
@@ -160,6 +160,64 @@ class TestMath:
         assert_model(response)
         assert '2' in response
 
+class TestReward:
+    """Test cases for reward model."""
+
+    @pytest.mark.parametrize(
+        'model_name',
+        [
+            'internlm/internlm-reward-1_8b', 'internlm/internlm-reward-7b',
+            'internlm/internlm-reward-20b'
+        ],
+    )
+    @pytest.mark.parametrize(
+        'usefast',
+        [
+            True,
+            False,
+        ],
+    )
+    def test_demo_default(self, model_name, usefast):
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True,
+                                                  use_fast=usefast)
+        model = AutoModel.from_pretrained(model_name, device_map="cuda",
+                                          torch_dtype=torch.float16,
+                                          trust_remote_code=True)
+
+        chat_1 = [
+            {"role": "user", "content": "Hello! What's your name?"},
+            {"role": "assistant", "content": "My name is InternLM2! A helpful AI assistant. What can I do for you?"}
+        ]
+        chat_2 = [
+            {"role": "user", "content": "Hello! What's your name?"},
+            {"role": "assistant", "content": "I have no idea."}
+        ]
+
+        # get reward score for a single chat
+        score1 = model.get_score(tokenizer, chat_1)
+        score2 = model.get_score(tokenizer, chat_2)
+        print("score1: ", score1)
+        print("score2: ", score2)
+        assert score1 > 0.5 and score1 < 1 and score2 < 0
+
+        # batch inference, get multiple scores at once
+        scores = model.get_scores(tokenizer, [chat_1, chat_2])
+        print("scores: ", scores)
+        assert scores[0] > 0.5 and scores[0] < 1 and scores[1] < 0
+
+        # compare whether chat_1 is better than chat_2
+        compare_res = model.compare(tokenizer, chat_1, chat_2)
+        print("compare_res: ", compare_res)
+        assert compare_res
+        # >>> compare_res: True
+
+        # rank multiple chats; returns the ranking index of each chat,
+        # where the chat with the highest score has ranking index 0
+        rank_res = model.rank(tokenizer, [chat_1, chat_2])
+        print("rank_res: ", rank_res)  # lower index means higher score
+        # >>> rank_res: [0, 1]
+        assert rank_res[0] == 0 and rank_res[1] == 1
 
 class TestMMModel:
     """Test cases for base model."""
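
Note for reviewers: the test above can be selected with
`pytest tests/test_hf_model.py -k TestReward`. For trying the reward-model
API outside pytest, below is a minimal standalone sketch mirroring the calls
the test makes. It assumes a CUDA device is available and that the
internlm/internlm-reward-1_8b checkpoint exposes get_score via
trust_remote_code, exactly as exercised in the test above.

    import torch
    from transformers import AutoModel, AutoTokenizer

    model_name = 'internlm/internlm-reward-1_8b'  # one of the checkpoints parametrized above
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_name,
                                      device_map='cuda',
                                      torch_dtype=torch.float16,
                                      trust_remote_code=True)

    chat = [
        {"role": "user", "content": "Hello! What's your name?"},
        {"role": "assistant", "content": "My name is InternLM2!"},
    ]
    # single-chat reward score; a higher value means the response is judged better
    print(model.get_score(tokenizer, chat))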