ColossalAI/applications/ChatGPT/chatgpt/models/base/critic.py

51 lines
1.5 KiB
Python
Raw Normal View History

2023-02-14 14:17:25 +00:00
from typing import Optional
import torch
import torch.nn as nn
2023-03-07 08:34:22 +00:00
from ..lora import LoRAModule
from ..utils import masked_mean
2023-02-14 14:17:25 +00:00
class Critic(LoRAModule):
    """
    Critic model base class: wraps a backbone transformer and a value head
    that maps hidden states to a scalar value estimate per sequence.

    Args:
        model (nn.Module): Critic backbone model; must return a dict-like
            output containing ``'last_hidden_state'``.
        value_head (nn.Module): Value head projecting hidden states to a
            single scalar per token position.
        lora_rank (int): LoRA rank (0 disables LoRA).
        lora_train_bias (str): LoRA bias training mode.
    """

    def __init__(self,
                 model: nn.Module,
                 value_head: nn.Module,
                 lora_rank: int = 0,
                 lora_train_bias: str = 'none') -> None:
        super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
        self.model = model
        self.value_head = value_head
        # Swap eligible linear layers for LoRA layers (no-op when lora_rank == 0).
        self.convert_to_lora()

    def forward(self,
                sequences: torch.LongTensor,
                action_mask: Optional[torch.Tensor] = None,
                attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Compute one scalar value per sequence in the batch.

        Args:
            sequences: Token ids of shape (batch, seq_len).
            action_mask: Mask over the trailing action tokens, shape
                (batch, num_actions). When given, the value is the masked
                mean over the prompt positions only.
            attention_mask: Attention mask of shape (batch, seq_len).

        Returns:
            torch.Tensor: Value estimates of shape (batch,).
        """
        outputs = self.model(sequences, attention_mask=attention_mask)
        last_hidden_states = outputs['last_hidden_state']
        # (batch, seq_len): one scalar value per token position.
        values = self.value_head(last_hidden_states).squeeze(-1)

        if action_mask is not None:
            # Score only the prompt: strip the trailing action positions.
            # NOTE(review): this path dereferences attention_mask, so it
            # assumes attention_mask is not None whenever action_mask is
            # given — confirm against callers.
            num_actions = action_mask.size(1)
            prompt_mask = attention_mask[:, :-num_actions]
            values = values[:, :-num_actions]
            value = masked_mean(values, prompt_mask, dim=1)
            return value

        # No action mask: average over all positions except the final token.
        values = values[:, :-1]
        # BUG FIX: mean(dim=1) on the 2-D (batch, seq) tensor already yields
        # a 1-D (batch,) tensor; the original trailing .squeeze(1) raised
        # "IndexError: Dimension out of range" on that 1-D result.
        value = values.mean(dim=1)
        return value