ColossalAI/applications/ChatGPT/chatgpt/models/base/critic.py

51 lines
1.5 KiB
Python
Raw Normal View History

2023-02-14 14:17:25 +00:00
from typing import Optional
import torch
import torch.nn as nn
2023-03-07 08:34:22 +00:00
from ..lora import LoRAModule
from ..utils import masked_mean
2023-02-14 14:17:25 +00:00
class Critic(LoRAModule):
    """
    Critic model base class: wraps a backbone transformer and a value head
    that maps hidden states to a scalar value estimate per sequence.

    Args:
        model (nn.Module): Critic backbone model; must return a dict-like
            output containing ``'last_hidden_state'``.
        value_head (nn.Module): Value head projecting hidden states to a
            single scalar per token position.
        lora_rank (int): LoRA rank (0 disables LoRA).
        lora_train_bias (str): LoRA bias training mode.
    """

    def __init__(self,
                 model: nn.Module,
                 value_head: nn.Module,
                 lora_rank: int = 0,
                 lora_train_bias: str = 'none') -> None:
        super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
        self.model = model
        self.value_head = value_head
        # Swap eligible linear layers for LoRA layers (no-op when lora_rank == 0).
        self.convert_to_lora()

    def forward(self,
                sequences: torch.LongTensor,
                action_mask: Optional[torch.Tensor] = None,
                attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Compute one scalar value per sequence in the batch.

        Args:
            sequences: Token ids of shape (batch, seq_len).
            action_mask: Mask over the trailing action tokens, shape
                (batch, num_actions). When given, the value is the masked
                mean over the prompt positions only.
            attention_mask: Attention mask of shape (batch, seq_len).

        Returns:
            torch.Tensor: Value estimates of shape (batch,).
        """
        outputs = self.model(sequences, attention_mask=attention_mask)
        last_hidden_states = outputs['last_hidden_state']
        # (batch, seq_len): one scalar value per token position.
        values = self.value_head(last_hidden_states).squeeze(-1)

        if action_mask is not None:
            # Score only the prompt: strip the trailing action positions.
            # NOTE(review): this path dereferences attention_mask, so it
            # assumes attention_mask is not None whenever action_mask is
            # given — confirm against callers.
            num_actions = action_mask.size(1)
            prompt_mask = attention_mask[:, :-num_actions]
            values = values[:, :-num_actions]
            value = masked_mean(values, prompt_mask, dim=1)
            return value

        # No action mask: average over all positions except the final token.
        values = values[:, :-1]
        # BUG FIX: mean(dim=1) on the 2-D (batch, seq) tensor already yields
        # a 1-D (batch,) tensor; the original trailing .squeeze(1) raised
        # "IndexError: Dimension out of range" on that 1-D result.
        value = values.mean(dim=1)
        return value