mirror of https://github.com/hpcaitech/ColossalAI
Camille Zhong
2 years ago
6 changed files with 134 additions and 4 deletions
@@ -0,0 +1,5 @@
from .roberta_actor import RoBERTaActor
from .roberta_critic import RoBERTaCritic
from .roberta_rm import RoBERTaRM

__all__ = ['RoBERTaActor', 'RoBERTaCritic', 'RoBERTaRM']
@@ -0,0 +1,35 @@
from typing import Optional

from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.models.roberta.modeling_roberta import RobertaForCausalLM

from ..base import Actor


class RoBERTaActor(Actor):
    """
    RoBERTa Actor model.

    Args:
        pretrained (str): Pretrained model name or path.
        config (RobertaConfig): Model config.
        checkpoint (bool): Enable gradient checkpointing.
        lora_rank (int): Rank of the low-rank approximation.
        lora_train_bias (str): LoRA bias training mode.
    """

    def __init__(self,
                 pretrained: Optional[str] = None,
                 config: Optional[RobertaConfig] = None,
                 checkpoint: bool = False,
                 lora_rank: int = 0,
                 lora_train_bias: str = 'none') -> None:
        if pretrained is not None:
            model = RobertaForCausalLM.from_pretrained(pretrained)
        elif config is not None:
            model = RobertaForCausalLM(config)
        else:
            model = RobertaForCausalLM(RobertaConfig())
        if checkpoint:
            model.gradient_checkpointing_enable()
        super().__init__(model, lora_rank, lora_train_bias)
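For context (not part of the diff): a minimal usage sketch of the actor class above. The import line is hypothetical, since the diff does not show where this `roberta` sub-package sits in the repository, and 'roberta-base' is just an example Hugging Face checkpoint name.

from models.roberta import RoBERTaActor  # hypothetical import path, not shown in this diff
from transformers import RobertaConfig

# 1) Load pretrained weights, enable gradient checkpointing, and request
#    rank-16 low-rank adapters (handled by the Actor base class).
actor = RoBERTaActor(pretrained='roberta-base', checkpoint=True, lora_rank=16)

# 2) Build from an explicit config instead (randomly initialized weights).
small_actor = RoBERTaActor(config=RobertaConfig(num_hidden_layers=4))

# 3) With no arguments, the constructor falls back to a default RobertaConfig().
default_actor = RoBERTaActor()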
@@ -0,0 +1,38 @@
from typing import Optional

import torch.nn as nn
from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.models.roberta.modeling_roberta import RobertaModel

from ..base import Critic


class RoBERTaCritic(Critic):
    """
    RoBERTa Critic model.

    Args:
        pretrained (str): Pretrained model name or path.
        config (RobertaConfig): Model config.
        checkpoint (bool): Enable gradient checkpointing.
        lora_rank (int): Rank of the low-rank approximation.
        lora_train_bias (str): LoRA bias training mode.
    """

    def __init__(self,
                 pretrained: Optional[str] = None,
                 config: Optional[RobertaConfig] = None,
                 checkpoint: bool = False,
                 lora_rank: int = 0,
                 lora_train_bias: str = 'none',
                 **kwargs) -> None:
        if pretrained is not None:
            model = RobertaModel.from_pretrained(pretrained)
        elif config is not None:
            model = RobertaModel(config)
        else:
            model = RobertaModel(RobertaConfig())
        if checkpoint:
            model.gradient_checkpointing_enable()
        value_head = nn.Linear(model.config.hidden_size, 1)
        super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs)
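Likewise, a hedged sketch of constructing the critic above (the import path is again hypothetical). The scalar value head is created internally from the backbone's hidden_size, so no head size needs to be passed, and any extra keyword arguments are forwarded to the Critic base class.

from models.roberta import RoBERTaCritic  # hypothetical import path, not shown in this diff

# Same construction options as the actor; the value head nn.Linear(hidden_size, 1)
# is added on top of the RobertaModel backbone.
critic = RoBERTaCritic(pretrained='roberta-base', checkpoint=True, lora_rank=16)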
@@ -0,0 +1,39 @@
from typing import Optional

import torch.nn as nn
from transformers import RobertaConfig, RobertaModel


from ..base import RewardModel


class RoBERTaRM(RewardModel):
    """
    RoBERTa Reward model.

    Args:
        pretrained (str): Pretrained model name or path.
        config (RobertaConfig): Model config.
        checkpoint (bool): Enable gradient checkpointing.
        lora_rank (int): Rank of the low-rank approximation.
        lora_train_bias (str): LoRA bias training mode.
    """

    def __init__(self,
                 pretrained: Optional[str] = None,
                 config: Optional[RobertaConfig] = None,
                 checkpoint: bool = False,
                 lora_rank: int = 0,
                 lora_train_bias: str = 'none') -> None:
        if pretrained is not None:
            model = RobertaModel.from_pretrained(pretrained)
        elif config is not None:
            model = RobertaModel(config)
        else:
            model = RobertaModel(RobertaConfig())
        if checkpoint:
            model.gradient_checkpointing_enable()

        value_head = nn.Linear(model.config.hidden_size, 1)
        value_head.weight.data.normal_(mean=0.0, std=1/(model.config.hidden_size + 1))
        super().__init__(model, value_head, lora_rank, lora_train_bias)
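On top of the critic-style construction, the reward model explicitly initializes its value head. A self-contained sketch of that scheme, assuming roberta-base's hidden size of 768 as an example:

import torch.nn as nn

hidden_size = 768  # hidden size of roberta-base, used here only as an example
value_head = nn.Linear(hidden_size, 1)
# Zero-mean normal init with std = 1 / (hidden_size + 1), as in the code above;
# the small std keeps initial reward outputs near zero regardless of model width.
value_head.weight.data.normal_(mean=0.0, std=1 / (hidden_size + 1))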