mirror of https://github.com/hpcaitech/ColossalAI
Camille Zhong · 2 years ago · 6 changed files with 134 additions and 4 deletions
@@ -0,0 +1,5 @@
from .roberta_actor import RoBERTaActor
from .roberta_critic import RoBERTaCritic
from .roberta_rm import RoBERTaRM

__all__ = ['RoBERTaActor', 'RoBERTaCritic', 'RoBERTaRM']
@@ -0,0 +1,35 @@
from typing import Optional

from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.models.roberta.modeling_roberta import RobertaForCausalLM

from ..base import Actor


class RoBERTaActor(Actor):
    """
    RoBERTa Actor model.

    Args:
        pretrained (str): Pretrained model name or path.
        config (RobertaConfig): Model config.
        checkpoint (bool): Enable gradient checkpointing.
        lora_rank (int): Rank of the low-rank approximation.
        lora_train_bias (str): LoRA bias training mode.
    """

    def __init__(self,
                 pretrained: Optional[str] = None,
                 config: Optional[RobertaConfig] = None,
                 checkpoint: bool = False,
                 lora_rank: int = 0,
                 lora_train_bias: str = 'none') -> None:
        if pretrained is not None:
            model = RobertaForCausalLM.from_pretrained(pretrained)
        elif config is not None:
            model = RobertaForCausalLM(config)
        else:
            model = RobertaForCausalLM(RobertaConfig())
        if checkpoint:
            model.gradient_checkpointing_enable()
        super().__init__(model, lora_rank, lora_train_bias)
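For orientation, a minimal usage sketch of the actor defined above. It assumes RoBERTaActor has been imported from the package this diff adds and that the Actor base class behaves like a torch.nn.Module; neither the import path nor the base-class interface is shown here, and the 'roberta-base' checkpoint name is purely illustrative.

# Hedged sketch: RoBERTaActor is assumed to be in scope (its package path is
# not shown in this diff), and Actor is assumed to be a torch.nn.Module.
from transformers import RobertaConfig

# Build a small, randomly initialised actor from an explicit config.
tiny_config = RobertaConfig(num_hidden_layers=2, hidden_size=128,
                            num_attention_heads=2, intermediate_size=256)
actor = RoBERTaActor(config=tiny_config)

# Or wrap a pretrained checkpoint with gradient checkpointing and rank-8 LoRA;
# with LoRA enabled, only the adapter (and optionally bias) parameters should
# stay trainable, which a quick parameter count makes easy to verify.
actor = RoBERTaActor(pretrained='roberta-base', checkpoint=True, lora_rank=8)
trainable = sum(p.numel() for p in actor.parameters() if p.requires_grad)
print(f'trainable parameters: {trainable:,}')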
@@ -0,0 +1,38 @@
from typing import Optional

import torch.nn as nn
from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.models.roberta.modeling_roberta import RobertaModel

from ..base import Critic


class RoBERTaCritic(Critic):
    """
    RoBERTa Critic model.

    Args:
        pretrained (str): Pretrained model name or path.
        config (RobertaConfig): Model config.
        checkpoint (bool): Enable gradient checkpointing.
        lora_rank (int): Rank of the low-rank approximation.
        lora_train_bias (str): LoRA bias training mode.
    """

    def __init__(self,
                 pretrained: Optional[str] = None,
                 config: Optional[RobertaConfig] = None,
                 checkpoint: bool = False,
                 lora_rank: int = 0,
                 lora_train_bias: str = 'none',
                 **kwargs) -> None:
        if pretrained is not None:
            model = RobertaModel.from_pretrained(pretrained)
        elif config is not None:
            model = RobertaModel(config)
        else:
            model = RobertaModel(RobertaConfig())
        if checkpoint:
            model.gradient_checkpointing_enable()
        value_head = nn.Linear(model.config.hidden_size, 1)
        super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs)
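The critic's only addition on top of the encoder is the scalar value head. The sketch below shows that pattern in isolation with plain transformers and torch, since the Critic base class that defines the actual forward pass is not part of this diff; the 'roberta-base' name is illustrative.

# Standalone sketch of the value-head pattern, not the Critic base class itself.
import torch
import torch.nn as nn
from transformers import RobertaModel, RobertaTokenizer

# Encoder plus a one-unit linear head, mirroring the construction above.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
encoder = RobertaModel.from_pretrained('roberta-base')
value_head = nn.Linear(encoder.config.hidden_size, 1)

inputs = tokenizer('The quick brown fox jumps over the lazy dog.', return_tensors='pt')
with torch.no_grad():
    hidden = encoder(**inputs).last_hidden_state   # (batch, seq_len, hidden_size)
    values = value_head(hidden).squeeze(-1)        # (batch, seq_len): one value per token
print(values.shape)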
@@ -0,0 +1,39 @@
from typing import Optional

import torch.nn as nn
from transformers import RobertaConfig, RobertaModel


from ..base import RewardModel


class RoBERTaRM(RewardModel):
    """
    RoBERTa Reward model.

    Args:
        pretrained (str): Pretrained model name or path.
        config (RobertaConfig): Model config.
        checkpoint (bool): Enable gradient checkpointing.
        lora_rank (int): Rank of the low-rank approximation.
        lora_train_bias (str): LoRA bias training mode.
    """

    def __init__(self,
                 pretrained: Optional[str] = None,
                 config: Optional[RobertaConfig] = None,
                 checkpoint: bool = False,
                 lora_rank: int = 0,
                 lora_train_bias: str = 'none') -> None:
        if pretrained is not None:
            model = RobertaModel.from_pretrained(pretrained)
        elif config is not None:
            model = RobertaModel(config)
        else:
            model = RobertaModel(RobertaConfig())
        if checkpoint:
            model.gradient_checkpointing_enable()

        value_head = nn.Linear(model.config.hidden_size, 1)
        value_head.weight.data.normal_(mean=0.0, std=1/(model.config.hidden_size + 1))
        super().__init__(model, value_head, lora_rank, lora_train_bias)
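The only difference from the critic is the value-head initialisation: drawing the weights from a normal distribution with std 1/(hidden_size + 1) keeps early reward estimates close to zero. How RewardModel reduces per-token values to a single reward per sequence lives in the base class, which is not part of this diff; the last-token readout in the sketch below is a common convention and only an assumption here, and the tiny config is illustrative.

# Hedged sketch of the reward-head construction above; the sequence-level
# readout (last token) is an assumption, not taken from this diff.
import torch
import torch.nn as nn
from transformers import RobertaConfig, RobertaModel

config = RobertaConfig(num_hidden_layers=2, hidden_size=64,
                       num_attention_heads=2, intermediate_size=128)
encoder = RobertaModel(config)

# Re-initialise the value head exactly as above: std = 1 / (hidden_size + 1).
value_head = nn.Linear(config.hidden_size, 1)
value_head.weight.data.normal_(mean=0.0, std=1 / (config.hidden_size + 1))

# Dummy batch of two sequences; an assumed last-token readout yields one
# scalar reward per sequence.
input_ids = torch.randint(0, config.vocab_size, (2, 16))
with torch.no_grad():
    hidden = encoder(input_ids).last_hidden_state   # (2, 16, hidden_size)
    rewards = value_head(hidden)[:, -1, 0]          # (2,)
print(rewards)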