[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
pull/6119/head
pre-commit-ci[bot] 2024-11-11 11:39:15 +00:00
parent 794e0d4f4a
commit 0bfb0d32a8
3 changed files with 11 additions and 5 deletions

View File

@@ -7,7 +7,7 @@ from .loader import (
StatefulDistributedSampler,
load_tokenized_dataset,
)
from .tokenization_utils import tokenize_kto, tokenize_prompt, tokenize_rlhf, tokenize_sft, tokenize_process_reward
from .tokenization_utils import tokenize_kto, tokenize_process_reward, tokenize_prompt, tokenize_rlhf, tokenize_sft
__all__ = [
"tokenize_prompt",
@@ -23,5 +23,5 @@ __all__ = [
"tokenize_kto",
"setup_conversation_template",
"Conversation",
"tokenize_process_reward"
"tokenize_process_reward",
]

View File

@@ -3,7 +3,6 @@ import os
from dataclasses import dataclass, field
from typing import Any, Dict, List
import torch.distributed as dist
from transformers import AutoTokenizer, PreTrainedTokenizer
from colossalai.logging import get_dist_logger

View File

@@ -12,7 +12,14 @@ import random
import time
from multiprocessing import cpu_count
from coati.dataset import setup_conversation_template, tokenize_kto, tokenize_prompt, tokenize_rlhf, tokenize_sft, tokenize_process_reward
from coati.dataset import (
setup_conversation_template,
tokenize_kto,
tokenize_process_reward,
tokenize_prompt,
tokenize_rlhf,
tokenize_sft,
)
from datasets import dataset_dict, load_dataset
from transformers import AutoTokenizer
@@ -28,7 +35,7 @@ def main():
type=str,
required=True,
default=None,
choices=["sft", "prompt", "preference", "kto", 'prm'],
choices=["sft", "prompt", "preference", "kto", "prm"],
help="Type of dataset, chose from 'sft', 'prompt', 'preference'. 'kto'",
)
parser.add_argument(