[fix] fix typo s/muiti-node /multi-node etc. (#5448)

pull/5578/head
digger yu 2024-04-07 18:42:15 +08:00 committed by GitHub
parent 15055f9a36
commit a799ca343b
10 changed files with 16 additions and 16 deletions

View File

@@ -670,7 +670,7 @@ def calculate_scores_form_logprobs(logprobs: Dict[str, Any]) -> float:
 def calculate_scores_form_response(response: str, evaluation: Dict[str, Any]) -> int:
     """
     Calculate the score from the response returned by gpt-3.5-turbo or gpt-4.
-    Different from text-davinci-003, this fuction directly calculates the score according to the plain response returned by gpt-3.5-turbo or gpt-4.
+    Different from text-davinci-003, this function directly calculates the score according to the plain response returned by gpt-3.5-turbo or gpt-4.
     Although text-davinci-003 can return log probabilities, it costs ten times as much as gpt-3.5-turbo.
     Args:
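Since this hunk only touches a docstring, here is a minimal sketch of what scoring from a plain chat-completion reply can look like; the regex and the fallback value are illustrative assumptions, not the repository's actual parsing logic:

```python
import re

def extract_score(response: str, default: int = 0) -> int:
    # Hypothetical helper: gpt-3.5-turbo / gpt-4 return plain text rather than
    # log probabilities, so a numeric rating must be parsed out of the reply.
    match = re.search(r"\b(\d{1,2})\b", response)
    return int(match.group(1)) if match else default

assert extract_score("I would rate this answer 8 out of 10.") == 8
```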

View File

@@ -128,13 +128,13 @@ def parse_args():
     parser.add_argument(
         "--comm_overlap",
         action="store_true",
-        help="Use communication overlap for MoE. Recommended to enable for muiti-node training.",
+        help="Use communication overlap for MoE. Recommended to enable for multi-node training.",
     )
     # hierarchical all-to-all
     parser.add_argument(
         "--hierarchical_alltoall",
         action="store_true",
-        help="Use hierarchical all-to-all for MoE. Recommended to enable for muiti-node training.",
+        help="Use hierarchical all-to-all for MoE. Recommended to enable for multi-node training.",
     )
     args = parser.parse_args()
@@ -267,7 +267,7 @@ def main():
         # ):
         # coordinator.print_on_master(f"Apply load balance")
         # apply_load_balance(model, optimizer)
-        # save ckeckpoint
+        # save checkpoint
         if (step + 1) % args.save_interval == 0:
             coordinator.print_on_master(f"Saving model checkpoint to {args.output_path}")
             save_checkpoint(
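For reference, the two reworded help strings belong to ordinary `store_true` switches; a self-contained reproduction (the flag names match the hunk, the parser itself is rebuilt here):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--comm_overlap",
    action="store_true",
    help="Use communication overlap for MoE. Recommended to enable for multi-node training.",
)
parser.add_argument(
    "--hierarchical_alltoall",
    action="store_true",
    help="Use hierarchical all-to-all for MoE. Recommended to enable for multi-node training.",
)

# Both flags default to False and flip to True when passed on the command line.
args = parser.parse_args(["--comm_overlap"])
assert args.comm_overlap and not args.hierarchical_alltoall
```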

View File

@@ -52,7 +52,7 @@ class DocumentLoader:
     def load_data(self, path: str) -> None:
         """
         Load data. Please refer to https://python.langchain.com/docs/modules/data_connection/document_loaders/
-        for sepcific format requirements.
+        for specific format requirements.
         Args:
             path: path to a file
             To load files with glob path, here are some examples.
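As a rough illustration of the glob patterns that docstring refers to (the paths below are made up, and this uses the standard library rather than the repository's loader):

```python
import glob

# Hypothetical paths, for illustration only.
pdf_files = glob.glob("data/**/*.pdf", recursive=True)  # every PDF under data/
md_files = glob.glob("docs/*.md")                       # top-level markdown files
```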

View File

@@ -100,7 +100,7 @@ class ColossalCloudLLM(LLM):
     def text_completion(self, prompt, gen_config, auth_config):
-        # Complusory Parameters
+        # Required Parameters
        endpoint = auth_config.pop('endpoint')
        max_new_tokens = gen_config.pop('max_new_tokens')
         # Optional Parameters
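The required/optional split that the renamed comment describes comes from how `dict.pop` is used: without a default it raises `KeyError` for a missing required key, with a default it silently falls back. A standalone sketch (the values are placeholders):

```python
auth_config = {"endpoint": "https://example.com/api"}
gen_config = {"max_new_tokens": 256}

endpoint = auth_config.pop("endpoint")             # required: KeyError if absent
max_new_tokens = gen_config.pop("max_new_tokens")  # required
temperature = gen_config.pop("temperature", 1.0)   # optional: defaults to 1.0
```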

View File

@@ -33,7 +33,7 @@ class ColossalAPI:
     def __init__(self, model_type: str, model_path: str, ckpt_path: str = None) -> None:
         """
-        Configurate model
+        Configure model
         """
         if model_type + model_path + (ckpt_path or "") in ColossalAPI.__instances:
             return
@@ -47,7 +47,7 @@ class ColossalAPI:
         self.model.load_state_dict(state_dict)
         self.model.to(torch.cuda.current_device())
-        # Configurate tokenizer
+        # Configure tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
         self.model.eval()
@@ -87,7 +87,7 @@ class ColossalAPI:
 class VllmAPI:
     def __init__(self, host: str = "localhost", port: int = 8077) -> None:
-        # Configurate api for model served through web
+        # Configure api for model served through web
         self.host = host
         self.port = port
         self.url = f"http://{self.host}:{self.port}/generate"
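`ColossalAPI.__instances` keyed on `model_type + model_path + (ckpt_path or "")` is a per-configuration singleton cache. A minimal sketch of the same idea, written with `__new__` rather than the early return from `__init__` the diff shows (class and attribute names here are illustrative):

```python
class KeyedSingleton:
    _instances: dict = {}

    def __new__(cls, model_type: str, model_path: str, ckpt_path: str = None):
        # One instance per unique (model_type, model_path, ckpt_path) combination.
        key = model_type + model_path + (ckpt_path or "")
        if key not in cls._instances:
            cls._instances[key] = super().__new__(cls)
        return cls._instances[key]

a = KeyedSingleton("llama", "/models/llama")
b = KeyedSingleton("llama", "/models/llama")
assert a is b  # same configuration, same cached instance
```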

View File

@@ -36,7 +36,7 @@ class UniversalRetrievalConversation:
         text_splitter_chunk_overlap=10,
     ) -> None:
         """
-        Warpper for multilingual retrieval qa class (Chinese + English)
+        Wrapper for multilingual retrieval qa class (Chinese + English)
         Args:
             embedding_model_path: local or huggingface embedding model
             embedding_model_device:

View File

@@ -59,7 +59,7 @@ class CustomRetriever(BaseRetriever):
         Add documents to retriever
         Args:
             docs: the documents to add
-            cleanup: choose from "incremental" (update embeddings, skip existing embeddings) and "full" (destory and rebuild retriever)
+            cleanup: choose from "incremental" (update embeddings, skip existing embeddings) and "full" (destroy and rebuild retriever)
             mode: choose from "by source" (documents are grouped by source) and "merge" (documents are merged into one vector store)
         """
         if cleanup == "full":
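A sketch of the two `cleanup` strategies the corrected docstring contrasts, against a hypothetical vector-store interface (none of these names come from the repository):

```python
def add_documents(store, docs, cleanup: str = "incremental"):
    if cleanup == "full":
        store.clear()  # destroy and rebuild the retriever's index
        store.add(docs)
    elif cleanup == "incremental":
        # update embeddings, skipping documents that already exist
        store.add([d for d in docs if d.id not in store])
    else:
        raise ValueError(f"unknown cleanup mode: {cleanup!r}")
```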

View File

@@ -49,7 +49,7 @@ def destroy_sql_database(sql_engine: Union[Engine, str]) -> None:
 def detect_lang_naive(s):
     """
-    Naive function for language detection, should be replaced by an independant layer
+    Naive function for language detection, should be replaced by an independent layer
     """
     remove_nota = "[’·°–!\"#$%&'()*+,-./:;<=>?@,。?★、…【】()《》?“”‘’![\\]^_`{|}~]+"
     s = re.sub(remove_nota, "", s)
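The hunk shows only the punctuation-stripping step of `detect_lang_naive`; a plausible completion of such a naive detector (the CJK range check is an assumption, not the repository's code):

```python
import re

remove_nota = "[’·°–!\"#$%&'()*+,-./:;<=>?@,。?★、…【】()《》?“”‘’![\\]^_`{|}~]+"

def detect_lang_naive(s: str) -> str:
    s = re.sub(remove_nota, "", s)
    # Assumed heuristic: any CJK character means Chinese, otherwise English.
    return "zh" if re.search(r"[\u4e00-\u9fff]", s) else "en"

assert detect_lang_naive("你好,世界!") == "zh"
assert detect_lang_naive("Hello, world!") == "en"
```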

View File

@@ -96,7 +96,7 @@ def set_openmoe_args(
         load_balance_beam_width (int, optional): Expert load balance search's beam width. Defaults to 8.
         load_balance_group_swap_factor (float, optional): Expert load balance group swap factor. Longer value encourages less swap. Defaults to 0.4.
         enable_kernel (bool, optional): Use kernel optimization. Defaults to False.
-        enable_comm_overlap (bool, optional): Use communication overlap for MoE. Recommended to enable for muiti-node training. Defaults to False.
+        enable_comm_overlap (bool, optional): Use communication overlap for MoE. Recommended to enable for multi-node training. Defaults to False.
         enable_hierarchical_alltoall (bool, optional): Use hierarchical alltoall for MoE. Defaults to False.
     """
     moe_args = dict(
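Since `set_openmoe_args` funnels these flags into a plain dict, enabling the documented options is just keyword arguments; a minimal sketch mirroring the `moe_args = dict(...)` line (the real function takes additional model and routing arguments not shown here):

```python
# Only the keywords documented in this hunk are shown.
moe_args = dict(
    enable_kernel=False,
    enable_comm_overlap=True,           # recommended for multi-node training
    enable_hierarchical_alltoall=False,
)
```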

View File

@@ -190,13 +190,13 @@ def parse_args():
     parser.add_argument(
         "--comm_overlap",
         action="store_true",
-        help="Use communication overlap for MoE. Recommended to enable for muiti-node training.",
+        help="Use communication overlap for MoE. Recommended to enable for multi-node training.",
     )
     # hierarchical all-to-all
     parser.add_argument(
         "--hierarchical_alltoall",
         action="store_true",
-        help="Use hierarchical all-to-all for MoE. Recommended to enable for muiti-node training.",
+        help="Use hierarchical all-to-all for MoE. Recommended to enable for multi-node training.",
     )
     args = parser.parse_args()
@@ -366,7 +366,7 @@ def main():
             ):
                 coordinator.print_on_master(f"Apply load balance")
                 apply_load_balance(model, optimizer)
-            # save ckeckpoint
+            # save checkpoint
             if (step + 1) % args.save_interval == 0:
                 coordinator.print_on_master(f"Saving model checkpoint to {args.output_path}")
                 booster.save_model(model, args.output_path, shard=True)
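The fixed comment labels the checkpoint cadence: a save fires every `save_interval` steps because of the `(step + 1) % args.save_interval == 0` test. Isolated and runnable:

```python
def should_save(step: int, save_interval: int) -> bool:
    # With 0-indexed steps, this is true on steps save_interval-1,
    # 2*save_interval-1, ... i.e. every save_interval-th step.
    return (step + 1) % save_interval == 0

assert [s for s in range(10) if should_save(s, 4)] == [3, 7]
```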