Update 7B_sft.py

pull/375/head
Wenwen Qu 2023-08-24 18:54:00 +08:00 committed by GitHub
parent 86bcda5ca9
commit 742a21677b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 1 deletion

View File

@@ -126,7 +126,6 @@ model = dict(
layer_norm_epsilon=1e-5,
use_flash_attn=True,
num_chunks=1, # if num_chunks > 1, interleaved pipeline scheduler is used.
sequence_parallel=False,
num_experts=8,
)
"""
@@ -144,6 +143,8 @@ tensor parallel: tensor parallel size, usually the number of GPUs per node.
parallel = dict(
zero1=8,
pipeline=dict(size=1, interleaved_overlap=True),
sequence_parallel=False,
expert=2,
)
cudnn_deterministic = False