mirror of https://github.com/THUDM/ChatGLM2-6B
Merge branch 'THUDM:main' into main
commit f45c3b9828
@@ -316,6 +316,8 @@ model = AutoModel.from_pretrained("your local path", trust_remote_code=True).to(
 At this point you can use the quantized model chatglm2-6b-int4. Because the quantized GPU kernels are written in CUDA, they cannot be used on macOS, so inference there must run on the CPU.
 To make full use of CPU parallelism, you also need to [install OpenMP separately](FAQ.md#q1).
 
+Inference on a Mac can also be done with [ChatGLM.cpp](https://github.com/li-plus/chatglm.cpp)
+
 ### Multi-GPU Deployment
 If you have multiple GPUs, but no single GPU has enough memory for the full model, you can split the model across several GPUs. First install accelerate: `pip install accelerate`, then load the model as follows:
 ```python
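The hunk is cut off at the opening ```python fence, so the README's actual multi-GPU snippet is not visible in this diff. Below is a minimal sketch of sharded loading, assuming the Hub id `THUDM/chatglm2-6b` and transformers' accelerate-backed `device_map="auto"`; the README itself may use a repo-specific helper instead.

```python
# Hedged sketch: shard ChatGLM2-6B across all visible GPUs via accelerate.
# The model id and the device_map choice are assumptions; the README's own
# snippet is truncated in this diff.
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
# With accelerate installed, device_map="auto" places each layer on whichever
# GPU still has free memory, so no single card needs to hold the full model.
model = AutoModel.from_pretrained(
    "THUDM/chatglm2-6b", trust_remote_code=True, device_map="auto"
).eval()

response, history = model.chat(tokenizer, "你好", history=[])
```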
@@ -10,7 +10,7 @@ torchrun --standalone --nnodes=1 --nproc-per-node=$NUM_GPUS main.py \
     --overwrite_cache \
     --prompt_column content \
     --response_column summary \
-    --model_name_or_path chatglm2-6b \
+    --model_name_or_path THUDM/chatglm2-6b \
     --ptuning_checkpoint ./output/$CHECKPOINT/checkpoint-$STEP \
     --output_dir ./output/$CHECKPOINT \
     --overwrite_output_dir \
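This change points `--model_name_or_path` at the Hub id `THUDM/chatglm2-6b` rather than a bare local directory name, so the base weights can be fetched from the Hub while the P-tuning checkpoint is still supplied separately via `--ptuning_checkpoint`. A hedged sketch of how such a checkpoint is typically restored on top of the base model (the paths and the `pre_seq_len` value are illustrative assumptions):

```python
# Hedged sketch: load the base model, then restore only the prefix-encoder
# weights from a P-tuning checkpoint. Paths and pre_seq_len are assumptions.
import os
import torch
from transformers import AutoConfig, AutoModel

config = AutoConfig.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
config.pre_seq_len = 128  # must match the value used during training

model = AutoModel.from_pretrained("THUDM/chatglm2-6b", config=config, trust_remote_code=True)

ckpt_dir = "./output/adgen-chatglm2-6b-pt/checkpoint-3000"  # hypothetical path
state = torch.load(os.path.join(ckpt_dir, "pytorch_model.bin"), map_location="cpu")
# Keep only the prefix-encoder tensors and strip their module prefix.
prefix_state = {
    k[len("transformer.prefix_encoder."):]: v
    for k, v in state.items() if k.startswith("transformer.prefix_encoder.")
}
model.transformer.prefix_encoder.load_state_dict(prefix_state)
```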
@@ -178,7 +178,7 @@ def main():
         return model_inputs
 
     def preprocess_function_train(examples):
-        max_seq_length = data_args.max_source_length + data_args.max_target_length
+        max_seq_length = data_args.max_source_length + data_args.max_target_length + 1
 
         model_inputs = {
             "input_ids": [],
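The added `+ 1` reserves one slot for the end-of-sequence token that training-time preprocessing appends after the concatenated source and target; without it, a maximally long example would have its EOS truncated away. A toy illustration (all lengths are illustrative assumptions):

```python
# Toy illustration of the "+ 1" (all values are illustrative assumptions).
max_source_length = 64
max_target_length = 128
max_seq_length = max_source_length + max_target_length + 1  # one slot for EOS

prompt_ids = [0] * max_source_length   # stand-in for a maximally long source
target_ids = [1] * max_target_length   # stand-in for a maximally long target
eos_token_id = 2

# Source and target exactly exhaust their budgets; the appended EOS token
# needs the extra slot, otherwise truncation would cut it off.
input_ids = prompt_ids + target_ids + [eos_token_id]
assert len(input_ids) == max_seq_length
```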
@@ -335,7 +335,7 @@ def main():
         tokenizer=tokenizer,
         data_collator=data_collator,
         compute_metrics=compute_metrics if training_args.predict_with_generate else None,
-        save_prefixencoder=model_args.pre_seq_len is not None
+        save_changed=model_args.pre_seq_len is not None
     )
 
     # Training
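The keyword passed to the trainer is renamed from `save_prefixencoder` to `save_changed`, matching the rewritten `ptuning/trainer.py` below. A minimal sketch of what such a flag can gate at save time, assuming the same prefix-encoder filtering as before (the real logic lives in the suppressed diff):

```python
# Hedged sketch of a save_changed-style checkpoint hook: when P-tuning v2 is
# active (pre_seq_len set), persist only the trainable prefix-encoder weights.
import os
import torch

def save_model(model, output_dir: str, save_changed: bool = False) -> None:
    if save_changed:
        # Keep only the parameters that training actually modified.
        state = {k: v for k, v in model.state_dict().items()
                 if "prefix_encoder" in k}
    else:
        state = model.state_dict()
    torch.save(state, os.path.join(output_dir, "pytorch_model.bin"))
```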
ptuning/trainer.py: 3,782 lines changed (file diff suppressed because it is too large).
@@ -19,7 +19,7 @@ from torch import nn
 from torch.utils.data import Dataset
 
 from transformers.deepspeed import is_deepspeed_zero3_enabled
-from trainer import Trainer
+from trainer import PrefixTrainer
 from transformers.trainer_utils import PredictionOutput
 from transformers.utils import logging
 
@@ -27,7 +27,7 @@ from transformers.utils import logging
 logger = logging.get_logger(__name__)
 
 
-class Seq2SeqTrainer(Trainer):
+class Seq2SeqTrainer(PrefixTrainer):
     def evaluate(
         self,
         eval_dataset: Optional[Dataset] = None,
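Together with the import change above, `Seq2SeqTrainer` now derives from `PrefixTrainer`, the renamed custom base class in `ptuning/trainer.py` (whose full diff is suppressed above). The subclass keeps its generation-aware `evaluate`/`predict` overrides, while the base class, judging by the `save_changed` keyword earlier in this commit, presumably carries the selective checkpoint-saving behavior.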