diff --git a/applications/ColossalEval/colossal_eval/models/chatglm.py b/applications/ColossalEval/colossal_eval/models/chatglm.py
index f293c4f69..9c70c0d2a 100644
--- a/applications/ColossalEval/colossal_eval/models/chatglm.py
+++ b/applications/ColossalEval/colossal_eval/models/chatglm.py
@@ -3,6 +3,8 @@ from typing import List
 
 import torch
 
+from colossalai.utils import get_current_device
+
 from .huggingface import HuggingFaceModel
 
 IGNORE_INDEX = -100
@@ -126,9 +128,9 @@ class ChatGLMModel(HuggingFaceModel):
         """
         input_ids = torch.nn.utils.rnn.pad_sequence(
             input_ids_list, batch_first=True, padding_value=self.tokenizer.pad_token_id
-        ).to(torch.cuda.current_device())
+        ).to(get_current_device())
         labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX).to(
-            torch.cuda.current_device()
+            get_current_device()
         )
 
         outputs = self.model(input_ids)[0]
@@ -197,7 +199,7 @@ class ChatGLM2Model(ChatGLMModel):
             truncation=True,
             return_tensors="pt",
             max_length=self.model_max_length - max_new_tokens,
-        ).to(torch.cuda.current_device())
+        ).to(get_current_device())
 
         # Set output_scores=True to get prediction scores.
         outputs = self.model.generate(
diff --git a/applications/ColossalEval/colossal_eval/models/huggingface.py b/applications/ColossalEval/colossal_eval/models/huggingface.py
index 741c884f0..fff697e21 100644
--- a/applications/ColossalEval/colossal_eval/models/huggingface.py
+++ b/applications/ColossalEval/colossal_eval/models/huggingface.py
@@ -11,6 +11,7 @@ from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokeni
 
 from colossalai.logging import DistributedLogger
 from colossalai.shardformer import ShardConfig, ShardFormer
+from colossalai.utils import get_current_device
 
 from .base import BaseModel
 
@@ -128,12 +129,12 @@ class HuggingFaceModel(BaseModel):
             self.model = AutoModel.from_pretrained(path, **model_kwargs)
             shard_former = ShardFormer(shard_config)
             self.model, sharded_parameters = shard_former.optimize(self.model)
-            self.model.to(torch.cuda.current_device())
+            self.model.to(get_current_device())
 
             if peft_path is not None:
                 raise NotImplementedError("ShardFormer for PEFT models is not implemented.")
         else:
-            self.model = AutoModel.from_pretrained(path, **model_kwargs).to(torch.cuda.current_device())
+            self.model = AutoModel.from_pretrained(path, **model_kwargs).to(get_current_device())
             if peft_path is not None:
                 self.model = PeftModel.from_pretrained(self.model, peft_path, is_trainable=False)
         self.model.eval()
@@ -155,11 +156,11 @@ class HuggingFaceModel(BaseModel):
         """
         input_ids = torch.nn.utils.rnn.pad_sequence(
             input_ids_list, batch_first=True, padding_value=self.tokenizer.pad_token_id
-        ).to(torch.cuda.current_device())
+        ).to(get_current_device())
         labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX).to(
-            torch.cuda.current_device()
+            get_current_device()
         )
-        attention_mask = input_ids.ne(self.tokenizer.pad_token_id).to(torch.cuda.current_device())
+        attention_mask = input_ids.ne(self.tokenizer.pad_token_id).to(get_current_device())
 
         outputs = self.model(input_ids, attention_mask=attention_mask)[0]
 
@@ -464,7 +465,7 @@
             return_tensors="pt",
             return_token_type_ids=False,
             max_length=self.model_max_length - max_new_tokens,
-        ).to(torch.cuda.current_device())
+        ).to(get_current_device())
 
         # Set output_scores=True to get prediction scores.
         outputs = self.model.generate(
@@ -598,12 +599,12 @@ class HuggingFaceCausalLM(HuggingFaceModel):
             self.model = AutoModelForCausalLM.from_pretrained(path, **model_kwargs)
             shard_former = ShardFormer(shard_config)
             self.model, sharded_parameters = shard_former.optimize(self.model)
-            self.model.to(torch.cuda.current_device())
+            self.model.to(get_current_device())
 
             if peft_path is not None:
                 raise NotImplementedError("ShardFormer for PEFT models is not implemented.")
         else:
-            self.model = AutoModelForCausalLM.from_pretrained(path, **model_kwargs).to(torch.cuda.current_device())
+            self.model = AutoModelForCausalLM.from_pretrained(path, **model_kwargs).to(get_current_device())
 
             if peft_path is not None:
                 self.model = PeftModel.from_pretrained(self.model, peft_path, is_trainable=False)
diff --git a/applications/ColossalEval/examples/dataset_evaluation/inference.py b/applications/ColossalEval/examples/dataset_evaluation/inference.py
index 5b09f9de8..a340f3bfd 100644
--- a/applications/ColossalEval/examples/dataset_evaluation/inference.py
+++ b/applications/ColossalEval/examples/dataset_evaluation/inference.py
@@ -8,6 +8,7 @@ import torch.distributed as dist
 from colossal_eval import dataset, models, utils
 
 import colossalai
+from colossalai.accelerator import get_accelerator
 from colossalai.cluster import ProcessGroupMesh
 from colossalai.logging import get_dist_logger
 from colossalai.shardformer import ShardConfig
@@ -82,6 +83,7 @@ def rm_and_merge(
 
 def main(args):
     colossalai.launch_from_torch(config={}, seed=42)
+    accelerator = get_accelerator()
     world_size = dist.get_world_size()
     rank = dist.get_rank()
 
@@ -235,10 +237,10 @@ def main(args):
             ),
         )
 
-        logger.info(f"Rank {rank} peak CUDA mem: {torch.cuda.max_memory_allocated()/1024**3:.3f} GB")
+        logger.info(f"Rank {rank} peak device mem: {accelerator.max_memory_allocated()/1024**3:.3f} GB")
 
         del model_
-        torch.cuda.empty_cache()
+        accelerator.empty_cache()
 
     dist.barrier()
     if rank == 0:
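
Note (not part of the patch): a minimal sketch of the device-agnostic pattern the diff applies, assuming a ColossalAI install that provides colossalai.utils.get_current_device and colossalai.accelerator.get_accelerator, which are the two APIs the patched files import; the tensor workload below is only illustrative.

    import torch

    from colossalai.accelerator import get_accelerator
    from colossalai.utils import get_current_device


    def run_step() -> None:
        # get_current_device() replaces torch.cuda.current_device(): it returns the
        # current device for whichever backend is active (CUDA, NPU, ...).
        device = get_current_device()
        accelerator = get_accelerator()

        # Tensors are moved with .to(get_current_device()) instead of a CUDA-only call.
        x = torch.randn(4, 4).to(device)
        y = x @ x

        # The accelerator object replaces torch.cuda.empty_cache() and
        # torch.cuda.max_memory_allocated() with backend-neutral equivalents.
        del y
        accelerator.empty_cache()
        print(f"peak device mem: {accelerator.max_memory_allocated() / 1024**3:.3f} GB")


    if __name__ == "__main__":
        run_step()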