mirror of https://github.com/THUDM/ChatGLM2-6B
Add multi-gpu support
parent
549cdeb054
commit
b09e0c5a69
|
@ -3,14 +3,12 @@ import platform
|
|||
import signal
|
||||
from transformers import AutoTokenizer, AutoModel
|
||||
import readline
|
||||
from utils import load_model_on_gpus
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
|
||||
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
|
||||
# model_path = "THUDM/chatglm2-6b"
|
||||
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
||||
# model = load_model_on_gpus(model_path, num_gpus=2)
|
||||
# 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
|
||||
# from utils import load_model_on_gpus
|
||||
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
|
||||
model = model.eval()
|
||||
|
||||
os_name = platform.system()
|
||||
|
|
|
@ -158,10 +158,9 @@ async def predict(query: str, history: List[List[str]], model_id: str):
|
|||
if __name__ == "__main__":
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
|
||||
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
|
||||
# model_path = "THUDM/chatglm2-6b"
|
||||
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
||||
# model = load_model_on_gpus(model_path, num_gpus=2)
|
||||
# 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
|
||||
# from utils import load_model_on_gpus
|
||||
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
|
||||
model.eval()
|
||||
|
||||
uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
|
||||
|
|
|
@ -5,10 +5,9 @@ from utils import load_model_on_gpus
|
|||
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
|
||||
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
|
||||
# model_path = "THUDM/chatglm2-6b"
|
||||
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
||||
# model = load_model_on_gpus(model_path, num_gpus=2)
|
||||
# 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
|
||||
# from utils import load_model_on_gpus
|
||||
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
|
||||
model = model.eval()
|
||||
|
||||
"""Override Chatbot.postprocess"""
|
||||
|
|
|
@ -14,10 +14,9 @@ st.set_page_config(
|
|||
def get_model():
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
|
||||
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
|
||||
# model_path = "THUDM/chatglm2-6b"
|
||||
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
||||
# model = load_model_on_gpus(model_path, num_gpus=2)
|
||||
# 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
|
||||
# from utils import load_model_on_gpus
|
||||
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
|
||||
model = model.eval()
|
||||
return tokenizer, model
|
||||
|
||||
|
|
Loading…
Reference in New Issue