mirror of https://github.com/THUDM/ChatGLM2-6B
Add multi-gpu support
parent 549cdeb054
commit b09e0c5a69
@@ -3,14 +3,12 @@ import platform
 import signal
 from transformers import AutoTokenizer, AutoModel
 import readline
-from utils import load_model_on_gpus
 
 tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
 model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
-# Multi-GPU support: use the three lines below instead of the two lines above, and change num_gpus to your actual number of GPUs
-# model_path = "THUDM/chatglm2-6b"
-# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-# model = load_model_on_gpus(model_path, num_gpus=2)
+# Multi-GPU support: use the two lines below instead of the one line above, and change num_gpus to your actual number of GPUs
+# from utils import load_model_on_gpus
+# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
 model = model.eval()
 
 os_name = platform.system()
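Note: the call sites above reference utils.load_model_on_gpus, but utils.py itself is not part of this diff. A minimal sketch of one way such a helper could work, assuming the accelerate package's dispatch_model; the submodule names in the device_map follow the ChatGLM2 layout and are assumptions here, not code from this commit:

    # Illustrative sketch only; the repo's actual utils.py is not shown in this diff.
    from accelerate import dispatch_model
    from transformers import AutoModel

    def load_model_on_gpus(checkpoint_path, num_gpus=2):
        # Load the weights once (half precision), then shard them across GPUs.
        model = AutoModel.from_pretrained(checkpoint_path, trust_remote_code=True).half()
        num_layers = 28  # ChatGLM2-6B has 28 transformer layers
        # Keep embeddings, final norm, and the output head on GPU 0;
        # split the transformer layers evenly over num_gpus devices.
        # These module names are assumptions based on the ChatGLM2 architecture.
        device_map = {
            "transformer.embedding": 0,
            "transformer.rotary_pos_emb": 0,
            "transformer.encoder.final_layernorm": 0,
            "transformer.output_layer": 0,
        }
        for i in range(num_layers):
            device_map[f"transformer.encoder.layers.{i}"] = i * num_gpus // num_layers
        return dispatch_model(model, device_map=device_map)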
@@ -158,10 +158,9 @@ async def predict(query: str, history: List[List[str]], model_id: str):
 if __name__ == "__main__":
     tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
     model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
-    # Multi-GPU support: use the three lines below instead of the two lines above, and change num_gpus to your actual number of GPUs
-    # model_path = "THUDM/chatglm2-6b"
-    # tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-    # model = load_model_on_gpus(model_path, num_gpus=2)
+    # Multi-GPU support: use the two lines below instead of the one line above, and change num_gpus to your actual number of GPUs
+    # from utils import load_model_on_gpus
+    # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
     model.eval()
 
     uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
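Note: this hunk patches the OpenAI-style API server (see the predict signature and the uvicorn.run line in the context). Assuming the upstream /v1/chat/completions route, which this diff does not show, a hypothetical smoke test after starting the server on two GPUs might look like:

    # Hypothetical client call; the route and payload shape are assumptions
    # taken from the repo's OpenAI-compatible server, not from this diff.
    import requests

    resp = requests.post(
        "http://127.0.0.1:8000/v1/chat/completions",
        json={
            "model": "chatglm2-6b",
            "messages": [{"role": "user", "content": "Hello"}],
        },
    )
    print(resp.json())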
@@ -5,10 +5,9 @@ from utils import load_model_on_gpus
 
 tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
 model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
-# Multi-GPU support: use the three lines below instead of the two lines above, and change num_gpus to your actual number of GPUs
-# model_path = "THUDM/chatglm2-6b"
-# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-# model = load_model_on_gpus(model_path, num_gpus=2)
+# Multi-GPU support: use the two lines below instead of the one line above, and change num_gpus to your actual number of GPUs
+# from utils import load_model_on_gpus
+# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
 model = model.eval()
 
 """Override Chatbot.postprocess"""
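Note: once the two commented lines are swapped in, the sharded model is used exactly like the single-GPU one; the demos drive generation through the checkpoint's stream_chat method (provided by the trust_remote_code model class, not by this diff). A minimal smoke test under those assumptions:

    from transformers import AutoTokenizer
    from utils import load_model_on_gpus  # repo helper referenced by the diff

    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
    model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
    model = model.eval()

    # stream_chat yields (partial_response, history) pairs as tokens are generated.
    history = []
    for response, history in model.stream_chat(tokenizer, "Hello", history=history):
        pass  # web_demo.py pushes each partial response into the Gradio chatbot
    print(response)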
|
@ -14,10 +14,9 @@ st.set_page_config(
|
||||||
def get_model():
|
def get_model():
|
||||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
|
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
|
||||||
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
|
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
|
||||||
# 多显卡支持,使用下面三行代替上面两行,将num_gpus改为你实际的显卡数量
|
# 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
|
||||||
# model_path = "THUDM/chatglm2-6b"
|
# from utils import load_model_on_gpus
|
||||||
# tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
|
||||||
# model = load_model_on_gpus(model_path, num_gpus=2)
|
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
return tokenizer, model
|
return tokenizer, model
|
||||||
|
|
||||||
|
|
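Note: in the Streamlit demo the loader sits inside get_model(), which upstream wraps in a cache decorator so the model is loaded only once per process. A sketch of the function with the multi-GPU lines applied; the @st.cache_resource decorator is assumed from the upstream web_demo2.py and is not shown in this hunk:

    import streamlit as st
    from transformers import AutoTokenizer
    from utils import load_model_on_gpus  # repo helper referenced by the diff

    @st.cache_resource  # assumed upstream decorator: load the model once per process
    def get_model():
        tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
        model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
        model = model.eval()
        return tokenizer, model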