|
|
|
@ -158,10 +158,9 @@ async def predict(query: str, history: List[List[str]], model_id: str):
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the tokenizer and model from the
    # "THUDM/chatglm2-6b" checkpoint, then start the HTTP server.
    # Both loaders are called with trust_remote_code=True.
    tokenizer = AutoTokenizer.from_pretrained(
        "THUDM/chatglm2-6b",
        trust_remote_code=True,
    )
    model = AutoModel.from_pretrained(
        "THUDM/chatglm2-6b",
        trust_remote_code=True,
    )
    # Single-GPU path: move the loaded model onto the CUDA device.
    model = model.cuda()

    # Multi-GPU support: replace the model-loading code above with the two
    # lines below, and change num_gpus to the number of GPUs you actually
    # have.
    #   from utils import load_model_on_gpus
    #   model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)

    # Put the model in evaluation mode before serving requests.
    model.eval()

    # Serve `app` on every network interface, port 8000, with one worker.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
        workers=1,
    )
|
|
|
|