from transformers import AutoModel model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()