mirror of https://github.com/InternLM/InternLM

Update README_zh-CN.md

parent fefef768af
commit 10dd53e5fb
@@ -130,7 +130,12 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True)
 # Set `torch_dtype=torch.float16` to load the model in float16 precision; otherwise, depending on your hardware, you may run out of GPU memory.
-model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True, torch_dtype=torch.float16).cuda()
+model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
+# (Optional) On low-resource devices, you can load a 4-bit or 8-bit quantized model via bitsandbytes to further save GPU memory.
+# A 4-bit quantized InternLM 7B uses roughly 8GB of GPU memory.
+# pip install -U bitsandbytes
+# 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
+# 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
 model = model.eval()
 response, history = model.chat(tokenizer, "你好", history=[])
 print(response)
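For context, a minimal runnable sketch assembling the 4-bit loading path that the new comments describe. It assumes bitsandbytes is installed (plus accelerate, which device_map="auto" relies on); the model name, the load_in_4bit flag, and the chat() call all come from the diff itself.

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    model_dir = "internlm/internlm2-chat-7b"
    tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)

    # 4-bit quantized load; per the diff, this takes roughly 8GB of GPU memory.
    # Requires: pip install -U bitsandbytes accelerate
    model = AutoModelForCausalLM.from_pretrained(
        model_dir,
        device_map="auto",
        trust_remote_code=True,
        load_in_4bit=True,
    )
    model = model.eval()

    response, history = model.chat(tokenizer, "你好", history=[])
    print(response)

For full precision on capable hardware, drop load_in_4bit=True and pass torch_dtype=torch.float16 instead, as in the updated README snippet above.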