mirror of https://github.com/THUDM/ChatGLM2-6B
Merge remote-tracking branch 'origin/main'
commit 4c87c2d9e8

@@ -0,0 +1,63 @@
name: 🐞 Bug/Help
description: File a bug/issue
title: "[BUG/Help] <title>"
labels: []
body:
  - type: checkboxes
    attributes:
      label: Is there an existing issue for this?
      description: Please search to see if an issue already exists for the bug you encountered.
      options:
        - label: I have searched the existing issues
          required: true
  - type: textarea
    attributes:
      label: Current Behavior
      description: |
        A concise description of what you're experiencing, with a screenshot attached if possible.
        Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Expected Behavior
      description: A concise description of what you expected to happen.
    validations:
      required: false
  - type: textarea
    attributes:
      label: Steps To Reproduce
      description: Steps to reproduce the behavior.
      placeholder: |
        1. In this environment...
        2. With this config...
        3. Run '...'
        4. See error...
    validations:
      required: true
  - type: textarea
    attributes:
      label: Environment
      description: |
        examples:
          - **OS**: Ubuntu 20.04
          - **Python**: 3.8
          - **Transformers**: 4.26.1
          - **PyTorch**: 1.12
          - **CUDA Support**: True
      value: |
        - OS:
        - Python:
        - Transformers:
        - PyTorch:
        - CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`) :
      render: markdown
    validations:
      required: true
  - type: textarea
    attributes:
      label: Anything else?
      description: |
        Links? References? Anything that will give us more context about the issue you are encountering!
    validations:
      required: false
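For reference, the values the Environment field above asks for can be gathered with a short script like the sketch below. It is illustrative only and not part of the repository; it assumes torch and transformers are importable in the environment being reported.

# Hypothetical helper: print the details requested by the Environment field above.
import platform

import torch
import transformers

print("OS:", platform.platform())
print("Python:", platform.python_version())
print("Transformers:", transformers.__version__)
print("PyTorch:", torch.__version__)
# Same check the template suggests via `python -c "import torch; print(torch.cuda.is_available())"`.
print("CUDA Support:", torch.cuda.is_available())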

@@ -0,0 +1 @@
blank_issues_enabled: false

@@ -0,0 +1,26 @@
name: Feature request
description: Suggest an idea for this project
title: "[Feature] <title>"
labels: []
body:
  - type: textarea
    attributes:
      label: Is your feature request related to a problem? Please describe.
      description: |
        A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
    validations:
      required: false
  - type: textarea
    attributes:
      label: Solutions
      description: |
        Describe the solution you'd like
        A clear and concise description of what you want to happen.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Additional context
      description: Add any other context or screenshots about the feature request here.
    validations:
      required: false

@@ -0,0 +1,56 @@
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModel
import uvicorn, json, datetime
import torch

# Device used for inference; CUDA_DEVICE resolves to e.g. "cuda:0".
DEVICE = "cuda"
DEVICE_ID = "0"
CUDA_DEVICE = f"{DEVICE}:{DEVICE_ID}" if DEVICE_ID else DEVICE


def torch_gc():
    # Free cached GPU memory after each request.
    if torch.cuda.is_available():
        with torch.cuda.device(CUDA_DEVICE):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()


app = FastAPI()


@app.post("/")
async def create_item(request: Request):
    global model, tokenizer
    # Read the JSON body and extract the prompt plus optional generation parameters.
    json_post_raw = await request.json()
    json_post = json.dumps(json_post_raw)
    json_post_list = json.loads(json_post)
    prompt = json_post_list.get('prompt')
    history = json_post_list.get('history')
    max_length = json_post_list.get('max_length')
    top_p = json_post_list.get('top_p')
    temperature = json_post_list.get('temperature')
    # Run one chat turn, falling back to default sampling parameters when omitted.
    response, history = model.chat(tokenizer,
                                   prompt,
                                   history=history,
                                   max_length=max_length if max_length else 2048,
                                   top_p=top_p if top_p else 0.7,
                                   temperature=temperature if temperature else 0.95)
    now = datetime.datetime.now()
    time = now.strftime("%Y-%m-%d %H:%M:%S")
    answer = {
        "response": response,
        "history": history,
        "status": 200,
        "time": time
    }
    log = "[" + time + "] " + 'prompt:"' + prompt + '", response:"' + repr(response) + '"'
    print(log)
    torch_gc()
    return answer


if __name__ == '__main__':
    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
    model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True, device='cuda')
    model.eval()
    uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
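For reference, a minimal client for the API server above might look like the following sketch. It assumes the server is running locally on port 8000 and uses the `requests` package, which is not listed in the requirements below; the payload keys mirror the fields read in create_item.

# Illustrative client for the API server above (assumes `pip install requests`
# and a server listening on http://127.0.0.1:8000).
import requests

payload = {
    "prompt": "Hello",      # user message
    "history": [],          # previous (query, response) pairs; start empty
    "max_length": 2048,     # optional; the server defaults to 2048
    "top_p": 0.7,           # optional; the server defaults to 0.7
    "temperature": 0.95,    # optional; the server defaults to 0.95
}

resp = requests.post("http://127.0.0.1:8000/", json=payload, timeout=300)
data = resp.json()
print(data["response"])     # model reply
print(data["history"])      # updated history to pass with the next request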

@@ -0,0 +1,8 @@
protobuf
transformers==4.27.1
cpm_kernels
torch>=2.0
gradio
mdtex2html
sentencepiece
accelerate
Binary file not shown (new image, 587 KiB).