mirror of https://github.com/THUDM/ChatGLM2-6B
Merge remote-tracking branch 'origin/main'
commit 4c87c2d9e8
@ -0,0 +1,63 @@
name: 🐞 Bug/Help
description: File a bug/issue
title: "[BUG/Help] <title>"
labels: []
body:
  - type: checkboxes
    attributes:
      label: Is there an existing issue for this?
      description: Please search to see if an issue already exists for the bug you encountered.
      options:
        - label: I have searched the existing issues
          required: true
  - type: textarea
    attributes:
      label: Current Behavior
      description: |
        A concise description of what you're experiencing, with a screenshot attached if possible.

        Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Expected Behavior
      description: A concise description of what you expected to happen.
    validations:
      required: false
  - type: textarea
    attributes:
      label: Steps To Reproduce
      description: Steps to reproduce the behavior.
      placeholder: |
        1. In this environment...
        2. With this config...
        3. Run '...'
        4. See error...
    validations:
      required: true
  - type: textarea
    attributes:
      label: Environment
      description: |
        examples:
          - **OS**: Ubuntu 20.04
          - **Python**: 3.8
          - **Transformers**: 4.26.1
          - **PyTorch**: 1.12
          - **CUDA Support**: True
      value: |
        - OS:
        - Python:
        - Transformers:
        - PyTorch:
        - CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`) :
      render: markdown
    validations:
      required: true
  - type: textarea
    attributes:
      label: Anything else?
      description: |
        Links? References? Anything that will give us more context about the issue you are encountering!
    validations:
      required: false
@ -0,0 +1 @@
blank_issues_enabled: false
@ -0,0 +1,26 @@
name: Feature request
description: Suggest an idea for this project
title: "[Feature] <title>"
labels: []
body:
  - type: textarea
    attributes:
      label: Is your feature request related to a problem? Please describe.
      description: |
        A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
    validations:
      required: false
  - type: textarea
    attributes:
      label: Solutions
      description: |
        Describe the solution you'd like
        A clear and concise description of what you want to happen.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Additional context
      description: Add any other context or screenshots about the feature request here.
    validations:
      required: false
@ -0,0 +1,56 @@
from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModel
import uvicorn, datetime
import torch

DEVICE = "cuda"
DEVICE_ID = "0"
CUDA_DEVICE = f"{DEVICE}:{DEVICE_ID}" if DEVICE_ID else DEVICE


def torch_gc():
    # Release cached GPU memory between requests.
    if torch.cuda.is_available():
        with torch.cuda.device(CUDA_DEVICE):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()


app = FastAPI()


@app.post("/")
async def create_item(request: Request):
    # Expects a JSON body with "prompt" and optional "history",
    # "max_length", "top_p" and "temperature" fields.
    global model, tokenizer
    json_post = await request.json()
    prompt = json_post.get('prompt')
    history = json_post.get('history')
    max_length = json_post.get('max_length')
    top_p = json_post.get('top_p')
    temperature = json_post.get('temperature')
    response, history = model.chat(tokenizer,
                                   prompt,
                                   history=history,
                                   max_length=max_length if max_length else 2048,
                                   top_p=top_p if top_p else 0.7,
                                   temperature=temperature if temperature else 0.95)
    now = datetime.datetime.now()
    time = now.strftime("%Y-%m-%d %H:%M:%S")
    answer = {
        "response": response,
        "history": history,
        "status": 200,
        "time": time
    }
    log = f'[{time}] prompt: "{prompt}", response: "{repr(response)}"'
    print(log)
    torch_gc()
    return answer


if __name__ == '__main__':
    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
    model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True, device='cuda')
    model.eval()
    uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
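The endpoint above accepts a JSON body with a prompt, an optional history list, and optional max_length / top_p / temperature overrides, and returns the model's reply together with the updated history. The following is only an illustrative client sketch (not part of the repo); it assumes the server was started with python api.py and is reachable at http://127.0.0.1:8000, and uses only the standard library.

# Illustrative client for the API above; assumes the server is running locally on port 8000.
import json
import urllib.request

payload = {
    "prompt": "Hello",   # the user prompt
    "history": [],       # previous (query, response) pairs; empty for a new conversation
    "max_length": 2048,  # optional generation parameters; the server falls back
    "top_p": 0.7,        # to these same defaults if they are omitted
    "temperature": 0.95,
}
req = urllib.request.Request(
    "http://127.0.0.1:8000/",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    result = json.load(resp)

print(result["response"])  # the model's reply
print(result["history"])   # send this back on the next request to keep context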
@ -0,0 +1,8 @@
protobuf
transformers==4.27.1
cpm_kernels
torch>=2.0
gradio
mdtex2html
sentencepiece
accelerate
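Since the bug template above asks reporters for their OS, Python, Transformers, PyTorch, and CUDA details, the short check below collects most of them. It is only a convenience sketch and assumes the packages listed here are already installed.

# Print the environment details requested by the bug report template.
import platform
import torch
import transformers

print("OS:", platform.platform())
print("Python:", platform.python_version())
print("Transformers:", transformers.__version__)
print("PyTorch:", torch.__version__)
print("CUDA Support:", torch.cuda.is_available())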
Binary file (image, 587 KiB) added; not shown.