"""API.py: minimal FastAPI wrapper exposing a ChatGLM-6B chat endpoint.

POST a JSON object to ``/`` containing a ``prompt`` string and, optionally,
a ``device`` label that is only used in the log line.  The reply is a JSON
object with the keys ``response``, ``status`` and ``time``.
"""
import datetime
import json
import os
import platform
import time
from typing import Optional

import uvicorn
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel

app = FastAPI()

# NOTE(review): a single conversation history is shared by every client of
# this process and is never reset or truncated here; it is replaced with
# whatever ``model.chat`` returns on each request.  Presumably it grows with
# every turn -- confirm whether per-client history is wanted.
history = []

# Loaded at import time so the module works both when run as a script and
# when served through ``uvicorn API:app``.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().quantize(4).cuda()
model = model.eval()


@app.post("/")
async def create_item(request: Request):
    """Run one chat turn against the shared model and return its reply.

    Expects a JSON object body with:
      * ``prompt`` (str, required): the user message passed to ``model.chat``.
      * ``device`` (optional): free-form label, only echoed in the log line.

    Returns a dict with ``response`` (the model's reply), ``status`` (200)
    and ``time`` (local timestamp formatted ``%Y-%m-%d %H:%M:%S``).

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not a JSON
            object, or lacks a string ``prompt``.
    """
    global history

    try:
        payload = await request.json()
    except ValueError as err:  # json.JSONDecodeError is a ValueError
        raise HTTPException(status_code=400, detail="request body must be valid JSON") from err

    if not isinstance(payload, dict):
        raise HTTPException(status_code=400, detail="request body must be a JSON object")

    prompt = payload.get('prompt')
    if not isinstance(prompt, str):
        raise HTTPException(status_code=400, detail="'prompt' must be a string")

    # Validation happens before this call so a bad request can never touch
    # the shared conversation history.
    response, history = model.chat(tokenizer, prompt, history=history)

    # Named ``timestamp`` so the imported ``time`` module is not shadowed.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    answer = {
        "response": response,
        "status": 200,
        "time": timestamp,
    }

    # f-string formatting keeps the original log layout but no longer raises
    # TypeError when ``device`` is absent (it is rendered as ``None``).
    device = payload.get('device')
    log = f'[{timestamp}] device:"{device}", prompt:"{prompt}", response:"{response!r}"'
    print(log)
    return answer


if __name__ == '__main__':
    # Kept last so the model above is ready before serving.  The app object
    # is passed directly (rather than the 'API:app' import string) so uvicorn
    # serves this already-initialised module instead of importing the file a
    # second time and loading the model again.
    uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)