mirror of https://github.com/hpcaitech/ColossalAI
ai, big-model, data-parallelism, deep-learning, distributed-computing, foundation-models, heterogeneous-training, hpc, inference, large-scale, model-parallelism, pipeline-parallelism
You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
59 lines
1.7 KiB
59 lines
1.7 KiB
from locust import HttpUser, between, tag, task |
|
|
|
|
|
class QuickstartUser(HttpUser):
    """Locust user that exercises the inference server's HTTP API.

    Covers both streaming and non-streaming variants of the completion
    and chat endpoints, plus the offline generate endpoint and a health
    check. Note: ``stream`` is sent as the strings "True"/"False", not
    booleans — presumably what the server expects; verify against the API.
    """

    # Each simulated user waits 1-5 seconds between consecutive tasks.
    wait_time = between(1, 5)

    @tag("online-generation")
    @task(5)
    def completion(self):
        """POST a non-streaming completion request."""
        payload = {"prompt": "hello, who are you? ", "stream": "False"}
        self.client.post("/completion", json=payload)

    @tag("online-generation")
    @task(5)
    def completion_streaming(self):
        """POST a streaming completion request."""
        payload = {"prompt": "hello, who are you? ", "stream": "True"}
        self.client.post("/completion", json=payload)

    @tag("online-chat")
    @task(5)
    def chat(self):
        """POST a non-streaming chat request with a system + user message."""
        conversation = [
            {"role": "system", "content": "you are a helpful assistant"},
            {"role": "user", "content": "what is 1+1?"},
        ]
        self.client.post("/chat", json={"messages": conversation, "stream": "False"})

    @tag("online-chat")
    @task(5)
    def chat_streaming(self):
        """POST a streaming chat request with a system + user message."""
        conversation = [
            {"role": "system", "content": "you are a helpful assistant"},
            {"role": "user", "content": "what is 1+1?"},
        ]
        self.client.post("/chat", json={"messages": conversation, "stream": "True"})

    # offline-generation is only for showing the usage, it will never be used in actual serving.
    @tag("offline-generation")
    @task(5)
    def generate_streaming(self):
        """POST a streaming request to the offline generate endpoint."""
        payload = {"prompt": "Can you help me? ", "stream": "True"}
        self.client.post("/generate", json=payload)

    @tag("offline-generation")
    @task(5)
    def generate(self):
        """POST a non-streaming request to the offline generate endpoint."""
        payload = {"prompt": "Can you help me? ", "stream": "False"}
        self.client.post("/generate", json=payload)

    @tag("online-generation", "offline-generation")
    @task
    def health_check(self):
        """GET the server liveness probe."""
        self.client.get("/ping")
|
|
|