|
|
|
from locust import HttpUser, between, tag, task
|
|
|
|
|
|
|
|
|
|
|
|
class QuickstartUser(HttpUser):
    """Locust user that exercises the completion, chat, generate and ping endpoints.

    Every request-issuing task is registered with ``@task(5)``; the health
    check uses a bare ``@task``. Tasks are grouped with ``@tag`` so a run can
    be restricted to "online-generation", "online-chat" or
    "offline-generation".
    """

    # Pause between consecutive tasks, as produced by locust's between(1, 5).
    wait_time = between(1, 5)

    @tag("online-generation")
    @task(5)
    def completion(self):
        """POST a prompt to /completion with "stream" set to the string "False"."""
        body = {"prompt": "hello, who are you? ", "stream": "False"}
        self.client.post("/completion", json=body)

    @tag("online-generation")
    @task(5)
    def completion_streaming(self):
        """POST a prompt to /completion with "stream" set to the string "True"."""
        body = {"prompt": "hello, who are you? ", "stream": "True"}
        self.client.post("/completion", json=body)

    @tag("online-chat")
    @task(5)
    def chat(self):
        """POST a system + user conversation to /chat with "stream" set to "False"."""
        conversation = [
            {"role": "system", "content": "you are a helpful assistant"},
            {"role": "user", "content": "what is 1+1?"},
        ]
        body = {"messages": conversation, "stream": "False"}
        self.client.post("/chat", json=body)

    @tag("online-chat")
    @task(5)
    def chat_streaming(self):
        """POST a system + user conversation to /chat with "stream" set to "True"."""
        conversation = [
            {"role": "system", "content": "you are a helpful assistant"},
            {"role": "user", "content": "what is 1+1?"},
        ]
        body = {"messages": conversation, "stream": "True"}
        self.client.post("/chat", json=body)

    # The offline-generation tasks below only demonstrate usage; they will
    # never be used in actual serving.
    @tag("offline-generation")
    @task(5)
    def generate_streaming(self):
        """POST a prompt to /generate with "stream" set to the string "True"."""
        body = {"prompt": "Can you help me? ", "stream": "True"}
        self.client.post("/generate", json=body)

    @tag("offline-generation")
    @task(5)
    def generate(self):
        """POST a prompt to /generate with "stream" set to the string "False"."""
        body = {"prompt": "Can you help me? ", "stream": "False"}
        self.client.post("/generate", json=body)

    @tag("online-generation", "offline-generation")
    @task
    def health_check(self):
        """Issue a GET request to /ping."""
        self.client.get("/ping")
|