from locust import HttpUser, between, tag, task


class QuickstartUser(HttpUser):
    """Simulated user that load-tests the serving API's generation endpoints."""

    # Wait 1-5 seconds between consecutive tasks to mimic real user pacing.
    wait_time = between(1, 5)
@tag("online-generation")
@task(5)
def completion(self):
self.client.post("/completion", json={"prompt": "hello, who are you? ", "stream": "False"})
@tag("online-generation")
@task(5)
def completion_streaming(self):
self.client.post("/completion", json={"prompt": "hello, who are you? ", "stream": "True"})
@tag("online-chat")
@task(5)
def chat(self):
self.client.post(
"/chat",
json={
"messages": [
{"role": "system", "content": "you are a helpful assistant"},
{"role": "user", "content": "what is 1+1?"},
],
"stream": "False",
},
)
@tag("online-chat")
@task(5)
def chat_streaming(self):
self.client.post(
"/chat",
json={
"messages": [
{"role": "system", "content": "you are a helpful assistant"},
{"role": "user", "content": "what is 1+1?"},
],
"stream": "True",
},
)
    # The offline-generation tasks exist only to demonstrate usage; they are
    # never exercised in actual serving.
@tag("offline-generation")
@task(5)
def generate_streaming(self):
self.client.post("/generate", json={"prompt": "Can you help me? ", "stream": "True"})
@tag("offline-generation")
@task(5)
def generate(self):
self.client.post("/generate", json={"prompt": "Can you help me? ", "stream": "False"})
@tag("online-generation", "offline-generation")
@task
def health_check(self):
self.client.get("/ping")