from locust import HttpUser, between, tag, task


class QuickstartUser(HttpUser):
    """Simulated user that exercises the serving API's generation and chat endpoints."""

    # Each simulated user waits 1-5 seconds between tasks.
    wait_time = between(1, 5)

    @tag("online-generation")
    @task(5)
    def completion(self):
        self.client.post("/completion", json={"prompt": "hello, who are you? ", "stream": "False"})

    @tag("online-generation")
    @task(5)
    def completion_streaming(self):
        self.client.post("/completion", json={"prompt": "hello, who are you? ", "stream": "True"})

    @tag("online-chat")
    @task(5)
    def chat(self):
        self.client.post(
            "/chat",
            json={
                "messages": [
                    {"role": "system", "content": "you are a helpful assistant"},
                    {"role": "user", "content": "what is 1+1?"},
                ],
                "stream": "False",
            },
        )

    @tag("online-chat")
    @task(5)
    def chat_streaming(self):
        self.client.post(
            "/chat",
            json={
                "messages": [
                    {"role": "system", "content": "you are a helpful assistant"},
                    {"role": "user", "content": "what is 1+1?"},
                ],
                "stream": "True",
            },
        )

    # offline-generation is only for demonstrating usage; it will never be used in actual serving.
    @tag("offline-generation")
    @task(5)
    def generate_streaming(self):
        self.client.post("/generate", json={"prompt": "Can you help me? ", "stream": "True"})

    @tag("offline-generation")
    @task(5)
    def generate(self):
        self.client.post("/generate", json={"prompt": "Can you help me? ", "stream": "False"})

    # Lightweight liveness probe, included in both tag groups; runs at the default
    # task weight of 1, so it fires far less often than the weight-5 tasks above.
    @tag("online-generation", "offline-generation")
    @task
    def health_check(self):
        self.client.get("/ping")
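

# Usage sketch (assuming this file is saved as locustfile.py; the host and port
# below describe a hypothetical local deployment, not anything fixed by this file):
#
#   locust -f locustfile.py --host http://localhost:8000
#
# Locust's --tags flag runs only the tasks carrying a given tag, e.g. to hit just
# the online generation endpoints:
#
#   locust -f locustfile.py --host http://localhost:8000 --tags online-generation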