From 4fc3a32c7e7a4766ae28bd2720b0d577ac9b89aa Mon Sep 17 00:00:00 2001
From: AllentDan <41138331+AllentDan@users.noreply.github.com>
Date: Thu, 16 Jan 2025 11:45:29 +0800
Subject: [PATCH] Update ecosystem documentation (#814)

---
 ecosystem/README.md       | 35 +++++++++++++++++++++++++++++++++--
 ecosystem/README_zh-CN.md | 37 +++++++++++++++++++++++++++++++++++--
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/ecosystem/README.md b/ecosystem/README.md
index ef918f8..3b435d9 100644
--- a/ecosystem/README.md
+++ b/ecosystem/README.md
@@ -86,6 +86,28 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
+### [SGLang](https://github.com/sgl-project/sglang)
+
+`SGLang` is a fast serving framework for large language models and vision language models.
+
+After installing it following the official [documentation](https://docs.sglang.ai/start/install.html), you can run inference with the `internlm3-8b-instruct` model as follows:
+
+```shell
+python3 -m sglang.launch_server --model internlm/internlm3-8b-instruct --trust-remote-code --chat-template internlm2-chat
+```
+
+```shell
+curl http://127.0.0.1:30000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer EMPTY" \
+  -d '{
+    "model": "internlm/internlm3-8b-instruct",
+    "messages": [{"role": "user", "content": "Introduce Shanghai"}],
+    "stream": false
+  }' \
+  --no-buffer
+```
+
 ### [TGI](https://github.com/huggingface/text-generation-inference)
 
 TGI is a toolkit for deploying and serving Large Language Models (LLMs). The easiest way to deploy an LLM is to use the official Docker container:
@@ -221,8 +243,9 @@ from langchain_openai import ChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 
 llm = ChatOpenAI(
-    api_key="a dummy key",
-    base_ur='https://0.0.0.0:23333/v1')
+    model_name="a-model",
+    openai_api_key="a dummy key",
+    openai_api_base='http://0.0.0.0:23333/v1')
 prompt = ChatPromptTemplate.from_messages([
     ("system", "You are a world class technical documentation writer."),
     ("user", "{input}")
@@ -245,6 +268,13 @@ It chooses ollama as the LLM inference engine locally. An example can be found f
 
 Therefore, you can integrate InternLM2 or InternLM2.5 models into LlamaIndex smoothly if you deploy them with `ollama` as guided in the [ollama section](#ollama).
 
+### [open-webui](https://github.com/open-webui/open-webui)
+
+Open WebUI is an extensible, feature-rich, and user-friendly self-hosted AI platform designed to run completely offline. It supports Ollama and other OpenAI-compatible API services, and comes with a built-in inference engine for RAG, making it a powerful AI deployment solution.
+
+1. Start an API service with LMDeploy, or launch one with ollama.
+2. Follow the [guidance](https://github.com/open-webui/open-webui?tab=readme-ov-file#installation-via-python-pip-) to install Open WebUI, start the webui service with `open-webui serve`, and open the webui in your browser.
+3. Refer to the [documentation](https://docs.openwebui.com/getting-started/quick-start/starting-with-ollama#step-2-managing-your-ollama-instance): on the page that opens, find the settings and configure the OpenAI-compatible service or the ollama service. Once configured, you can choose a model and start a conversation.
 ### [LazyLLM](https://github.com/LazyAGI/LazyLLM)
@@ -282,6 +312,7 @@ from lazyllm import pipeline, parallel, bind, SentenceSplitter, Document, Retriever, Reranker
 prompt = 'You will play the role of an AI Q&A assistant and complete a dialogue task. In this task, you need to provide your answer based on the given context and question.'
 ```
+
 ```python
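For reference, the OpenAI-compatible endpoint exposed by the `sglang.launch_server` command added above can also be queried from Python instead of curl. A minimal sketch, assuming the server is listening on `127.0.0.1:30000` and the `openai` client package is installed:

```python
# Minimal sketch: query the SGLang server started above through its
# OpenAI-compatible API. Assumes it is listening on 127.0.0.1:30000;
# the server does not check credentials, so any key works.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
response = client.chat.completions.create(
    model="internlm/internlm3-8b-instruct",
    messages=[{"role": "user", "content": "Introduce Shanghai"}],
    stream=False,
)
print(response.choices[0].message.content)
```

The same client works against any OpenAI-compatible server mentioned in this patch, e.g. an LMDeploy `api_server`, by changing `base_url`.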
diff --git a/ecosystem/README_zh-CN.md b/ecosystem/README_zh-CN.md
index 0e5407f..e299c36 100644
--- a/ecosystem/README_zh-CN.md
+++ b/ecosystem/README_zh-CN.md
@@ -86,6 +86,28 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
+### [SGLang](https://github.com/sgl-project/sglang)
+
+`SGLang` is an efficient serving tool for LLMs and VLMs.
+
+After completing the installation according to the official [documentation](https://docs.sglang.ai/start/install.html), you can serve and call the `internlm3-8b-instruct` model as follows:
+
+```shell
+python3 -m sglang.launch_server --model internlm/internlm3-8b-instruct --trust-remote-code --chat-template internlm2-chat
+```
+
+```shell
+curl http://127.0.0.1:30000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer EMPTY" \
+  -d '{
+    "model": "internlm/internlm3-8b-instruct",
+    "messages": [{"role": "user", "content": "Introduce Shanghai"}],
+    "stream": false
+  }' \
+  --no-buffer
+```
+
 ### [TGI](https://github.com/huggingface/text-generation-inference)
 
 TGI is a toolkit for deploying and serving LLMs. The simplest way to deploy an LLM service is to use the official Docker container:
@@ -221,8 +243,9 @@ from langchain_openai import ChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 
 llm = ChatOpenAI(
-    api_key="a dummy key",
-    base_ur='https://0.0.0.0:23333/v1')
+    model_name="a-model",
+    openai_api_key="a dummy key",
+    openai_api_base='http://0.0.0.0:23333/v1')
 prompt = ChatPromptTemplate.from_messages([
     ("system", "You are a world class technical documentation writer."),
     ("user", "{input}")
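For reference, a runnable end-to-end version of the LangChain snippet patched in the hunk above; a minimal sketch, assuming an OpenAI-compatible server (such as an LMDeploy `api_server`) is listening on port 23333, with `"a-model"` as a placeholder for whatever model name the server reports:

```python
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Point ChatOpenAI at the local OpenAI-compatible server; the dummy key is
# accepted because the local server does not verify credentials.
llm = ChatOpenAI(
    model_name="a-model",
    openai_api_key="a dummy key",
    openai_api_base='http://0.0.0.0:23333/v1')
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a world class technical documentation writer."),
    ("user", "{input}")
])
# Compose the prompt template and the model with LCEL and run one query.
chain = prompt | llm
print(chain.invoke({"input": "Introduce Shanghai"}).content)
```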
@@ -245,6 +268,14 @@ LlamaIndex is a framework for building context-augmented LLM applications.
 
 Therefore, you can smoothly integrate InternLM models into LlamaIndex if you deploy them with ollama as described in the [ollama section](#ollama).
 
+### [open-webui](https://github.com/open-webui/open-webui)
+
+Open WebUI is an extensible, feature-rich, and user-friendly self-hosted AI platform designed to run completely offline. It supports Ollama and other OpenAI-compatible API services, and comes with a built-in inference engine for RAG, making it a powerful AI deployment solution.
+
+1. You can start an api_server service with LMDeploy, or launch the service with ollama.
+2. Follow the [guidance](https://github.com/open-webui/open-webui?tab=readme-ov-file#installation-via-python-pip-) to install open-webui, start the webui service with `open-webui serve`, and open the webui in your browser.
+3. Refer to the [documentation](https://docs.openwebui.com/getting-started/quick-start/starting-with-ollama#step-2-managing-your-ollama-instance): on the page that opens, find the settings and configure the OpenAI-compatible service or the ollama service. Once configured, you can choose a model and start a conversation.
+
 ### [LazyLLM](https://github.com/LazyAGI/LazyLLM)
 
 LazyLLM is a low-code development tool for building multi-agent LLM applications. Compared with LangChain and LlamaIndex, it offers much greater flexibility and ease of use.
@@ -266,6 +297,7 @@ from lazyllm import TrainableModule, WebModule
 m = TrainableModule('internlm2-chat-7b').trainset('/path/to/your_data.json').mode('finetune')
 WebModule(m).update().wait()
 ```
+
 It is worth mentioning that whichever InternLM-series model you use, you can run inference and fine-tuning with LazyLLM without having to consider the model's sharding strategy or its special tokens.
 
 If you want to build your own RAG application, you do not need to first start an inference service and then configure an IP and port to launch the application, as you would with LangChain. Referring to the following code, you can use LazyLLM with the InternLM-series models to build a highly customized RAG application in about ten lines, complete with a document management service (documents must be given as a local absolute path and can be downloaded here: [rag_master](https://huggingface.co/datasets/Jing0o0Xin/rag_master)):
@@ -279,6 +311,7 @@ from lazyllm import pipeline, parallel, bind, SentenceSplitter, Document, Retriever, Reranker
 prompt = 'You will play the role of an AI Q&A assistant and complete a dialogue task. In this task, you need to provide your answer based on the given context and question.'
 ```
+
 ```python
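Finally, for reference alongside the fine-tuning snippet patched above: the same `TrainableModule` shown in the hunks can also be served directly, without a fine-tuning step. A minimal sketch, assuming the `lazyllm` package is installed and the `internlm2-chat-7b` weights are available locally:

```python
from lazyllm import TrainableModule, WebModule

# Wrap the chat model and serve it as a web demo, skipping the
# .trainset()/.mode('finetune') calls used in the fine-tuning example:
# update() prepares the model and wait() keeps the service running.
m = TrainableModule('internlm2-chat-7b')
WebModule(m).update().wait()
```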