mirror of https://github.com/hpcaitech/ColossalAI

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci

branch: pull/5842/head
parent: 4c69e2dc91
commit: df612434c9
@@ -162,4 +162,4 @@ coverage.xml
# log, test files - ColossalChat
applications/ColossalChat/logs
applications/ColossalChat/tests/logs

LICENSE
@@ -551,4 +551,4 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -8,11 +8,10 @@ import argparse
import numpy as np
import torch
-from transformers import LlamaTokenizer, LlamaForCausalLM
+from transformers import LlamaForCausalLM, LlamaTokenizer

from colossalai.logging import get_dist_logger

logger = get_dist_logger()
@@ -10,8 +10,8 @@ import os
from typing import Any, Dict, Tuple, Union

import torch
-from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
+from torch.optim.optimizer import Optimizer

from colossalai.booster import Booster
from colossalai.cluster import DistCoordinator
@@ -1,20 +1,19 @@
from copy import deepcopy
-from typing import Optional, List, Dict, Tuple, Callable, Any
+from typing import Any, Callable, Dict, List, Optional, Tuple

import torch
from torch import nn

from transformers import PreTrainedTokenizer
-from transformers.utils import logging
from transformers.generation.utils import GenerationConfig, LogitsProcessorList, StoppingCriteriaList
+from transformers.utils import logging

logger = logging.get_logger(__name__)


def get_prompt_template(
-    input_query:str,
-    history:List[Dict]= None,
-    roles:list = ["", "Human", "Assistant"],
+    input_query: str,
+    history: List[Dict] = None,
+    roles: list = ["", "Human", "Assistant"],
) -> str:
    """
    Generates a prompt template for chat models based on input and history.

@@ -32,7 +31,7 @@ def get_prompt_template
        new_history = []
    else:
        new_history = deepcopy(history)

    new_history.append({"role": roles[1], "message": input_query.strip()})
    new_history.append({"role": roles[2], "message": None})

@@ -48,22 +47,23 @@ def get_prompt_template(
            prompt += f"{role}: <s>"
    return prompt


@torch.inference_mode()
def streaming_chat(
    model: Any,
    tokenizer: PreTrainedTokenizer,
    input_query: str,
    history: List[Dict] = None,
    roles: list = ["", "Human", "Assistant"],
    past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
    temperature: float = 0.8,
    top_p: float = 0.95,
    top_k: int = 50,
    do_sample: bool = True,
    length_penalty: float = 1.2,
    max_new_tokens: int = 512,
    logits_processor: LogitsProcessorList = None,
    return_past_key_values: bool = False,
    **kwargs,
):
    """

@@ -87,7 +87,7 @@ def streaming_chat
        **kwargs: Additional keyword arguments for generation.

    Yields:
        Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
            optionally the updated past key values if `return_past_key_values` is True.

    Ensures padding is on the left side for the tokenizer.

@@ -97,32 +97,37 @@ def streaming_chat
        history = []
    if logits_processor is None:
        logits_processor = LogitsProcessorList()

    generation_kwargs = {
-        'temperature': temperature,
-        'top_p': top_p,
-        'top_k': top_k,
-        'do_sample': do_sample,
-        'max_new_tokens': max_new_tokens,
-        'length_penalty': length_penalty,
-        'use_cache': True,
-        **kwargs
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": top_k,
+        "do_sample": do_sample,
+        "max_new_tokens": max_new_tokens,
+        "length_penalty": length_penalty,
+        "use_cache": True,
+        **kwargs,
    }

    prompt_str = get_prompt_template(input_query, history=history, roles=roles)

    eos_token_id = [tokenizer.eos_token_id]
    inputs = tokenizer(prompt_str, return_tensors="pt").to(model.device)
    history.append({"role": roles[1], "message": input_query.strip()})
    history.append({"role": roles[2], "message": None})

-    for outputs in stream_generate(model, **inputs, past_key_values=past_key_values,
-                                   eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
-                                   **generation_kwargs):
+    for outputs in stream_generate(
+        model,
+        **inputs,
+        past_key_values=past_key_values,
+        eos_token_id=eos_token_id,
+        return_past_key_values=return_past_key_values,
+        **generation_kwargs,
+    ):
        if return_past_key_values:
            outputs, past_key_values = outputs

-        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
+        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) : -1]
        response = tokenizer.decode(outputs)

        history[-1]["message"] = response.strip()

@@ -130,30 +135,30 @@ def streaming_chat
            yield response, history, past_key_values
        else:
            yield response, history


@torch.inference_mode()
def stream_generate(
    model: Any,
    input_ids: torch.Tensor,
    generation_config: Optional[GenerationConfig] = None,
    logits_processor: Optional[LogitsProcessorList] = None,
    stopping_criteria: Optional[StoppingCriteriaList] = None,
    prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
    return_past_key_values: bool = False,
    **kwargs,
):
    """
    Generates sequences of token ids using the specified model and generation parameters.
    Adapted from https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py

    Args:
        model (Any): The model used for generating sequences of token ids.
        input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
        generation_config (Optional[GenerationConfig]): The generation configuration to be used as base parametrization for the generation call.
        logits_processor (Optional[LogitsProcessorList]): Custom logits processors that complement the default logits processors built from arguments
            and generation config.
        stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
            and a generation config.
        prefix_allowed_tokens_fn (Optional[Callable[[int, torch.Tensor], List[int]]]): Function to constrain token generation.
        return_past_key_values (bool): Whether to return past key values for further incremental decoding, defaults to False.

@@ -169,7 +174,7 @@ def stream_generate
        generation_config = model.generation_config
    generation_config = deepcopy(generation_config)
    model_kwargs = generation_config.update(**kwargs)

    eos_token_id = generation_config.eos_token_id
    if isinstance(eos_token_id, int):
        eos_token_id = [eos_token_id]

@@ -177,25 +182,25 @@ def stream_generate

    if generation_config.max_new_tokens is not None:
        generation_config.max_length = generation_config.max_new_tokens + input_ids_len

    if input_ids_len >= generation_config.max_length:
        input_ids_string = "decoder_input_ids" if model.config.is_encoder_decoder else "input_ids"
        logger.warning(
            f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
            " increasing `max_new_tokens`."
        )
    logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
    stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

    # prepare distribution pre_processing samplers
    logits_processor = model._get_logits_processor(
        generation_config=generation_config,
        input_ids_seq_length=input_ids_len,
        encoder_input_ids=input_ids,
        prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
        logits_processor=logits_processor,
    )

    # prepare stopping criteria
    stopping_criteria = model._get_stopping_criteria(

@@ -205,7 +210,7 @@ def stream_generate
    logits_warper = model._get_logits_warper(generation_config)
    unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
    scores = None

    while True:
        model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
        # forward pass to get next token

@@ -244,4 +249,4 @@ def stream_generate
            yield input_ids
        # stop when each sentence is finished, or if exceed the maximum length
        if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
            break
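For context, a minimal sketch of how the patched `streaming_chat` API above is typically driven (not part of this commit; the checkpoint paths are placeholders, and the pattern follows the example script reformatted later in this diff):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

from colossal_llama2.utils.stream_chat_patch import streaming_chat

# Placeholders: point these at a real chat checkpoint and its tokenizer.
model = AutoModelForCausalLM.from_pretrained("path/to/chat-model").cuda().eval()
tokenizer = AutoTokenizer.from_pretrained("path/to/tokenizer")

history, past_key_values, printed = [], None, 0
for response, history, past_key_values in streaming_chat(
    model,
    tokenizer,
    "What is Colossal-AI?",
    history=history,
    past_key_values=past_key_values,
    return_past_key_values=True,
):
    # Each yield carries the full response so far; print only the new suffix.
    print(response[printed:], end="", flush=True)
    printed = len(response)
```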
@@ -43,7 +43,7 @@ if __name__ == '__main__':
    model.to(device)

    tokenizer = AutoTokenizer.from_pretrained("hpcai-tech/Colossal-LLaMA-2-13b-base")

    question = "xxx" # Your question.
    prompt = (f"请对下面的问题，给出准确、可靠、安全的回答\n"
              f"{question}"
@@ -111,7 +111,7 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value
| 整车热管理主要包括什么内容 | 整车热管理主要包括：1、发动机冷却系统；2、散热系统；3、空调冷却系统；4、采暖系统；5、动力转向器冷却系统；6、润滑油冷却系统；7、变速箱冷却系统；8、燃油冷却系统；9、冷却系统排放液系统。 |
| 简述下汽车空调-热泵循环系统的原理 | 汽车空调系统主要有三种形式，其中热泵循环系统是其中的一种。其原理是利用发动机排放的热量，通过压缩机将冷媒压缩成高温高压的状态，然后通过冷却器释放到空气中，吸收周围的热量，再通过膨胀阀释放到蒸发器中，蒸发器在压缩机的作用下将冷媒压缩成高温高压的状态，再次释放到空气中，从而实现制冷的效果。 |

#### Finance
| Question | <center>Colossal-LLaMA-2-13b-base</center> |
| :------: | :----------------------- |
| 如何看待当前提前还贷热 | 提前还贷是一种常见的还贷方式，在市场中存在着许多的因素和影响，需要具体分析当前的经济形势和人们心理状况的影响。提前还贷可以帮助借款人早日摆脱债务负担，但也存在一些风险和不确定性。从经济形势来看，当前利率下探、还款压力减轻等因素可能会导致一部分借款人提前还贷。此外，一些购房者可能会受到市场不景气的影响，担心未来房产价值会下降，因此提前还贷以避免资产贬值。从借款人心理状况来看，提前还贷也可能会由于借款人对未来经济预期的不确定性而产生压力和不满。因此，提前还贷并不能一概而论，需要根据具体情况进行分析。如果提前还贷对借款人的经济情况和未来规划造成不利影响，建议谨慎考虑是否要提前还贷。|

@@ -150,4 +150,4 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value

## Conclusion
The Colossal-AI team's advanced 13B model, compared to the 7B version, features a refined data structure categorizing information into informative, functional, and memory replay data. Informative data is intricately subdivided into major categories, each further segmented for precise control. Concurrently, data scale across domains is expanded. Tailored enhancements meet community demands for large model capabilities in natural language processing tasks, ensuring proficiency during pre-training and cost-effective fine-tuning. Addressing security and values concerns, multidimensional controls are implemented, securing the baseline model and aligning it with correct values.
@@ -242,4 +242,4 @@ To comprehensively assess the performance of the Colossal-LLaMA-2-7B-base model,
## Conclusion
In general, the Colossal-LLaMA-2-7B-base model not only enhances its understanding of English but also exhibits significant improvements in its comprehension of Chinese. It boasts a broad spectrum of general knowledge, encompassing various fields such as food, sports, technology, literature, games, and more. Regarding text generation tasks, the Colossal-LLaMA-2-7B-base model excels in writing performance; however, its ability to generate specific formats like code, emails, tables, etc., needs enhancement due to the scarcity of relevant training data during our training phase. When compared to the Qwen-7b-base model, the Colossal-LLaMA-2-7B-base model outperforms it in answering most English questions and some Chinese questions, as demonstrated in the examples above.

Presently, the Colossal-LLaMA-2-7B-base model already exhibits some capabilities in sentiment analysis, logical reasoning, information extraction, role-play, classification, and rewriting. These capabilities are poised for further improvement in the future as part of our ongoing enhancements.
@@ -1,2 +1,2 @@
hostname1
hostname2
@@ -15,7 +15,7 @@ def load_model(model_path, device="cuda", **kwargs):
    model.to(device)

    try:
-        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side='left')
+        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left")
    except OSError:
        raise ImportError("Tokenizer not found. Please check if the tokenizer exists or the model path is correct.")
@@ -12,4 +12,3 @@ flash-attn>=2.0.0,<=2.0.5
tqdm
sentencepiece==0.1.99
protobuf<=3.20.0
@@ -1,11 +1,11 @@
-import os
import argparse

-from transformers import AutoTokenizer, AutoModelForCausalLM
from colossal_llama2.utils.stream_chat_patch import streaming_chat
+from transformers import AutoModelForCausalLM, AutoTokenizer

SYSTEM = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."


def main(args):
    model = AutoModelForCausalLM.from_pretrained(args.model_path).cuda().eval()
    tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)

@@ -27,29 +27,34 @@ def main(args):
        print(f"\n{roles[2]}: ", end="")
        gen_len = 0
        for response, history, past_key_values in streaming_chat(
-            model, tokenizer, input_query, history=history, roles=roles,
-            temperature = args.temperature,
-            top_p = args.top_p,
-            top_k = args.top_k,
-            do_sample = args.do_sample,
-            length_penalty = args.length_penalty,
-            max_new_tokens = args.max_new_tokens,
+            model,
+            tokenizer,
+            input_query,
+            history=history,
+            roles=roles,
+            temperature=args.temperature,
+            top_p=args.top_p,
+            top_k=args.top_k,
+            do_sample=args.do_sample,
+            length_penalty=args.length_penalty,
+            max_new_tokens=args.max_new_tokens,
            past_key_values=past_key_values,
-            return_past_key_values=True):
+            return_past_key_values=True,
+        ):
            output = response[gen_len:]
            print(output, end="", flush=True)
            gen_len = len(response)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
-    parser.add_argument('--model_path', type=str, default=None, help="path to chat version model")
-    parser.add_argument('--tokenizer_path', type=str, default=None, help="path to chat version tokenizer")
-    parser.add_argument('--temperature', type=float, default=0.8, help="set temperature")
-    parser.add_argument('--top_p', type=float, default=0.95, help="set top p value")
-    parser.add_argument('--top_k', type=int, default=50, help="set top k value")
-    parser.add_argument('--do_sample', type=bool, default=True, help="whether turn on do_sample or not")
-    parser.add_argument('--length_penalty', type=float, default=1.2, help="set length penalty")
-    parser.add_argument('--max_new_tokens', type=int, default=512, help="set max new tokens")
+    parser.add_argument("--model_path", type=str, default=None, help="path to chat version model")
+    parser.add_argument("--tokenizer_path", type=str, default=None, help="path to chat version tokenizer")
+    parser.add_argument("--temperature", type=float, default=0.8, help="set temperature")
+    parser.add_argument("--top_p", type=float, default=0.95, help="set top p value")
+    parser.add_argument("--top_k", type=int, default=50, help="set top k value")
+    parser.add_argument("--do_sample", type=bool, default=True, help="whether turn on do_sample or not")
+    parser.add_argument("--length_penalty", type=float, default=1.2, help="set length penalty")
+    parser.add_argument("--max_new_tokens", type=int, default=512, help="set max new tokens")
    args = parser.parse_args()
    main(args)
@@ -1 +1 @@
0.0.1
@@ -20,13 +20,13 @@ import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import GeminiPlugin, HybridParallelPlugin, LowLevelZeroPlugin
from colossalai.cluster import DistCoordinator
+from colossalai.logging import get_dist_logger
from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
from colossalai.nn.optimizer import HybridAdam
-from colossalai.utils import get_current_device
-from colossalai.logging import get_dist_logger

logger = get_dist_logger()


def train(args):
    # check lora compatibility
    if "gemini" in args.plugin and args.lora_rank > 0:
@@ -3,7 +3,6 @@ import copy
import os
from typing import Dict, List

-import torch
import torch.distributed as dist
from colossal_eval import dataset, models, utils

Binary file not shown.

@@ -106,6 +106,5 @@ def main():
    print(f"[{coordinator.rank}] {outputs}")


if __name__ == "__main__":
    main()
@@ -24,6 +24,7 @@ from langchain.pydantic_v1 import Field
from langchain.schema import BaseRetriever, Document
from langchain.schema.language_model import BaseLanguageModel


class CustomBaseRetrievalQA(BaseRetrievalQA):
    """Base class for question-answering chains."""

@@ -98,7 +99,6 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
            for k, v in inputs.items()
            if k in ["stop", "temperature", "top_k", "top_p", "max_new_tokens", "doc_prefix"]
        }
-        answers = []
        if self.combine_documents_chain.memory is not None:
            buffered_history_backup, summarized_history_temp_backup = copy.deepcopy(
                self.combine_documents_chain.memory.buffered_history

@@ -117,10 +117,10 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
            ) = copy.deepcopy(buffered_history_backup), copy.deepcopy(summarized_history_temp_backup)

        # if rejection_trigger_keywords is not given, return the response from LLM directly
-        rejection_trigger_keywords = inputs.get('rejection_trigger_keywords', [])
+        rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
        answer = answer if all([rej not in answer for rej in rejection_trigger_keywords]) else None
        if answer is None:
-            answer = inputs.get('rejection_answer', "抱歉，根据提供的信息无法回答该问题。")
+            answer = inputs.get("rejection_answer", "抱歉，根据提供的信息无法回答该问题。")
        if self.combine_documents_chain.memory is not None:
            self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})

@@ -161,10 +161,14 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
            input_documents=docs, question=question, callbacks=_run_manager.get_child(), **kwargs
        )
        # if rejection_trigger_keywords is not given, return the response from LLM directly
-        rejection_trigger_keywords = inputs.get('rejection_trigger_keywords', [])
-        answer = answer if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords)==0 else None
+        rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
+        answer = (
+            answer
+            if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords) == 0
+            else None
+        )
        if answer is None:
-            answer = inputs.get('rejection_answer', "抱歉，根据提供的信息无法回答该问题。")
+            answer = inputs.get("rejection_answer", "抱歉，根据提供的信息无法回答该问题。")
            self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})

        if self.return_source_documents:
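To make the reformatted rejection check above easier to follow, a small illustrative sketch with hypothetical values (not from the commit):

```python
# Hypothetical values; this mirrors the expression reformatted above.
rejection_trigger_keywords = ["无法回答"]
answer = "抱歉，根据提供的信息无法回答该问题。"
answer = (
    answer
    if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords) == 0
    else None
)
# A trigger keyword occurs in the answer, so answer becomes None and the chain
# falls back to inputs.get("rejection_answer", ...).
```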
@@ -126,7 +126,7 @@ class DocumentLoader:
        else:
            # May ba a directory, we strictly follow the glob path and will not load files in subdirectories
            pass

    def clear(self):
        """
        Clear loaded data.
@@ -1,39 +1,40 @@
-'''
+"""
Class for loading table type data. please refer to Pandas-Input/Output for file format details.
-'''
+"""


-import os
import glob
+import os

import pandas as pd
-from sqlalchemy import create_engine
-from colossalqa.utils import drop_table
from colossalqa.mylogging import get_logger
+from colossalqa.utils import drop_table
+from sqlalchemy import create_engine

logger = get_logger()

-SUPPORTED_DATA_FORMAT = ['.csv','.xlsx', '.xls','.json','.html','.h5', '.hdf5','.parquet','.feather','.dta']
+SUPPORTED_DATA_FORMAT = [".csv", ".xlsx", ".xls", ".json", ".html", ".h5", ".hdf5", ".parquet", ".feather", ".dta"]


class TableLoader:
-    '''
+    """
    Load tables from different files and serve a sql database for database operations
-    '''
+    """

-    def __init__(self, files: str,
-                 sql_path:str='sqlite:///mydatabase.db',
-                 verbose=False, **kwargs) -> None:
-        '''
+    def __init__(self, files: str, sql_path: str = "sqlite:///mydatabase.db", verbose=False, **kwargs) -> None:
+        """
        Args:
            files: list of files (list[file path, name])
            sql_path: how to serve the sql database
            **kwargs: keyword type arguments, useful for certain document types
-        '''
+        """
        self.data = {}
        self.verbose = verbose
        self.sql_path = sql_path
        self.kwargs = kwargs
        self.sql_engine = create_engine(self.sql_path)
        drop_table(self.sql_engine)

        self.sql_engine = create_engine(self.sql_path)
        for item in files:
            path = item[0]

@@ -42,68 +43,68 @@ class TableLoader:
                raise FileNotFoundError(f"{path} doesn't exists")
            if not any([path.endswith(i) for i in SUPPORTED_DATA_FORMAT]):
                raise TypeError(f"{path} not supported. Supported type {SUPPORTED_DATA_FORMAT}")

            logger.info("loading data", verbose=self.verbose)
            self.load_data(path)
            logger.info("data loaded", verbose=self.verbose)
            self.to_sql(path, dataset_name)

    def load_data(self, path):
-        '''
+        """
        Load data and serve the data as sql database.
        Data must be in pandas format
-        '''
+        """
        files = []
        # Handle glob expression
        try:
            files = glob.glob(path)
        except Exception as e:
            logger.error(e)
-        if len(files)==0:
+        if len(files) == 0:
            raise ValueError("Unsupported file/directory format. For directories, please use glob expression")
-        elif len(files)==1:
+        elif len(files) == 1:
            path = files[0]
        else:
            for file in files:
                self.load_data(file)

-        if path.endswith('.csv'):
+        if path.endswith(".csv"):
            # Load csv
            self.data[path] = pd.read_csv(path)
-        elif path.endswith('.xlsx') or path.endswith('.xls'):
+        elif path.endswith(".xlsx") or path.endswith(".xls"):
            # Load excel
            self.data[path] = pd.read_excel(path) # You can adjust the sheet_name as needed
-        elif path.endswith('.json'):
+        elif path.endswith(".json"):
            # Load json
            self.data[path] = pd.read_json(path)
-        elif path.endswith('.html'):
+        elif path.endswith(".html"):
            # Load html
            html_tables = pd.read_html(path)
            # Choose the desired table from the list of DataFrame objects
            self.data[path] = html_tables[0] # You may need to adjust this index
-        elif path.endswith('.h5') or path.endswith('.hdf5'):
+        elif path.endswith(".h5") or path.endswith(".hdf5"):
            # Load h5
-            self.data[path] = pd.read_hdf(path, key=self.kwargs.get('key', 'data')) # You can adjust the key as needed
+            self.data[path] = pd.read_hdf(path, key=self.kwargs.get("key", "data")) # You can adjust the key as needed
-        elif path.endswith('.parquet'):
+        elif path.endswith(".parquet"):
            # Load parquet
-            self.data[path] = pd.read_parquet(path, engine='fastparquet')
+            self.data[path] = pd.read_parquet(path, engine="fastparquet")
-        elif path.endswith('.feather'):
+        elif path.endswith(".feather"):
            # Load feather
            self.data[path] = pd.read_feather(path)
-        elif path.endswith('.dta'):
+        elif path.endswith(".dta"):
            # Load dta
            self.data[path] = pd.read_stata(path)
        else:
            raise ValueError("Unsupported file format")

    def to_sql(self, path, table_name):
-        '''
+        """
        Serve the data as sql database.
-        '''
+        """
-        self.data[path].to_sql(table_name, con=self.sql_engine, if_exists='replace', index=False)
+        self.data[path].to_sql(table_name, con=self.sql_engine, if_exists="replace", index=False)
        logger.info(f"Loaded to Sqlite3\nPath: {path}", verbose=self.verbose)
        return self.sql_path

    def get_sql_path(self):
        return self.sql_path

@@ -113,7 +114,3 @@ class TableLoader:
        self.sql_engine.dispose()
        del self.data
        del self.sql_engine
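For orientation, a minimal sketch of how `TableLoader` is typically used after this reformat (not part of the commit; the import path, file path and table name are assumptions):

```python
# The import path is an assumption; TableLoader is the class shown in the diff above.
from colossalqa.data_loader.table_dataloader import TableLoader

# files is a list of (file path, table name) pairs; any SUPPORTED_DATA_FORMAT entry works.
loader = TableLoader(files=[("./sales.csv", "sales")], sql_path="sqlite:///mydatabase.db", verbose=True)
print(loader.get_sql_path())  # "sqlite:///mydatabase.db" - the table is now queryable via SQL
```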
@@ -21,7 +21,7 @@ print(resp) # super-heavyweight awesome-natured yawning Australian creature!

"""
import json
-from typing import Any, List, Mapping, Optional
+from typing import Any, Mapping

import requests
from langchain.llms.base import LLM

@@ -31,31 +31,31 @@ from langchain.utils import get_from_dict_or_env
class ColossalCloudLLM(LLM):
    """
    A custom LLM class that integrates LLMs running on the ColossalCloud Platform

    """

    n: int
    gen_config: dict = None
    auth_config: dict = None
-    valid_gen_para: list = ['max_new_tokens', 'top_k',
-                            'top_p', 'temperature', 'repetition_penalty']
+    valid_gen_para: list = ["max_new_tokens", "top_k", "top_p", "temperature", "repetition_penalty"]

    def __init__(self, gen_config=None, **kwargs):
        """
        Args:
            gen_config: config for generation,
                max_new_tokens: 50 by default
                top_k: (1, vocab_size)
                top_p: (0, 1) if not None
                temperature: (0, inf) if not None
                repetition_penalty: (1, inf) if not None
        """
        super(ColossalCloudLLM, self).__init__(**kwargs)
        if gen_config is None:
            self.gen_config = {"max_new_tokens": 50}
        else:
            assert "max_new_tokens" in gen_config, "max_new_tokens is a compulsory key in the gen config"
            self.gen_config = gen_config

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""

@@ -63,17 +63,17 @@ class ColossalCloudLLM(LLM):

    @property
    def _llm_type(self) -> str:
-        return 'ColossalCloudLLM'
+        return "ColossalCloudLLM"

    def set_auth_config(self, **kwargs):
        url = get_from_dict_or_env(kwargs, "url", "URL")
        host = get_from_dict_or_env(kwargs, "host", "HOST")

        auth_config = {}
-        auth_config['endpoint'] = url
-        auth_config['Host'] = host
+        auth_config["endpoint"] = url
+        auth_config["Host"] = host
        self.auth_config = auth_config

    def _call(self, prompt: str, stop=None, **kwargs: Any) -> str:
        """
        Args:

@@ -81,15 +81,17 @@ class ColossalCloudLLM(LLM):
            stop: A list of strings to stop generation when encountered

        Returns:
            The string generated by the model
        """
        # Update the generation arguments
        for key, value in kwargs.items():
            if key not in self.valid_gen_para:
-                raise KeyError(f"Invalid generation parameter: '{key}'. Valid keys are: {', '.join(self.valid_gen_para)}")
+                raise KeyError(
+                    f"Invalid generation parameter: '{key}'. Valid keys are: {', '.join(self.valid_gen_para)}"
+                )
            if key in self.gen_config:
                self.gen_config[key] = value

        resp_text = self.text_completion(prompt, self.gen_config, self.auth_config)
        # TODO: This may cause excessive tokens count
        if stop is not None:

@@ -97,29 +99,19 @@ class ColossalCloudLLM(LLM):
                if stopping_words in resp_text:
                    resp_text = resp_text.split(stopping_words)[0]
        return resp_text

    def text_completion(self, prompt, gen_config, auth_config):
        # Complusory Parameters
-        endpoint = auth_config.pop('endpoint')
-        max_new_tokens = gen_config.pop('max_new_tokens')
+        endpoint = auth_config.pop("endpoint")
+        max_new_tokens = gen_config.pop("max_new_tokens")
        # Optional Parameters
-        optional_params = ['top_k', 'top_p', 'temperature', 'repetition_penalty'] # Self.optional
+        optional_params = ["top_k", "top_p", "temperature", "repetition_penalty"] # Self.optional
        gen_config = {key: gen_config[key] for key in optional_params if key in gen_config}
        # Define the data payload
-        data = {
-            "max_new_tokens": max_new_tokens,
-            "history": [
-                {"instruction": prompt, "response": ""}
-            ],
-            **gen_config
-        }
-        headers = {
-            "Content-Type": "application/json",
-            **auth_config # 'Host',
-        }
+        data = {"max_new_tokens": max_new_tokens, "history": [{"instruction": prompt, "response": ""}], **gen_config}
+        headers = {"Content-Type": "application/json", **auth_config} # 'Host',
        # Make the POST request
        response = requests.post(endpoint, headers=headers, data=json.dumps(data))
        response.raise_for_status() # raise error if return code is not 200(success)
        # Check the response
        return response.text

@@ -193,4 +193,3 @@ class VllmLLM(LLM):
    def _identifying_params(self) -> Mapping[str, int]:
        """Get the identifying parameters."""
        return {"n": self.n}
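For orientation, a minimal sketch of how `ColossalCloudLLM` is wired up (not part of the commit; the import path, endpoint URL and host are placeholders):

```python
# The import path is an assumption; ColossalCloudLLM is the class shown in the diff above.
from colossalqa.local.colossalcloud_llm import ColossalCloudLLM

llm = ColossalCloudLLM(n=1)
# Placeholder endpoint/host; set_auth_config can also read them from the URL and HOST env vars.
llm.set_auth_config(url="https://<your-endpoint>/generate", host="<your-host>")
resp = llm("What do you call a lazy kangaroo?", max_new_tokens=50)
print(resp)
```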
@@ -4,7 +4,6 @@ All custom prompt templates are defined here.

from langchain.prompts.prompt import PromptTemplate


# Below are Chinese retrieval qa prompts

_CUSTOM_SUMMARIZER_TEMPLATE_ZH = """请递进式地总结所提供的当前对话，将当前对话的摘要内容添加到先前已有的摘要上，返回一个融合了当前对话的新的摘要。

@@ -99,13 +99,7 @@ class CustomRetriever(BaseRetriever):
    def clear_documents(self):
        """Clear all document vectors from database"""
        for source in self.vector_stores:
-            index(
-                [],
-                self.record_managers[source],
-                self.vector_stores[source],
-                cleanup="full",
-                source_id_key="source"
-            )
+            index([], self.record_managers[source], self.vector_stores[source], cleanup="full", source_id_key="source")
        self.vector_stores = {}
        self.sql_index_database = {}
        self.record_managers = {}

@@ -1,6 +1,6 @@
Overview The Straits Times is the English flagship daily of SPH Media, one of the leading media companies in Asia. Launched on July 15, 1845, its comprehensive coverage of news from home and around the world makes The Straits Times the most-read newspaper in Singapore. Quality news, in-depth analyses, impactful commentaries and breaking stories are packaged to give readers riveting accounts of events in Singapore, the region, and beyond. The most read newspaper in Singapore, both in terms of print and digital, it reaches 1.33 million people every day. The Straits Times' key strength is in its world class coverage of news outside Singapore. With 20 bureaus in major cities around the world, The Straits Times correspondents bring world news to readers on a Singapore platter, helping readers to appreciate world events from a Singaporean perspective. Website http://www.straitstimes.com Phone 63196319Phone number is 63196319 Industry Newspaper Publishing Company size 1,001-5,000 employees 183 on LinkedIn Includes members with current employer listed as The Straits Times, including part-time roles. Headquarters Singapore, Singapore Founded 1845 Specialties News and Digital media
About With over 500 properties worldwide, Marriott Hotels has reimagined hospitality to exceed the expectations of business, group, and leisure travelers.
Marriott Hotels, Marriott’s flagship brand of quality-tier, full-service hotels and resorts, provides consistent, dependable and genuinely caring experiences to guests on their terms. Marriott is a brilliant host to guests who effortlessly blend life and work, and who are inspired by how modern travel enhances them both. Our hotels offer warm, professional service; sophisticated yet functional guest room design; lobby spaces that facilitate working, dining and socializing; restaurants and bars serving international cuisine prepared simply and from the freshest ingredients; meeting and event spaces and services that are gold standard; and expansive, 24-hour fitness facilities.
Overview AERCO International, Inc. is a recognized leader in delivering cost-effective, condensing commercial boilers, high-efficiency water heaters across a variety of markets including education, lodging, government, office buildings, healthcare, industrial and multifamily housing. AERCO's system design approach provides customer-specific solutions that deliver superior building performance at a lower operating cost while assuring uptime reliability. When AERCO was founded in 1949, it introduced a revolutionary design for an indirect-fired water heater that heated water on demand, and without storage, at a controlled temperature. This innovation became today's standard for water heaters, maximizing the recovery of latent heat energy and significantly increasing operating efficiency. AERCO continued to innovate and in 1988, introduced the first condensing and fully modulating boiler and water heater to the commercial market. The modulating capability of these products, still unsurpassed more than 25 years later, matches the equipment's output to real-time heating demand, ensuring the units draw no more fuel to operate than is absolutely necessary. This not only saves precious energy, but also ensures money doesn't needlessly disappear "up the stack." AERCO differentiates itself through a solution-based model, leveraging decades of engineering experience and industry application expertise to understand each customer’s unique needs. By partnering directly with customers and end-users to understand their project-specific requirements, AERCO provides tailored application solutions that are comprised of original product technologies including high efficiency condensing products, compact footprints, high turndown ratios, unique fuel delivery, leading control systems and proprietary design elements that combine to deliver up to 99% efficiency. Website http://www.aerco.com Phone 845-580-8000Phone number is 845-580-8000 Industry Industrial Machinery Manufacturing Company size 51-200 employees 119 on LinkedIn Includes members with current employer listed as AERCO International, Inc., including part-time roles. Headquarters Blauvelt, NY Founded 1949 Specialties Leading manufacturer of condensing boilers, water heating and energy recovery products and The originator of semi-instantaneous water heating
Prince PLC: Overview We are a global leader of quality water solutions for residential, industrial, municipal, and commercial settings. Our family of brands offers one of the most varied product lines in the world, with world-class, water-related solutions focused on: • Plumbing & Flow Control • Water Quality & Conditioning • Water Reuse & Drainage • HVAC • Municipal Waterworks Strategic Goals Watts Water is traded on the New York Stock Exchange under the symbol “WTS.” As a public company, growing shareholder value is critical. To that end, we focus on a five-part Global Strategy: Growth, Commercial Excellence, Operational Excellence, “One Watts Water,” and a Talent & Performance Culture. Follow us on all social media platforms @WattsWater Website http://www.watts.com/ Industry Wholesale Building Materials Company size 5,001-10,000 employees 2,248 on LinkedIn Includes members with current employer listed as Watts Water Technologies, including part-time roles. Headquarters North Andover, MA Specialties Plumbing, HVAC, Water Quality, Gas, Conditioning, Waterworks, and Drainage
About Courtyard Hotels is Marriott International’s largest hotel brand, with more than 1,100 hotels in over 50 countries worldwide. So, no matter where passion takes you, you’ll find us there to help you follow it. Proud members of Marriott Bonvoy.
@ -3,4 +3,4 @@
Marriott Hotels is the flagship brand of Marriott's premium, full-service hotels and resorts, offering guests a consistently reliable and genuinely caring experience. Marriott is a gracious host that lets guests easily blend life and work, inspired by how modern travel enhances both. Our hotels offer warm, professional service; refined yet practical guest room design; lobby spaces that make it easy to work, dine, and socialize; restaurants and bars serving simple international cuisine made with the freshest ingredients; gold-standard meeting and event venues and services; and spacious 24-hour fitness facilities.
AERCO International, Inc. is a recognized leader in delivering cost-effective condensing commercial boilers and high-efficiency water heaters to a variety of markets, including education, lodging, government, office buildings, healthcare, industrial and multifamily housing. AERCO's system design approach provides customer-specific solutions that deliver superior building performance at a lower operating cost while assuring uptime reliability. Founded in 1949, AERCO introduced a revolutionary design for an indirect-fired water heater that heated water on demand, at a controlled temperature, without storage. This innovation became today's standard for water heaters, maximizing the recovery of latent heat energy and significantly improving operating efficiency. AERCO continued to innovate and in 1988 introduced the first condensing and fully modulating boiler and water heater to the commercial market. The modulating capability of these products, still unsurpassed more than 25 years later, matches the equipment's output to real-time heating demand, ensuring the units consume no more fuel than absolutely necessary. This not only saves precious energy but also keeps money from needlessly disappearing "up the stack." AERCO differentiates itself through a solution-based model, leveraging decades of engineering experience and industry application expertise to understand each customer's unique needs. By working directly with customers and end users to understand their project-specific requirements, AERCO provides tailored application solutions built from original product technologies, including high-efficiency condensing products, compact footprints, high turndown ratios, unique fuel delivery, leading control systems and proprietary design elements that combine to deliver up to 99% efficiency. Website http://www.aerco.com Phone 845-580-8000 Industry Industrial Machinery Manufacturing Company size 51-200 employees; LinkedIn includes members with current employer listed as AERCO International, Inc., including part-time roles. Headquarters Blauvelt, NY Founded 1949 Specialties Leading manufacturer of condensing boilers, water heating and energy recovery products, and the originator of semi-instantaneous water heating
Prince PLC: Overview Prince PLC is a global leader in quality water solutions for residential, industrial, municipal, and commercial settings. Our family of brands offers one of the most varied product lines in the world, with world-class water-related solutions focused on: • Plumbing & Flow Control • Water Quality & Conditioning • Water Reuse & Drainage • HVAC • Municipal Waterworks. Strategic goals: Watts Water is traded on the New York Stock Exchange under the symbol "WTS." As a public company, growing shareholder value is critical. To that end, we focus on a five-part Global Strategy: Growth, Commercial Excellence, Operational Excellence, "One Watts Water," and a Talent & Performance Culture. Follow us on all social media platforms @WattsWater. Website http://www.watts.com/ Industry Wholesale Building Materials Company size 5,001-10,000 employees, 2,248 on LinkedIn (includes members with current employer listed as Watts Water Technologies, including part-time roles). Headquarters North Andover, MA Specialties Plumbing, HVAC, Water Quality, Gas, Conditioning, Waterworks, and Drainage
Courtyard Hotels is Marriott International's largest hotel brand, with more than 1,100 hotels in over 50 countries worldwide. So, no matter where passion takes you, you'll find us there to help you follow it. Proud members of Marriott Bonvoy.
@ -98,4 +98,4 @@ Index,Organization Id,Company Name,Website,Country,Description,Founded,Industry,
97,BA6Cd9Dae2Efd62,Good Ltd,http://duffy.com/,Anguilla,Reverse-engineered composite moratorium,1971,Consumer Services,4292
98,E7df80C60Abd7f9,Clements-Espinoza,http://www.flowers.net/,Falkland Islands (Malvinas),Progressive modular hub,1991,Broadcast Media,236
99,AFc285dbE2fEd24,Mendez Inc,https://www.burke.net/,Kyrgyz Republic,User-friendly exuding migration,1993,Education Management,339
100,e9eB5A60Cef8354,Watkins-Kaiser,http://www.herring.com/,Togo,Synergistic background access,2009,Financial Services,2785
@ -4,4 +4,4 @@
{"content":"Aliquam sollicitudin ante ligula, eget malesuada nibh efficitur et. Pellentesque massa sem, scelerisque sit amet odio id, cursus tempor urna. Etiam congue dignissim volutpat. Vestibulum pharetra libero et velit gravida euismod."}
],
"name":"player"
}
@ -1,101 +1,101 @@
Index,Organization Id,Name,Website,Country,Description,Founded,Industry,Number of employees
1,FAB0d41d5b5d22c,Ferrell LLC,https://price.net/,Papua New Guinea,Horizontal empowering knowledgebase,1990,Plastics,3498
2,6A7EdDEA9FaDC52,"Mckinney, Riley and Day",http://www.hall-buchanan.info/,Finland,User-centric system-worthy leverage,2015,Glass / Ceramics / Concrete,4952
3,0bFED1ADAE4bcC1,Hester Ltd,http://sullivan-reed.com/,China,Switchable scalable moratorium,1971,Public Safety,5287
4,2bFC1Be8a4ce42f,Holder-Sellers,https://becker.com/,Turkmenistan,De-engineered systemic artificial intelligence,2004,Automotive,921
5,9eE8A6a4Eb96C24,Mayer Group,http://www.brewer.com/,Mauritius,Synchronized needs-based challenge,1991,Transportation,7870
6,cC757116fe1C085,Henry-Thompson,http://morse.net/,Bahamas,Face-to-face well-modulated customer loyalty,1992,Primary / Secondary Education,4914
7,219233e8aFF1BC3,Hansen-Everett,https://www.kidd.org/,Pakistan,Seamless disintermediate collaboration,2018,Publishing Industry,7832
8,ccc93DCF81a31CD,Mcintosh-Mora,https://www.brooks.com/,Heard Island and McDonald Islands,Centralized attitude-oriented capability,1970,Import / Export,4389
9,0B4F93aA06ED03e,Carr Inc,http://ross.com/,Kuwait,Distributed impactful customer loyalty,1996,Plastics,8167
10,738b5aDe6B1C6A5,Gaines Inc,http://sandoval-hooper.com/,Uzbekistan,Multi-lateral scalable protocol,1997,Outsourcing / Offshoring,9698
11,AE61b8Ffebbc476,Kidd Group,http://www.lyons.com/,Bouvet Island (Bouvetoya),Proactive foreground paradigm,2001,Primary / Secondary Education,7473
12,eb3B7D06cCdD609,Crane-Clarke,https://www.sandoval.com/,Denmark,Front-line clear-thinking encryption,2014,Food / Beverages,9011
13,8D0c29189C9798B,"Keller, Campos and Black",https://www.garner.info/,Liberia,Ameliorated directional emulation,2020,Museums / Institutions,2862
14,D2c91cc03CA394c,Glover-Pope,http://www.silva.biz/,United Arab Emirates,Persevering contextually-based approach,2013,Medical Practice,9079
15,C8AC1eaf9C036F4,Pacheco-Spears,https://aguilar.com/,Sweden,Secured logistical synergy,1984,Maritime,769
16,b5D10A14f7a8AfE,Hodge-Ayers,http://www.archer-elliott.com/,Honduras,Future-proofed radical implementation,1990,Facilities Services,8508
17,68139b5C4De03B4,"Bowers, Guerra and Krause",http://www.carrillo-nicholson.com/,Uganda,De-engineered transitional strategy,1972,Primary / Secondary Education,6986
18,5c2EffEfdba2BdF,Mckenzie-Melton,http://montoya-thompson.com/,Hong Kong,Reverse-engineered heuristic alliance,1998,Investment Management / Hedge Fund / Private Equity,4589
19,ba179F19F7925f5,Branch-Mann,http://www.lozano.com/,Botswana,Adaptive intangible frame,1999,Architecture / Planning,7961
20,c1Ce9B350BAc66b,Weiss and Sons,https://barrett.com/,Korea,Sharable optimal functionalities,2011,Plastics,5984
21,8de40AC4e6EaCa4,"Velez, Payne and Coffey",http://burton.com/,Luxembourg,Mandatory coherent synergy,1986,Wholesale,5010
22,Aad86a4F0385F2d,Harrell LLC,http://www.frey-rosario.com/,Guadeloupe,Reverse-engineered mission-critical moratorium,2018,Construction,2185
23,22aC3FFd64fD703,"Eaton, Reynolds and Vargas",http://www.freeman.biz/,Monaco,Self-enabling multi-tasking process improvement,2014,Luxury Goods / Jewelry,8987
24,5Ec4C272bCf085c,Robbins-Cummings,http://donaldson-wilkins.com/,Belgium,Organic non-volatile hierarchy,1991,Pharmaceuticals,5038
25,5fDBeA8BB91a000,Jenkins Inc,http://www.kirk.biz/,South Africa,Front-line systematic help-desk,2002,Insurance,1215
26,dFfD6a6F9AC2d9C,"Greene, Benjamin and Novak",http://www.kent.net/,Romania,Centralized leadingedge moratorium,2012,Museums / Institutions,4941
27,4B217cC5a0674C5,"Dickson, Richmond and Clay",http://everett.com/,Czech Republic,Team-oriented tangible complexity,1980,Real Estate / Mortgage,3122
28,88b1f1cDcf59a37,Prince-David,http://thompson.com/,Christmas Island,Virtual holistic methodology,1970,Banking / Mortgage,1046
29,f9F7bBCAEeC360F,Ayala LLC,http://www.zhang.com/,Philippines,Open-source zero administration hierarchy,2021,Legal Services,7664
30,7Cb3AeFcE4Ba31e,Rivas Group,https://hebert.org/,Australia,Open-architected well-modulated capacity,1998,Logistics / Procurement,4155
31,ccBcC32adcbc530,"Sloan, Mays and Whitehead",http://lawson.com/,Chad,Face-to-face high-level conglomeration,1997,Civil Engineering,365
32,f5afd686b3d05F5,"Durham, Allen and Barnes",http://chan-stafford.org/,Zimbabwe,Synergistic web-enabled framework,1993,Mechanical or Industrial Engineering,6135
33,38C6cfC5074Fa5e,Fritz-Franklin,http://www.lambert.com/,Nepal,Automated 4thgeneration website,1972,Hospitality,4516
34,5Cd7efccCcba38f,Burch-Ewing,http://cline.net/,Taiwan,User-centric 4thgeneration system engine,1981,Venture Capital / VC,7443
35,9E6Acb51e3F9d6F,"Glass, Barrera and Turner",https://dunlap.com/,Kyrgyz Republic,Multi-channeled 3rdgeneration open system,2020,Utilities,2610
36,4D4d7E18321eaeC,Pineda-Cox,http://aguilar.org/,Bolivia,Fundamental asynchronous capability,2010,Human Resources / HR,1312
37,485f5d06B938F2b,"Baker, Mccann and Macdonald",http://www.anderson-barker.com/,Kenya,Cross-group user-facing focus group,2013,Legislative Office,1638
38,19E3a5Bf6dBDc4F,Cuevas-Moss,https://dodson-castaneda.net/,Guatemala,Extended human-resource intranet,1994,Music,9995
39,6883A965c7b68F7,Hahn PLC,http://newman.com/,Belarus,Organic logistical leverage,2012,Electrical / Electronic Manufacturing,3715
40,AC5B7AA74Aa4A2E,"Valentine, Ferguson and Kramer",http://stuart.net/,Jersey,Centralized secondary time-frame,1997,Non - Profit / Volunteering,3585
41,decab0D5027CA6a,Arroyo Inc,https://www.turner.com/,Grenada,Managed demand-driven website,2006,Writing / Editing,9067
42,dF084FbBb613eea,Walls LLC,http://www.reese-vasquez.biz/,Cape Verde,Self-enabling fresh-thinking installation,1989,Investment Management / Hedge Fund / Private Equity,1678
43,A2D89Ab9bCcAd4e,"Mitchell, Warren and Schneider",https://fox.biz/,Trinidad and Tobago,Enhanced intangible time-frame,2021,Capital Markets / Hedge Fund / Private Equity,3816
44,77aDc905434a49f,Prince PLC,https://www.watts.com/,Sweden,Profit-focused coherent installation,2016,Individual / Family Services,7645
45,235fdEFE2cfDa5F,Brock-Blackwell,http://www.small.com/,Benin,Secured foreground emulation,1986,Online Publishing,7034
46,1eD64cFe986BBbE,Walton-Barnett,https://ashley-schaefer.com/,Western Sahara,Right-sized clear-thinking flexibility,2001,Luxury Goods / Jewelry,1746
47,CbBbFcdd0eaE2cF,Bartlett-Arroyo,https://cruz.com/,Northern Mariana Islands,Realigned didactic function,1976,Civic / Social Organization,3987
48,49aECbDaE6aBD53,"Wallace, Madden and Morris",http://www.blevins-fernandez.biz/,Germany,Persistent real-time customer loyalty,2016,Pharmaceuticals,9443
49,7b3fe6e7E72bFa4,Berg-Sparks,https://cisneros-love.com/,Canada,Stand-alone static implementation,1974,Arts / Crafts,2073
50,c6DedA82A8aef7E,Gonzales Ltd,http://bird.com/,Tonga,Managed human-resource policy,1988,Consumer Goods,9069
51,7D9FBF85cdC3871,Lawson and Sons,https://www.wong.com/,French Southern Territories,Compatible analyzing intranet,2021,Arts / Crafts,3527
52,7dd18Fb7cB07b65,"Mcguire, Mcconnell and Olsen",https://melton-briggs.com/,Korea,Profound client-server frame,1988,Printing,8445
53,EF5B55FadccB8Fe,Charles-Phillips,https://bowman.com/,Cote d'Ivoire,Monitored client-server implementation,2012,Mental Health Care,3450
54,f8D4B99e11fAF5D,Odom Ltd,https://www.humphrey-hess.com/,Cote d'Ivoire,Advanced static process improvement,2012,Management Consulting,1825
55,e24D21BFd3bF1E5,Richard PLC,https://holden-coleman.net/,Mayotte,Object-based optimizing model,1971,Broadcast Media,4942
56,B9BdfEB6D3Ca44E,Sampson Ltd,https://blevins.com/,Cayman Islands,Intuitive local adapter,2005,Farming,1418
57,2a74D6f3D3B268e,"Cherry, Le and Callahan",https://waller-delacruz.biz/,Nigeria,Universal human-resource collaboration,2017,Entertainment / Movie Production,7202
58,Bf3F3f62c8aBC33,Cherry PLC,https://www.avila.info/,Marshall Islands,Persistent tertiary website,1980,Plastics,8245
59,aeBe26B80a7a23c,Melton-Nichols,https://kennedy.com/,Palau,User-friendly clear-thinking productivity,2021,Legislative Office,8741
60,aAeb29ad43886C6,Potter-Walsh,http://thomas-french.org/,Turkey,Optional non-volatile open system,2008,Human Resources / HR,6923
61,bD1bc6bB6d1FeD3,Freeman-Chen,https://mathis.com/,Timor-Leste,Phased next generation adapter,1973,International Trade / Development,346
62,EB9f456e8b7022a,Soto Group,https://norris.info/,Vietnam,Enterprise-wide executive installation,1988,Business Supplies / Equipment,9097
63,Dfef38C51D8DAe3,"Poole, Cruz and Whitney",https://reed.info/,Reunion,Balanced analyzing groupware,1978,Marketing / Advertising / Sales,2992
64,055ffEfB2Dd95B0,Riley Ltd,http://wiley.com/,Brazil,Optional exuding superstructure,1986,Textiles,9315
65,cBfe4dbAE1699da,"Erickson, Andrews and Bailey",https://www.hobbs-grant.com/,Eritrea,Vision-oriented secondary project,2014,Consumer Electronics,7829
66,fdFbecbadcdCdf1,"Wilkinson, Charles and Arroyo",http://hunter-mcfarland.com/,United States Virgin Islands,Assimilated 24/7 archive,1996,Building Materials,602
67,5DCb8A5a5ca03c0,Floyd Ltd,http://www.whitney.com/,Falkland Islands (Malvinas),Function-based fault-tolerant concept,2017,Public Relations / PR,2911
68,ce57DCbcFD6d618,Newman-Galloway,https://www.scott.com/,Luxembourg,Enhanced foreground collaboration,1987,Information Technology / IT,3934
69,5aaD187dc929371,Frazier-Butler,https://www.daugherty-farley.info/,Northern Mariana Islands,Persistent interactive circuit,1972,Outsourcing / Offshoring,5130
70,902D7Ac8b6d476b,Newton Inc,https://www.richmond-manning.info/,Netherlands Antilles,Fundamental stable info-mediaries,1976,Military Industry,563
71,32BB9Ff4d939788,Duffy-Levy,https://www.potter.com/,Guernsey,Diverse exuding installation,1982,Wireless,6146
72,adcB0afbE58bAe3,Wagner LLC,https://decker-esparza.com/,Uruguay,Reactive attitude-oriented toolset,1987,International Affairs,6874
73,dfcA1c84AdB61Ac,Mccall-Holmes,http://www.dean.com/,Benin,Object-based value-added database,2009,Legal Services,696
74,208044AC2fe52F3,Massey LLC,https://frazier.biz/,Suriname,Configurable zero administration Graphical User Interface,1986,Accounting,5004
75,f3C365f0c1A0623,Hicks LLC,http://alvarez.biz/,Pakistan,Quality-focused client-server Graphical User Interface,1970,Computer Software / Engineering,8480
76,ec5Bdd3CBAfaB93,"Cole, Russell and Avery",http://www.blankenship.com/,Mongolia,De-engineered fault-tolerant challenge,2000,Law Enforcement,7012
77,DDB19Be7eeB56B4,Cummings-Rojas,https://simon-pearson.com/,Svalbard & Jan Mayen Islands,User-centric modular customer loyalty,2012,Financial Services,7529
78,dd6CA3d0bc3cAfc,"Beasley, Greene and Mahoney",http://www.petersen-lawrence.com/,Togo,Extended content-based methodology,1976,Religious Institutions,869
79,A0B9d56e61070e3,"Beasley, Sims and Allison",http://burke.info/,Latvia,Secured zero tolerance hub,1972,Facilities Services,6182
80,cBa7EFe5D05Adaf,Crawford-Rivera,https://black-ramirez.org/,Cuba,Persevering exuding budgetary management,1999,Online Publishing,7805
81,Ea3f6D52Ec73563,Montes-Hensley,https://krueger.org/,Liechtenstein,Multi-tiered secondary productivity,2009,Printing,8433
82,bC0CEd48A8000E0,Velazquez-Odom,https://stokes.com/,Djibouti,Streamlined 6thgeneration function,2002,Alternative Dispute Resolution,4044
83,c89b9b59BC4baa1,Eaton-Morales,https://www.reeves-graham.com/,Micronesia,Customer-focused explicit frame,1990,Capital Markets / Hedge Fund / Private Equity,7013
84,FEC51bce8421a7b,"Roberson, Pennington and Palmer",http://www.keith-fisher.com/,Cameroon,Adaptive bi-directional hierarchy,1993,Telecommunications,5571
85,e0E8e27eAc9CAd5,"George, Russo and Guerra",https://drake.com/,Sweden,Centralized non-volatile capability,1989,Military Industry,2880
86,B97a6CF9bf5983C,Davila Inc,https://mcconnell.info/,Cocos (Keeling) Islands,Profit-focused dedicated frame,2017,Consumer Electronics,2215
87,a0a6f9b3DbcBEb5,Mays-Preston,http://www.browning-key.com/,Mali,User-centric heuristic focus group,2006,Military Industry,5786
88,8cC1bDa330a5871,Pineda-Morton,https://www.carr.com/,United States Virgin Islands,Grass-roots methodical info-mediaries,1991,Printing,6168
89,ED889CB2FE9cbd3,Huang and Sons,https://www.bolton.com/,Eritrea,Re-contextualized dynamic hierarchy,1981,Semiconductors,7484
90,F4Dc1417BC6cb8f,Gilbert-Simon,https://www.bradford.biz/,Burundi,Grass-roots radical parallelism,1973,Newspapers / Journalism,1927
91,7ABc3c7ecA03B34,Sampson-Griffith,http://hendricks.org/,Benin,Multi-layered composite paradigm,1972,Textiles,3881
92,4e0719FBE38e0aB,Miles-Dominguez,http://www.turner.com/,Gibraltar,Organized empowering forecast,1996,Civic / Social Organization,897
93,dEbDAAeDfaed00A,Rowe and Sons,https://www.simpson.org/,El Salvador,Balanced multimedia knowledgebase,1978,Facilities Services,8172
94,61BDeCfeFD0cEF5,"Valenzuela, Holmes and Rowland",https://www.dorsey.net/,Taiwan,Persistent tertiary focus group,1999,Transportation,1483
95,4e91eD25f486110,"Best, Wade and Shepard",https://zimmerman.com/,Zimbabwe,Innovative background definition,1991,Gambling / Casinos,4873
96,0a0bfFbBbB8eC7c,Holmes Group,https://mcdowell.org/,Ethiopia,Right-sized zero tolerance focus group,1975,Photography,2988
97,BA6Cd9Dae2Efd62,Good Ltd,http://duffy.com/,Anguilla,Reverse-engineered composite moratorium,1971,Consumer Services,4292
98,E7df80C60Abd7f9,Clements-Espinoza,http://www.flowers.net/,Falkland Islands (Malvinas),Progressive modular hub,1991,Broadcast Media,236
99,AFc285dbE2fEd24,Mendez Inc,https://www.burke.net/,Kyrgyz Republic,User-friendly exuding migration,1993,Education Management,339
100,e9eB5A60Cef8354,Watkins-Kaiser,http://www.herring.com/,Togo,Synergistic background access,2009,Financial Services,2785
@ -1,7 +1,7 @@
<!DOCTYPE html>
<!-- saved from url=(0046)https://docs.python.org/3/library/logging.html -->
<html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">

<meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/">
<meta property="og:title" content="logging — Logging facility for Python">
<meta property="og:type" content="website">
@ -16,18 +16,18 @@
<meta name="theme-color" content="#3776ab">

<title>logging — Logging facility for Python — Python 3.11.5 documentation</title><meta name="viewport" content="width=device-width, initial-scale=1.0">

<link rel="stylesheet" type="text/css" href="./test_files/pygments.css">
<link rel="stylesheet" type="text/css" href="./test_files/pydoctheme.css">
<link id="pygments_dark_css" media="(prefers-color-scheme: dark)" rel="stylesheet" type="text/css" href="./test_files/pygments_dark.css">

<script data-url_root="../" id="documentation_options" src="./test_files/documentation_options.js.download"></script>
<script src="./test_files/jquery.js.download"></script>
<script src="./test_files/underscore.js.download"></script>
<script src="./test_files/doctools.js.download"></script>

<script src="./test_files/sidebar.js.download"></script>

<link rel="search" type="application/opensearchdescription+xml" title="Search within Python 3.11.5 documentation" href="https://docs.python.org/3/_static/opensearch.xml">
<link rel="author" title="About these documents" href="https://docs.python.org/3/about.html">
<link rel="index" title="Index" href="https://docs.python.org/3/genindex.html">
@ -36,11 +36,11 @@
<link rel="next" title="logging.config — Logging configuration" href="https://docs.python.org/3/library/logging.config.html">
<link rel="prev" title="getopt — C-style parser for command line options" href="https://docs.python.org/3/library/getopt.html">
<link rel="canonical" href="https://docs.python.org/3/library/logging.html">








<style>
@media only screen {
table.full-width-table {

@ -52,7 +52,7 @@
<link rel="shortcut icon" type="image/png" href="./test_files/py.svg">
<script type="text/javascript" src="./test_files/copybutton.js.download"></script>
<script type="text/javascript" src="./test_files/menu.js.download"></script>
<script type="text/javascript" src="./test_files/themetoggle.js.download"></script>

</head>
<body data-new-gr-c-s-check-loaded="14.1038.0" data-gr-ext-installed="">
@ -79,7 +79,7 @@
<div class="menu-wrapper">
<nav class="menu" role="navigation" aria-label="main navigation" tabindex="-1">
<div class="language_switcher_placeholder"><select id="language_select"><option value="en" selected="selected">English</option><option value="es">Spanish</option><option value="fr">French</option><option value="ja">Japanese</option><option value="ko">Korean</option><option value="pt-br">Brazilian Portuguese</option><option value="tr">Turkish</option><option value="zh-cn">Simplified Chinese</option><option value="zh-tw">Traditional Chinese</option></select></div>

<label class="theme-selector-label">
Theme
<select class="theme-selector" oninput="activateTheme(this.value)">
@ -131,7 +131,7 @@
</div>
</div>


<div class="related" role="navigation" aria-label="related navigation">
<h3>Navigation</h3>
<ul>
@ -151,7 +151,7 @@
<div class="version_switcher_placeholder"><select id="version_select"><option value="3.13">dev (3.13)</option><option value="3.12">pre (3.12)</option><option value="3.11" selected="selected">3.11.5</option><option value="3.10">3.10</option><option value="3.9">3.9</option><option value="3.8">3.8</option><option value="3.7">3.7</option><option value="3.6">3.6</option><option value="3.5">3.5</option><option value="2.7">2.7</option></select></div>
</li>
<li>

</li>
<li id="cpython-language-and-version">
<a href="https://docs.python.org/3/index.html">3.11.5 Documentation</a> »
@ -161,7 +161,7 @@
<li class="nav-item nav-item-2"><a href="https://docs.python.org/3/library/allos.html" accesskey="U">Generic Operating System Services</a> »</li>
<li class="nav-item nav-item-this"><a href="https://docs.python.org/3/library/logging.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code> — Logging facility for Python</a></li>
<li class="right">


<div class="inline-search" role="search">
<form class="inline-search" action="https://docs.python.org/3/search.html" method="get">
@ -180,15 +180,15 @@
<option value="dark">Dark</option>
</select>
</label> |</li>

</ul>
</div>

<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">

<section id="module-logging">
<span id="logging-logging-facility-for-python"></span><h1><a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-logging" title="logging: Flexible event logging system for applications."><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code></a> — Logging facility for Python<a class="headerlink" href="https://docs.python.org/3/library/logging.html#module-logging" title="Permalink to this headline">¶</a></h1>
<p><strong>Source code:</strong> <a class="reference external" href="https://github.com/python/cpython/tree/3.11/Lib/logging/__init__.py">Lib/logging/__init__.py</a></p>
@ -1871,7 +1871,7 @@ library.</p>
</div>
<div id="sidebarbutton"><span>«</span></div></div>
<div class="clearer"></div>
</div>
<div class="related" role="navigation" aria-label="related navigation">
<h3>Navigation</h3>
<ul>
@ -1891,7 +1891,7 @@ library.</p>
<div class="version_switcher_placeholder"><select id="version_select"><option value="3.13">dev (3.13)</option><option value="3.12">pre (3.12)</option><option value="3.11" selected="selected">3.11.5</option><option value="3.10">3.10</option><option value="3.9">3.9</option><option value="3.8">3.8</option><option value="3.7">3.7</option><option value="3.6">3.6</option><option value="3.5">3.5</option><option value="2.7">2.7</option></select></div>
</li>
<li>

</li>
<li id="cpython-language-and-version">
<a href="https://docs.python.org/3/index.html">3.11.5 Documentation</a> »
@ -1901,7 +1901,7 @@ library.</p>
<li class="nav-item nav-item-2"><a href="https://docs.python.org/3/library/allos.html">Generic Operating System Services</a> »</li>
<li class="nav-item nav-item-this"><a href="https://docs.python.org/3/library/logging.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code> — Logging facility for Python</a></li>
<li class="right">


<div class="inline-search" role="search">
<form class="inline-search" action="https://docs.python.org/3/search.html" method="get">
@ -1920,9 +1920,9 @@ library.</p>
<option value="dark">Dark</option>
</select>
</label> |</li>

</ul>
</div>
<div class="footer">
© <a href="https://docs.python.org/3/copyright.html">Copyright</a> 2001-2023, Python Software Foundation.
<br>
@ -1946,7 +1946,7 @@ library.</p>
</div>

<script type="text/javascript" src="./test_files/switchers.js.download"></script>

<div id="hl-aria-live-message-container" aria-live="polite" class="visually-hidden"></div><div id="hl-aria-live-alert-container" role="alert" aria-live="assertive" class="visually-hidden"></div></body><grammarly-desktop-integration data-grammarly-shadow-root="true"><template shadowrootmode="open"><style>
div.grammarly-desktop-integration {
position: absolute;
@ -1967,4 +1967,4 @@
div.grammarly-desktop-integration:before {
content: attr(data-content);
}
</style><div aria-label="grammarly-integration" role="group" tabindex="-1" class="grammarly-desktop-integration" data-content="{"mode":"limited","isActive":false,"isUserDisabled":false}"></div></template></grammarly-desktop-integration></html>
@ -34,9 +34,9 @@ python api_server.py --host localhost --port $PORT_NUMBER --model $PATH_TO_MODEL
### Collect your data

For the ChatGPT-based agent, we support document retrieval and simple SQL search.
If you want to run the demo locally, we provide a document retrieval based conversation system built upon langchain. It accepts a wide range of documents.

Read the comments under ./colossalqa/data_loader for more detail.
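For illustration, a minimal sketch of starting a local retrieval conversation session with this API; the model paths and names below are placeholders, not values shipped with the project:

```python
# Minimal sketch: launch a bilingual document-retrieval conversation locally.
# The paths and model names are placeholders; pass the checkpoints you actually use.
from colossalqa.retrieval_conversation_universal import UniversalRetrievalConversation

session = UniversalRetrievalConversation(
    files_en=None,  # document paths are requested interactively at runtime
    files_zh=None,
    en_model_path="/path/to/english/llm",
    en_model_name="llama",
    zh_model_path="/path/to/chinese/llm",
    zh_model_name="chatglm2",
    sql_file_path="/path/to/empty/folder/for/sql/index",
)
session.start_test_session()
```

The same options are exposed as command-line flags by the example script shown later in this diff.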
### Serving
We currently use vllm for serving; it will be replaced with Colossal Inference when ready. Please refer to the class VllmLLM.
@ -1,38 +1,38 @@
Your Name
Lorem ipsum dolor sit amet, consectetuer adipiscing elit
123 Your Street
Your City, ST 12345
(123) 456-7890
no_reply@example.com
EXPERIENCE
Company, Location — Job Title
MONTH 20XX - PRESENT
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh.
Company, Location — Job Title
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh.
Company, Location — Job Title
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh.
EDUCATION
School Name, Location — Degree
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore.
School Name, Location — Degree
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam.
PROJECTS
Project Name — Detail
Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
SKILLS
* Lorem ipsum dolor sit amet.
* Consectetuer adipiscing elit.
* Sed diam nonummy nibh euismod tincidunt.
* Laoreet dolore magna aliquam erat volutpat.
AWARDS
Lorem ipsum dolor sit amet Consectetuer adipiscing elit, Sed diam nonummy
Nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.
Lorem ipsum dolor sit amet Consectetuer adipiscing elit, Sed diam nonummy
Nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.
LANGUAGES
Lorem ipsum, Dolor sit amet, Consectetuer
@@ -1,22 +1,27 @@
import argparse

from colossalqa.retrieval_conversation_universal import UniversalRetrievalConversation

-if __name__ == '__main__':
+if __name__ == "__main__":
    # Parse arguments
    parser = argparse.ArgumentParser()
-    parser.add_argument('--en_model_path', type=str, default=None)
-    parser.add_argument('--zh_model_path', type=str, default=None)
-    parser.add_argument('--zh_model_name', type=str, default=None)
-    parser.add_argument('--en_model_name', type=str, default=None)
-    parser.add_argument('--sql_file_path', type=str, default=None, help='path to the a empty folder for storing sql files for indexing')
+    parser.add_argument("--en_model_path", type=str, default=None)
+    parser.add_argument("--zh_model_path", type=str, default=None)
+    parser.add_argument("--zh_model_name", type=str, default=None)
+    parser.add_argument("--en_model_name", type=str, default=None)
+    parser.add_argument(
+        "--sql_file_path", type=str, default=None, help="path to the a empty folder for storing sql files for indexing"
+    )
    args = parser.parse_args()

    # Will ask for documents path in running time
-    session = UniversalRetrievalConversation(files_en=None,
-        files_zh=None,
-        zh_model_path=args.zh_model_path, en_model_path=args.en_model_path,
-        zh_model_name=args.zh_model_name, en_model_name=args.en_model_name,
-        sql_file_path=args.sql_file_path
-    )
+    session = UniversalRetrievalConversation(
+        files_en=None,
+        files_zh=None,
+        zh_model_path=args.zh_model_path,
+        en_model_path=args.en_model_path,
+        zh_model_name=args.zh_model_name,
+        en_model_name=args.en_model_name,
+        sql_file_path=args.sql_file_path,
+    )
    session.start_test_session()
@@ -5,13 +5,7 @@ from colossalqa.chain.retrieval_qa.base import RetrievalQA
from colossalqa.data_loader.document_loader import DocumentLoader
from colossalqa.memory import ConversationBufferWithSummary
from colossalqa.mylogging import get_logger
-from colossalqa.prompt.prompt import (
-    PROMPT_DISAMBIGUATE_ZH,
-    PROMPT_RETRIEVAL_QA_ZH,
-    SUMMARY_PROMPT_ZH,
-    ZH_RETRIEVAL_QA_REJECTION_ANSWER,
-    ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS,
-)
+from colossalqa.prompt.prompt import ZH_RETRIEVAL_QA_REJECTION_ANSWER, ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS
from colossalqa.retriever import CustomRetriever
from langchain import LLMChain
from langchain.embeddings import HuggingFaceEmbeddings
@@ -116,13 +110,13 @@ class RAG_ChatBot:
    def split_docs(self, documents):
        doc_splits = self.text_splitter.split_documents(documents)
        return doc_splits

    def clear_docs(self, **kwargs):
        self.documents = []
        self.docs_names = []
        self.info_retriever.clear_documents()
        self.memory.initiate_document_retrieval_chain(self.llm, kwargs["gen_qa_prompt"], self.info_retriever)

    def reset_config(self, rag_config):
        self.rag_config = rag_config
        self.set_embed_model(**self.rag_config["embed"])
@@ -115,4 +115,4 @@ python webui.py --http_host "your-backend-api-host" --http_port "your-backend-ap

After launching the script, you can upload files and engage with the chatbot through your web browser.

![ColossalQA Demo](https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/colossalqa/new_ui.png)
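If you prefer to drive the backend directly instead of going through the Gradio page, the sketch below mirrors the webui's get_response() helper; the endpoint paths and backend address are assumptions for illustration, not values taken from the server code.

```python
# Hypothetical client sketch mirroring webui.py's get_response() helper.
# The "/update" and "/generate" paths and the port are placeholders.
import json

import requests

BACKEND = "http://localhost:13666"


def post(path, payload):
    headers = {"Content-type": "application/json"}
    resp = requests.post(f"{BACKEND}{path}", json=payload, headers=headers)
    return json.loads(resp.content)


# Register documents, then ask a question (fields match DocUpdateReq / GenerationTaskReq).
print(post("/update", {"doc_files": ["./company_info.txt"], "action": "add"}))
print(post("/generate", {"user_input": "Which company runs hotels?"}))
```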
@@ -1,58 +1,30 @@
-from colossalqa.prompt.prompt import (
-    PROMPT_DISAMBIGUATE_ZH,
-    PROMPT_RETRIEVAL_QA_ZH,
-    SUMMARY_PROMPT_ZH,
-    ZH_RETRIEVAL_QA_REJECTION_ANSWER,
-    ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS,
-)
+from colossalqa.prompt.prompt import PROMPT_DISAMBIGUATE_ZH, PROMPT_RETRIEVAL_QA_ZH, SUMMARY_PROMPT_ZH
from colossalqa.text_splitter import ChineseTextSplitter

ALL_CONFIG = {
    "embed": {
        "embed_name": "m3e",  # embedding model name
        "embed_model_name_or_path": "moka-ai/m3e-base",  # path to embedding model, could be a local path or a huggingface path
-        "embed_model_device": {
-            "device": "cpu"
-        }
+        "embed_model_device": {"device": "cpu"},
    },
    "model": {
        "mode": "api",  # "local" for loading models, "api" for using model api
        "model_name": "chatgpt_api",  # local model name, "chatgpt_api" or "pangu_api"
        "model_path": "",  # path to the model, could be a local path or a huggingface path. don't need if using an api
-        "device": {
-            "device": "cuda"
-        }
-    },
-    "splitter": {
-        "name": ChineseTextSplitter
-    },
-    "retrieval": {
-        "retri_top_k": 3,
-        "retri_kb_file_path": "./",  # path to store database files
-        "verbose": True
+        "device": {"device": "cuda"},
    },
+    "splitter": {"name": ChineseTextSplitter},
+    "retrieval": {"retri_top_k": 3, "retri_kb_file_path": "./", "verbose": True},  # path to store database files
    "chain": {
        "mem_summary_prompt": SUMMARY_PROMPT_ZH,  # summary prompt template
        "mem_human_prefix": "用户",
        "mem_ai_prefix": "Assistant",
        "mem_max_tokens": 2000,
-        "mem_llm_kwargs": {
-            "max_new_tokens": 50,
-            "temperature": 1,
-            "do_sample": True
-        },
+        "mem_llm_kwargs": {"max_new_tokens": 50, "temperature": 1, "do_sample": True},
        "disambig_prompt": PROMPT_DISAMBIGUATE_ZH,  # disambiguate prompt template
-        "disambig_llm_kwargs": {
-            "max_new_tokens": 30,
-            "temperature": 1,
-            "do_sample": True
-        },
-        "gen_llm_kwargs": {
-            "max_new_tokens": 100,
-            "temperature": 1,
-            "do_sample": True
-        },
+        "disambig_llm_kwargs": {"max_new_tokens": 30, "temperature": 1, "do_sample": True},
+        "gen_llm_kwargs": {"max_new_tokens": 100, "temperature": 1, "do_sample": True},
        "gen_qa_prompt": PROMPT_RETRIEVAL_QA_ZH,  # generation prompt template
-        "verbose": True
-    }
+        "verbose": True,
+    },
}
@@ -1,27 +1,18 @@
import argparse
-import os
from typing import List, Union

-from colossalqa.local.llm import ColossalAPI, ColossalLLM
-from colossalqa.data_loader.document_loader import DocumentLoader
-from colossalqa.mylogging import get_logger
-from colossalqa.retrieval_conversation_zh import ChineseRetrievalConversation
-from colossalqa.retriever import CustomRetriever
-from enum import Enum
-from fastapi import FastAPI, Request
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from pydantic import BaseModel, Field
-import uvicorn

import config
+import uvicorn
+from colossalqa.local.llm import ColossalAPI, ColossalLLM
+from colossalqa.mylogging import get_logger
+from fastapi import FastAPI, Request
+from pydantic import BaseModel
from RAG_ChatBot import RAG_ChatBot
from utils import DocAction


logger = get_logger()


def parseArgs():
    parser = argparse.ArgumentParser()
    parser.add_argument("--http_host", default="0.0.0.0")
@@ -36,6 +27,7 @@ class DocUpdateReq(BaseModel):
    doc_files: Union[List[str], str, None] = None
    action: DocAction = DocAction.ADD


class GenerationTaskReq(BaseModel):
    user_input: str

@@ -45,7 +37,7 @@ def update_docs(data: DocUpdateReq, request: Request):
    if data.action == "add":
        if isinstance(data.doc_files, str):
            data.doc_files = [data.doc_files]
-        chatbot.load_doc_from_files(files = data.doc_files)
+        chatbot.load_doc_from_files(files=data.doc_files)
        all_docs = ""
        for doc in chatbot.docs_names:
            all_docs += f"\t{doc}\n\n"
@@ -79,17 +71,18 @@ if __name__ == "__main__":
    elif all_config["model"]["mode"] == "api":
        if model_name == "pangu_api":
            from colossalqa.local.pangu_llm import Pangu

            gen_config = {
                "user": "User",
                "max_tokens": all_config["chain"]["disambig_llm_kwargs"]["max_new_tokens"],
                "temperature": all_config["chain"]["disambig_llm_kwargs"]["temperature"],
-                "n": 1  # the number of responses generated
+                "n": 1,  # the number of responses generated
            }
            llm = Pangu(gen_config=gen_config)
            llm.set_auth_config()  # verify user's auth info here
        elif model_name == "chatgpt_api":
            from langchain.llms import OpenAI

            llm = OpenAI()
        else:
            raise ValueError("Unsupported mode.")
@@ -1,24 +1,26 @@
import argparse
import json
import os
-import requests

import gradio as gr
+import requests
from utils import DocAction


def parseArgs():
    parser = argparse.ArgumentParser()
    parser.add_argument("--http_host", default="0.0.0.0")
    parser.add_argument("--http_port", type=int, default=13666)
    return parser.parse_args()


def get_response(data, url):
    headers = {"Content-type": "application/json"}
    response = requests.post(url, json=data, headers=headers)
    response = json.loads(response.content)
    return response


def add_text(history, text):
    history = history + [(text, None)]
    return history, gr.update(value=None, interactive=True)
@@ -28,35 +30,28 @@ def add_file(history, files):
    files_string = "\n".join([os.path.basename(file.name) for file in files])

    doc_files = [file.name for file in files]
-    data = {
-        "doc_files": doc_files,
-        "action": DocAction.ADD
-    }
+    data = {"doc_files": doc_files, "action": DocAction.ADD}
    response = get_response(data, update_url)["response"]
    history = history + [(files_string, response)]
    return history

-def bot(history):
-    data = {
-        "user_input": history[-1][0].strip()
-    }
+def bot(history):
+    data = {"user_input": history[-1][0].strip()}
    response = get_response(data, gen_url)

    if response["error"] != "":
        raise gr.Error(response["error"])

    history[-1][1] = response["response"]
    yield history


def restart(chatbot, txt):
    # Reset the conversation state and clear the chat history
-    data = {
-        "doc_files": "",
-        "action": DocAction.CLEAR
-    }
-    response = get_response(data, update_url)
+    data = {"doc_files": "", "action": DocAction.CLEAR}
+    get_response(data, update_url)

    return gr.update(value=None), gr.update(value=None, interactive=True)


@@ -97,7 +92,7 @@ with gr.Blocks(css=CSS) as demo:

    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(bot, chatbot, chatbot)
    # Clear the original textbox
    txt_msg.then(lambda: gr.update(value=None, interactive=True), None, [txt], queue=False)
    # Click Upload Button: 1. upload files 2. send config to backend, initalize model 3. get response "conversation_ready" = True/False
    file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False)

@@ -1,4 +1,4 @@
[pytest]
markers =
    dist: tests which are run in a multi-GPU or multi-machine environment (at least 4 GPUs)
    largedist: tests which are run in a multi-GPU or multi-machine environment (at least 8 GPUs)
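For context, a hypothetical test opting into the `dist` marker registered above might look like the sketch below; the test body and GPU-count check are illustrative only, not an existing test in the repository.

```python
# Hypothetical example showing how the "dist" marker from pytest.ini is applied.
import pytest
import torch


@pytest.mark.dist
def test_runs_only_in_multi_gpu_ci():
    # Placeholder assertion; a real dist test would spawn one process per GPU.
    assert torch.cuda.device_count() >= 4
```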
@@ -1,21 +1,21 @@
import os

from colossalqa.data_loader.document_loader import DocumentLoader


def test_add_document():
-    PATH = os.environ.get('TEST_DOCUMENT_LOADER_DATA_PATH')
-    files = [[PATH, 'all data']]
+    PATH = os.environ.get("TEST_DOCUMENT_LOADER_DATA_PATH")
+    files = [[PATH, "all data"]]
    document_loader = DocumentLoader(files)
    documents = document_loader.all_data
    all_files = []
    for doc in documents:
-        assert isinstance(doc.page_content, str)==True
-        if doc.metadata['source'] not in all_files:
-            all_files.append(doc.metadata['source'])
+        assert isinstance(doc.page_content, str) == True
+        if doc.metadata["source"] not in all_files:
+            all_files.append(doc.metadata["source"])
    print(all_files)
    assert len(all_files) == 6


-if __name__=='__main__':
+if __name__ == "__main__":
    test_add_document()
@@ -4,56 +4,44 @@ from colossalqa.retrieval_conversation_universal import UniversalRetrievalConver


def test_en_retrievalQA():
-    data_path_en = os.environ.get('TEST_DATA_PATH_EN')
-    data_path_zh = os.environ.get('TEST_DATA_PATH_ZH')
-    en_model_path = os.environ.get('EN_MODEL_PATH')
-    zh_model_path = os.environ.get('ZH_MODEL_PATH')
-    zh_model_name = os.environ.get('ZH_MODEL_NAME')
-    en_model_name = os.environ.get('EN_MODEL_NAME')
-    sql_file_path = os.environ.get('SQL_FILE_PATH')
-    qa_session = UniversalRetrievalConversation(files_en=[{
-        'data_path': data_path_en,
-        'name': 'company information',
-        'separator': '\n'
-    }],
-        files_zh=[{
-        'data_path': data_path_zh,
-        'name': 'company information',
-        'separator': '\n'
-    }],
-        zh_model_path=zh_model_path,
-        en_model_path=en_model_path,
-        zh_model_name=zh_model_name,
-        en_model_name=en_model_name,
-        sql_file_path=sql_file_path)
-    ans = qa_session.run("which company runs business in hotel industry?", which_language='en')
+    data_path_en = os.environ.get("TEST_DATA_PATH_EN")
+    data_path_zh = os.environ.get("TEST_DATA_PATH_ZH")
+    en_model_path = os.environ.get("EN_MODEL_PATH")
+    zh_model_path = os.environ.get("ZH_MODEL_PATH")
+    zh_model_name = os.environ.get("ZH_MODEL_NAME")
+    en_model_name = os.environ.get("EN_MODEL_NAME")
+    sql_file_path = os.environ.get("SQL_FILE_PATH")
+    qa_session = UniversalRetrievalConversation(
+        files_en=[{"data_path": data_path_en, "name": "company information", "separator": "\n"}],
+        files_zh=[{"data_path": data_path_zh, "name": "company information", "separator": "\n"}],
+        zh_model_path=zh_model_path,
+        en_model_path=en_model_path,
+        zh_model_name=zh_model_name,
+        en_model_name=en_model_name,
+        sql_file_path=sql_file_path,
+    )
+    ans = qa_session.run("which company runs business in hotel industry?", which_language="en")
    print(ans)


def test_zh_retrievalQA():
-    data_path_en = os.environ.get('TEST_DATA_PATH_EN')
-    data_path_zh = os.environ.get('TEST_DATA_PATH_ZH')
-    en_model_path = os.environ.get('EN_MODEL_PATH')
-    zh_model_path = os.environ.get('ZH_MODEL_PATH')
-    zh_model_name = os.environ.get('ZH_MODEL_NAME')
-    en_model_name = os.environ.get('EN_MODEL_NAME')
-    sql_file_path = os.environ.get('SQL_FILE_PATH')
-    qa_session = UniversalRetrievalConversation(files_en=[{
-        'data_path': data_path_en,
-        'name': 'company information',
-        'separator': '\n'
-    }],
-        files_zh=[{
-        'data_path': data_path_zh,
-        'name': 'company information',
-        'separator': '\n'
-    }],
-        zh_model_path=zh_model_path,
-        en_model_path=en_model_path,
-        zh_model_name=zh_model_name,
-        en_model_name=en_model_name,
-        sql_file_path=sql_file_path)
-    ans = qa_session.run("哪家公司在经营酒店业务?", which_language='zh')
+    data_path_en = os.environ.get("TEST_DATA_PATH_EN")
+    data_path_zh = os.environ.get("TEST_DATA_PATH_ZH")
+    en_model_path = os.environ.get("EN_MODEL_PATH")
+    zh_model_path = os.environ.get("ZH_MODEL_PATH")
+    zh_model_name = os.environ.get("ZH_MODEL_NAME")
+    en_model_name = os.environ.get("EN_MODEL_NAME")
+    sql_file_path = os.environ.get("SQL_FILE_PATH")
+    qa_session = UniversalRetrievalConversation(
+        files_en=[{"data_path": data_path_en, "name": "company information", "separator": "\n"}],
+        files_zh=[{"data_path": data_path_zh, "name": "company information", "separator": "\n"}],
+        zh_model_path=zh_model_path,
+        en_model_path=en_model_path,
+        zh_model_name=zh_model_name,
+        en_model_name=en_model_name,
+        sql_file_path=sql_file_path,
+    )
+    ans = qa_session.run("哪家公司在经营酒店业务?", which_language="zh")
    print(ans)
@@ -1 +1 @@
0.0.1
@@ -1,5 +1,5 @@
-from .initialize import launch, launch_from_openmpi, launch_from_slurm, launch_from_torch
from . import accelerator
+from .initialize import launch, launch_from_openmpi, launch_from_slurm, launch_from_torch

try:
    # .version will be created by setup.py
@@ -27,7 +27,7 @@ from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
from torch.utils.data import DataLoader

-from colossalai.checkpoint_io import CheckpointIO, GeneralCheckpointIO, utils, CheckpointIndexFile
+from colossalai.checkpoint_io import CheckpointIndexFile, CheckpointIO, GeneralCheckpointIO, utils
from colossalai.cluster import DistCoordinator
from colossalai.interface import ModelWrapper, OptimizerWrapper

@@ -93,9 +93,7 @@ class TorchFSDPCheckpointIO(GeneralCheckpointIO):

        Path(checkpoint_path).mkdir(parents=True, exist_ok=True)
        with FSDP.state_dict_type(
-            model.unwrap(),
-            StateDictType.FULL_STATE_DICT,
-            FullStateDictConfig(offload_to_cpu=True, rank0_only=True)
+            model.unwrap(), StateDictType.FULL_STATE_DICT, FullStateDictConfig(offload_to_cpu=True, rank0_only=True)
        ):
            state_dict = model.unwrap().state_dict()

@@ -172,7 +170,7 @@ class TorchFSDPCheckpointIO(GeneralCheckpointIO):
        with FSDP.state_dict_type(
            optimizer.unwrap_model().unwrap(),
            StateDictType.FULL_STATE_DICT,
-            FullStateDictConfig(offload_to_cpu=True, rank0_only=True)
+            FullStateDictConfig(offload_to_cpu=True, rank0_only=True),
        ):
            fsdp_optim_state = FSDP.full_optim_state_dict(
                optimizer.unwrap_model().unwrap(), optim=optimizer, rank0_only=True
@@ -241,7 +239,6 @@ class TorchFSDPCheckpointIO(GeneralCheckpointIO):
            )
            optimizer.load_state_dict(fsdp_state)


    def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
        """
        Save model to checkpoint but only on master process.
@@ -294,6 +294,7 @@ def shard_optimizer_checkpoint(state_dict: dict, max_shard_size: int = 1024) ->
# Helper functions for saving state dict
# ======================================


def save_state_dict(state_dict: dict, checkpoint_file_path: str, use_safetensors: bool) -> None:
    """
    Save state dict to checkpoint.
@@ -305,7 +306,7 @@ def save_state_dict(state_dict: dict, checkpoint_file_path: str, use_safetensors
    """
    # Move all tensors in the state_dict to CPU before saving to avoid serialization issues
    state_dict_cpu = tree_map(lambda x: x.cpu() if torch.is_tensor(x) else x, state_dict)

    if use_safetensors:
        assert is_safetensors_available(), "safetensors is not available."
        assert checkpoint_file_path.endswith(
@@ -225,4 +225,3 @@ class ProcessGroupMesh:
            # no need to cache it explicitly, since it will be cached in `create_group_along_axis`
            return self.create_group_along_axis(axis, indices_at_axis, backend=backend)
        return self._ranks_to_group[ranks_in_group]
@@ -29,13 +29,17 @@ except:

try:
    from colossalai.kernel.triton.flash_decoding import token_flash_decoding

    HAS_TRITON_FLASH_DECODING_KERNEL = True
except:
-    print("no triton flash decoding support, please install lightllm from https://github.com/ModelTC/lightllm/blob/ece7b43f8a6dfa74027adc77c2c176cff28c76c8")
+    print(
+        "no triton flash decoding support, please install lightllm from https://github.com/ModelTC/lightllm/blob/ece7b43f8a6dfa74027adc77c2c176cff28c76c8"
+    )
    HAS_TRITON_FLASH_DECODING_KERNEL = False

try:
    from flash_attn import flash_attn_with_kvcache

    HAS_FLASH_KERNEL = True
except:
    HAS_FLASH_KERNEL = False
@@ -48,6 +52,7 @@ def rotate_half(x):
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)


def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
    # The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
    cos = cos.squeeze(1).squeeze(0)  # [seq_len, dim]
@@ -96,17 +101,22 @@ def llama_triton_context_attention(
        infer_state.max_len_in_batch,
    )

-def llama_triton_token_attention(query_states, attn_output, infer_state, num_key_value_groups=1, q_head_num = -1, head_dim = -1):
+
+def llama_triton_token_attention(
+    query_states, attn_output, infer_state, num_key_value_groups=1, q_head_num=-1, head_dim=-1
+):
    if HAS_TRITON_FLASH_DECODING_KERNEL and q_head_num != -1 and head_dim != -1:
-        token_flash_decoding(q = query_states,
-                             o_tensor = attn_output,
-                             infer_state = infer_state,
-                             q_head_num = q_head_num,
-                             head_dim = head_dim,
-                             cache_k = infer_state.cache_manager.key_buffer[infer_state.decode_layer_id],
-                             cache_v = infer_state.cache_manager.value_buffer[infer_state.decode_layer_id])
-        return
+        token_flash_decoding(
+            q=query_states,
+            o_tensor=attn_output,
+            infer_state=infer_state,
+            q_head_num=q_head_num,
+            head_dim=head_dim,
+            cache_k=infer_state.cache_manager.key_buffer[infer_state.decode_layer_id],
+            cache_v=infer_state.cache_manager.value_buffer[infer_state.decode_layer_id],
+        )
+        return

    if num_key_value_groups == 1:
        token_attention_fwd(
            query_states,
@@ -459,14 +469,15 @@ class LlamaInferenceForwards:
            )

        if HAS_LIGHTLLM_KERNEL:

            attn_output = torch.empty_like(query_states)
-            llama_triton_token_attention(query_states = query_states,
-                                         attn_output = attn_output,
-                                         infer_state = infer_state,
-                                         num_key_value_groups = self.num_key_value_groups,
-                                         q_head_num = q_len * self.num_heads,
-                                         head_dim = self.head_dim)
+            llama_triton_token_attention(
+                query_states=query_states,
+                attn_output=attn_output,
+                infer_state=infer_state,
+                num_key_value_groups=self.num_key_value_groups,
+                q_head_num=q_len * self.num_heads,
+                head_dim=self.head_dim,
+            )
        else:
            self.num_heads // self.num_key_value_heads
            cache_k = infer_state.cache_manager.key_buffer[infer_state.decode_layer_id]
@@ -18,15 +18,15 @@ from .gptq_op import CaiGPTQLinearOp
HAS_GPTQ_CUDA = False
try:
    from colossalai.kernel.op_builder.gptq import GPTQBuilder

    gptq_cuda = GPTQBuilder().load()
    HAS_GPTQ_CUDA = True
except ImportError:
-    warnings.warn('CUDA gptq is not installed')
+    warnings.warn("CUDA gptq is not installed")
    HAS_GPTQ_CUDA = False


class CaiQuantLinear(nn.Module):

    def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_size=1, tp_rank=0, row_split=False):
        super().__init__()
        if bits not in [2, 4, 8]:
@ -37,23 +37,28 @@ class CaiQuantLinear(nn.Module):
|
||||||
self.maxq = 2**self.bits - 1
|
self.maxq = 2**self.bits - 1
|
||||||
self.groupsize = groupsize if groupsize != -1 else infeatures
|
self.groupsize = groupsize if groupsize != -1 else infeatures
|
||||||
|
|
||||||
self.register_buffer('qweight', torch.zeros((infeatures // 32 * self.bits, outfeatures), dtype=torch.int32))
|
self.register_buffer("qweight", torch.zeros((infeatures // 32 * self.bits, outfeatures), dtype=torch.int32))
|
||||||
self.register_buffer(
|
self.register_buffer(
|
||||||
'qzeros',
|
"qzeros",
|
||||||
torch.zeros((math.ceil(infeatures / self.groupsize), outfeatures // 32 * self.bits), dtype=torch.int32))
|
torch.zeros((math.ceil(infeatures / self.groupsize), outfeatures // 32 * self.bits), dtype=torch.int32),
|
||||||
self.register_buffer('scales',
|
)
|
||||||
torch.zeros((math.ceil(infeatures / self.groupsize), outfeatures), dtype=torch.float16))
|
self.register_buffer(
|
||||||
|
"scales", torch.zeros((math.ceil(infeatures / self.groupsize), outfeatures), dtype=torch.float16)
|
||||||
|
)
|
||||||
if row_split:
|
if row_split:
|
||||||
self.register_buffer(
|
self.register_buffer(
|
||||||
'g_idx',
|
"g_idx",
|
||||||
torch.tensor([(i + (tp_rank * self.infeatures)) // self.groupsize for i in range(infeatures)],
|
torch.tensor(
|
||||||
dtype=torch.int32))
|
[(i + (tp_rank * self.infeatures)) // self.groupsize for i in range(infeatures)], dtype=torch.int32
|
||||||
|
),
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
self.register_buffer('g_idx',
|
self.register_buffer(
|
||||||
torch.tensor([i // self.groupsize for i in range(infeatures)], dtype=torch.int32))
|
"g_idx", torch.tensor([i // self.groupsize for i in range(infeatures)], dtype=torch.int32)
|
||||||
|
)
|
||||||
|
|
||||||
if bias:
|
if bias:
|
||||||
self.register_buffer('bias', torch.zeros((outfeatures), dtype=torch.float16))
|
self.register_buffer("bias", torch.zeros((outfeatures), dtype=torch.float16))
|
||||||
else:
|
else:
|
||||||
self.bias = None
|
self.bias = None
|
||||||
|
|
||||||
|
@ -66,9 +71,11 @@ class CaiQuantLinear(nn.Module):
|
||||||
self.row_split = row_split
|
self.row_split = row_split
|
||||||
|
|
||||||
def pack(self, linear, scales, zeros, g_idx=None):
|
def pack(self, linear, scales, zeros, g_idx=None):
|
||||||
|
g_idx = (
|
||||||
g_idx = g_idx.clone() if g_idx is not None else torch.tensor(
|
g_idx.clone()
|
||||||
[i // self.groupsize for i in range(self.infeatures)], dtype=torch.int32)
|
if g_idx is not None
|
||||||
|
else torch.tensor([i // self.groupsize for i in range(self.infeatures)], dtype=torch.int32)
|
||||||
|
)
|
||||||
|
|
||||||
scales = scales.t().contiguous()
|
scales = scales.t().contiguous()
|
||||||
zeros = zeros.t().contiguous()
|
zeros = zeros.t().contiguous()
|
||||||
|
@ -79,7 +86,6 @@ class CaiQuantLinear(nn.Module):
|
||||||
if linear.bias is not None:
|
if linear.bias is not None:
|
||||||
self.bias = linear.bias.clone().half()
|
self.bias = linear.bias.clone().half()
|
||||||
|
|
||||||
wn = 8
|
|
||||||
pbits = 32
|
pbits = 32
|
||||||
ptype = torch.int32
|
ptype = torch.int32
|
||||||
unsign_type = np.uint32
|
unsign_type = np.uint32
|
||||||
|
@ -88,9 +94,10 @@ class CaiQuantLinear(nn.Module):
|
||||||
intweight = []
|
intweight = []
|
||||||
for idx in range(self.infeatures):
|
for idx in range(self.infeatures):
|
||||||
intweight.append(
|
intweight.append(
|
||||||
torch.round(
|
torch.round((linear.weight.data[:, idx] + scale_zeros[g_idx[idx]]) / half_scales[g_idx[idx]]).to(ptype)[
|
||||||
(linear.weight.data[:, idx] + scale_zeros[g_idx[idx]]) / half_scales[g_idx[idx]]).to(ptype)[:,
|
:, None
|
||||||
None])
|
]
|
||||||
|
)
|
||||||
intweight = torch.cat(intweight, dim=1)
|
intweight = torch.cat(intweight, dim=1)
|
||||||
intweight = intweight.t().contiguous()
|
intweight = intweight.t().contiguous()
|
||||||
intweight = intweight.numpy().astype(unsign_type)
|
intweight = intweight.numpy().astype(unsign_type)
|
||||||
|
@ -109,7 +116,7 @@ class CaiQuantLinear(nn.Module):
|
||||||
raise NotImplementedError("Only 2,4,8 bits are supported.")
|
raise NotImplementedError("Only 2,4,8 bits are supported.")
|
||||||
qweight = qweight.astype(sign_type)
|
qweight = qweight.astype(sign_type)
|
||||||
qweight1 = torch.from_numpy(qweight)
|
qweight1 = torch.from_numpy(qweight)
|
||||||
qweight1 = qweight1.contiguous() #.to("cuda")
|
qweight1 = qweight1.contiguous() # .to("cuda")
|
||||||
self.qweight.data.copy_(qweight1)
|
self.qweight.data.copy_(qweight1)
|
||||||
|
|
||||||
qzeros = np.zeros((zeros.shape[0], zeros.shape[1] // pbits * self.bits), dtype=unsign_type)
|
qzeros = np.zeros((zeros.shape[0], zeros.shape[1] // pbits * self.bits), dtype=unsign_type)
|
||||||
|
@ -140,17 +147,20 @@ class CaiQuantLinear(nn.Module):
|
||||||
self.q4_width = self.qweight.shape[1]
|
self.q4_width = self.qweight.shape[1]
|
||||||
if self.g_idx is not None:
|
if self.g_idx is not None:
|
||||||
if self.row_split and torch.equal(
|
if self.row_split and torch.equal(
|
||||||
self.g_idx,
|
self.g_idx,
|
||||||
torch.tensor(
|
torch.tensor(
|
||||||
[(i + (self.tp_rank * self.infeatures)) // self.groupsize for i in range(self.infeatures)],
|
[(i + (self.tp_rank * self.infeatures)) // self.groupsize for i in range(self.infeatures)],
|
||||||
dtype=torch.int32,
|
dtype=torch.int32,
|
||||||
device=self.g_idx.device)):
|
device=self.g_idx.device,
|
||||||
|
),
|
||||||
|
):
|
||||||
self.g_idx = None
|
self.g_idx = None
|
||||||
elif torch.equal(
|
elif torch.equal(
|
||||||
self.g_idx,
|
self.g_idx,
|
||||||
torch.tensor([i // self.groupsize for i in range(self.infeatures)],
|
torch.tensor(
|
||||||
dtype=torch.int32,
|
[i // self.groupsize for i in range(self.infeatures)], dtype=torch.int32, device=self.g_idx.device
|
||||||
device=self.g_idx.device)):
|
),
|
||||||
|
):
|
||||||
self.g_idx = None
|
self.g_idx = None
|
||||||
|
|
||||||
if self.g_idx is not None:
|
if self.g_idx is not None:
|
||||||
|
@ -165,7 +175,6 @@ class CaiQuantLinear(nn.Module):
|
||||||
outshape = x.shape[:-1] + (self.outfeatures,)
|
outshape = x.shape[:-1] + (self.outfeatures,)
|
||||||
|
|
||||||
if HAS_GPTQ_CUDA and self.bits == 4:
|
if HAS_GPTQ_CUDA and self.bits == 4:
|
||||||
|
|
||||||
if self.q4 is None:
|
if self.q4 is None:
|
||||||
self.init_q4()
|
self.init_q4()
|
||||||
|
|
||||||
|
@ -191,7 +200,6 @@ class CaiQuantLinear(nn.Module):
|
||||||
|
|
||||||
|
|
||||||
def split_column_copy(gptq_linear, cai_linear, tp_size=1, tp_rank=0, split_num=1):
|
def split_column_copy(gptq_linear, cai_linear, tp_size=1, tp_rank=0, split_num=1):
|
||||||
|
|
||||||
qweights = gptq_linear.qweight.split(gptq_linear.out_features // split_num, dim=-1)
|
qweights = gptq_linear.qweight.split(gptq_linear.out_features // split_num, dim=-1)
|
||||||
qzeros = gptq_linear.qzeros.split(gptq_linear.out_features // (32 // cai_linear.bits) // split_num, dim=-1)
|
qzeros = gptq_linear.qzeros.split(gptq_linear.out_features // (32 // cai_linear.bits) // split_num, dim=-1)
|
||||||
scales = gptq_linear.scales.split(gptq_linear.out_features // split_num, dim=-1)
|
scales = gptq_linear.scales.split(gptq_linear.out_features // split_num, dim=-1)
|
||||||
|
@ -203,24 +211,24 @@ def split_column_copy(gptq_linear, cai_linear, tp_size=1, tp_rank=0, split_num=1
|
||||||
zero_split_block = cai_linear.outfeatures // (32 // cai_linear.bits) // split_num
|
zero_split_block = cai_linear.outfeatures // (32 // cai_linear.bits) // split_num
|
||||||
|
|
||||||
for i in range(split_num):
|
for i in range(split_num):
|
||||||
cai_linear.qweight[:, i * cai_split_out_features:(i + 1) *
|
cai_linear.qweight[:, i * cai_split_out_features : (i + 1) * cai_split_out_features] = qweights[i][
|
||||||
cai_split_out_features] = qweights[i][:, tp_rank * cai_split_out_features:(tp_rank + 1) *
|
:, tp_rank * cai_split_out_features : (tp_rank + 1) * cai_split_out_features
|
||||||
cai_split_out_features]
|
]
|
||||||
cai_linear.qzeros[:, i * zero_split_block:(i + 1) *
|
cai_linear.qzeros[:, i * zero_split_block : (i + 1) * zero_split_block] = qzeros[i][
|
||||||
zero_split_block] = qzeros[i][:, tp_rank * zero_split_block:(tp_rank + 1) * zero_split_block]
|
:, tp_rank * zero_split_block : (tp_rank + 1) * zero_split_block
|
||||||
cai_linear.scales[:, i * cai_split_out_features:(i + 1) *
|
]
|
||||||
cai_split_out_features] = scales[i][:, tp_rank * cai_split_out_features:(tp_rank + 1) *
|
cai_linear.scales[:, i * cai_split_out_features : (i + 1) * cai_split_out_features] = scales[i][
|
||||||
cai_split_out_features]
|
:, tp_rank * cai_split_out_features : (tp_rank + 1) * cai_split_out_features
|
||||||
|
]
|
||||||
if cai_linear.bias is not None:
|
if cai_linear.bias is not None:
|
||||||
cai_linear.bias[i * cai_split_out_features:(i + 1) *
|
cai_linear.bias[i * cai_split_out_features : (i + 1) * cai_split_out_features] = bias[i][
|
||||||
cai_split_out_features] = bias[i][tp_rank * cai_split_out_features:(tp_rank + 1) *
|
tp_rank * cai_split_out_features : (tp_rank + 1) * cai_split_out_features
|
||||||
cai_split_out_features]
|
]
|
||||||
|
|
||||||
cai_linear.g_idx.copy_(g_idx)
|
cai_linear.g_idx.copy_(g_idx)
|
||||||
|
|
||||||
|
|
||||||
def split_row_copy(gptq_linear, cai_linear, tp_rank=0, split_num=1):
|
def split_row_copy(gptq_linear, cai_linear, tp_rank=0, split_num=1):
|
||||||
|
|
||||||
qweights = gptq_linear.qweight.split(gptq_linear.in_features // split_num, dim=0)
|
qweights = gptq_linear.qweight.split(gptq_linear.in_features // split_num, dim=0)
|
||||||
qzeros = gptq_linear.qzeros.split(gptq_linear.in_features // split_num, dim=0)
|
qzeros = gptq_linear.qzeros.split(gptq_linear.in_features // split_num, dim=0)
|
||||||
scales = gptq_linear.scales.split(gptq_linear.in_features // split_num, dim=0)
|
scales = gptq_linear.scales.split(gptq_linear.in_features // split_num, dim=0)
|
||||||
|
@ -231,47 +239,40 @@ def split_row_copy(gptq_linear, cai_linear, tp_rank=0, split_num=1):
|
||||||
idx_split_features = cai_linear.infeatures // split_num
|
idx_split_features = cai_linear.infeatures // split_num
|
||||||
|
|
||||||
for i in range(split_num):
|
for i in range(split_num):
|
||||||
cai_linear.qweight[i * cai_split_in_features:(i + 1) *
|
cai_linear.qweight[i * cai_split_in_features : (i + 1) * cai_split_in_features, :] = qweights[i][
|
||||||
cai_split_in_features, :] = qweights[i][tp_rank * cai_split_in_features:(tp_rank + 1) *
|
tp_rank * cai_split_in_features : (tp_rank + 1) * cai_split_in_features, :
|
||||||
cai_split_in_features, :]
|
]
|
||||||
cai_linear.qzeros[i * zero_split_block:(i + 1) *
|
cai_linear.qzeros[i * zero_split_block : (i + 1) * zero_split_block, :] = qzeros[i][
|
||||||
zero_split_block, :] = qzeros[i][tp_rank * zero_split_block:(tp_rank + 1) *
|
tp_rank * zero_split_block : (tp_rank + 1) * zero_split_block, :
|
||||||
zero_split_block, :]
|
]
|
||||||
cai_linear.scales[i * zero_split_block:(i + 1) *
|
cai_linear.scales[i * zero_split_block : (i + 1) * zero_split_block, :] = scales[i][
|
||||||
zero_split_block, :] = scales[i][tp_rank * zero_split_block:(tp_rank + 1) *
|
tp_rank * zero_split_block : (tp_rank + 1) * zero_split_block, :
|
||||||
zero_split_block, :]
|
]
|
||||||
cai_linear.g_idx[i * idx_split_features:(i + 1) *
|
cai_linear.g_idx[i * idx_split_features : (i + 1) * idx_split_features] = g_idxs[i][
|
||||||
idx_split_features] = g_idxs[i][tp_rank * idx_split_features:(tp_rank + 1) *
|
tp_rank * idx_split_features : (tp_rank + 1) * idx_split_features
|
||||||
idx_split_features]
|
]
|
||||||
if cai_linear.bias is not None:
|
if cai_linear.bias is not None:
|
||||||
cai_linear.bias.copy_(gptq_linear.bias)
|
cai_linear.bias.copy_(gptq_linear.bias)
|
||||||
|
|
||||||
|
|
||||||
class RowCaiQuantLinear(CaiQuantLinear, ParallelModule):
|
class RowCaiQuantLinear(CaiQuantLinear, ParallelModule):
|
||||||
|
|
||||||
def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_size=1, tp_rank=0, row_split=False):
|
def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_size=1, tp_rank=0, row_split=False):
|
||||||
|
super().__init__(
|
||||||
super().__init__(bits,
|
bits, groupsize, infeatures, outfeatures, bias, tp_size=tp_size, tp_rank=tp_rank, row_split=row_split
|
||||||
groupsize,
|
)
|
||||||
infeatures,
|
|
||||||
outfeatures,
|
|
||||||
bias,
|
|
||||||
tp_size=tp_size,
|
|
||||||
tp_rank=tp_rank,
|
|
||||||
row_split=row_split)
|
|
||||||
self.process_group = None
|
self.process_group = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_native_module(module: nn.Module, process_group: Union[ProcessGroup, List[ProcessGroup]], *args,
|
def from_native_module(
|
||||||
**kwargs) -> ParallelModule:
|
module: nn.Module, process_group: Union[ProcessGroup, List[ProcessGroup]], *args, **kwargs
|
||||||
|
) -> ParallelModule:
|
||||||
LazyInitContext.materialize(module)
|
LazyInitContext.materialize(module)
|
||||||
# get the attributes
|
# get the attributes
|
||||||
in_features = module.in_features
|
in_features = module.in_features
|
||||||
|
|
||||||
# ensure only one process group is passed
|
# ensure only one process group is passed
|
||||||
if isinstance(process_group, (list, tuple)):
|
if isinstance(process_group, (list, tuple)):
|
||||||
assert len(process_group) == 1, \
|
assert len(process_group) == 1, f"Expected only one process group, got {len(process_group)}."
|
||||||
f'Expected only one process group, got {len(process_group)}.'
|
|
||||||
process_group = process_group[0]
|
process_group = process_group[0]
|
||||||
|
|
||||||
tp_size = dist.get_world_size(process_group)
|
tp_size = dist.get_world_size(process_group)
|
||||||
|
@ -282,15 +283,18 @@ class RowCaiQuantLinear(CaiQuantLinear, ParallelModule):
|
||||||
|
|
||||||
if in_features % tp_size != 0:
|
if in_features % tp_size != 0:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"The size of in_features:{in_features} is not integer multiples of tensor parallel size: {tp_size}!")
|
f"The size of in_features:{in_features} is not integer multiples of tensor parallel size: {tp_size}!"
|
||||||
linear_1d = RowCaiQuantLinear(module.bits,
|
)
|
||||||
module.group_size,
|
linear_1d = RowCaiQuantLinear(
|
||||||
module.in_features // tp_size,
|
module.bits,
|
||||||
module.out_features,
|
module.group_size,
|
||||||
module.bias is not None,
|
module.in_features // tp_size,
|
||||||
tp_size=tp_size,
|
module.out_features,
|
||||||
tp_rank=tp_rank,
|
module.bias is not None,
|
||||||
row_split=True)
|
tp_size=tp_size,
|
||||||
|
tp_rank=tp_rank,
|
||||||
|
row_split=True,
|
||||||
|
)
|
||||||
linear_1d.process_group = process_group
|
linear_1d.process_group = process_group
|
||||||
|
|
||||||
split_row_copy(module, linear_1d, tp_rank=tp_rank, **kwargs)
|
split_row_copy(module, linear_1d, tp_rank=tp_rank, **kwargs)
|
||||||
|
@ -306,30 +310,23 @@ class RowCaiQuantLinear(CaiQuantLinear, ParallelModule):
|
||||||
|
|
||||||
|
|
||||||
class ColCaiQuantLinear(CaiQuantLinear, ParallelModule):
|
class ColCaiQuantLinear(CaiQuantLinear, ParallelModule):
|
||||||
|
|
||||||
def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_size=1, tp_rank=0, row_split=False):
|
def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_size=1, tp_rank=0, row_split=False):
|
||||||
|
super().__init__(
|
||||||
super().__init__(bits,
|
bits, groupsize, infeatures, outfeatures, bias, tp_size=tp_size, tp_rank=tp_rank, row_split=row_split
|
||||||
groupsize,
|
)
|
||||||
infeatures,
|
|
||||||
outfeatures,
|
|
||||||
bias,
|
|
||||||
tp_size=tp_size,
|
|
||||||
tp_rank=tp_rank,
|
|
||||||
row_split=row_split)
|
|
||||||
self.process_group = None
|
self.process_group = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_native_module(module: nn.Module, process_group: Union[ProcessGroup, List[ProcessGroup]], *args,
|
def from_native_module(
|
||||||
**kwargs) -> ParallelModule:
|
module: nn.Module, process_group: Union[ProcessGroup, List[ProcessGroup]], *args, **kwargs
|
||||||
|
) -> ParallelModule:
|
||||||
LazyInitContext.materialize(module)
|
LazyInitContext.materialize(module)
|
||||||
# get the attributes
|
# get the attributes
|
||||||
in_features = module.in_features
|
in_features = module.in_features
|
||||||
|
|
||||||
# ensure only one process group is passed
|
# ensure only one process group is passed
|
||||||
if isinstance(process_group, (list, tuple)):
|
if isinstance(process_group, (list, tuple)):
|
||||||
assert len(process_group) == 1, \
|
assert len(process_group) == 1, f"Expected only one process group, got {len(process_group)}."
|
||||||
f'Expected only one process group, got {len(process_group)}.'
|
|
||||||
process_group = process_group[0]
|
process_group = process_group[0]
|
||||||
|
|
||||||
tp_size = dist.get_world_size(process_group)
|
tp_size = dist.get_world_size(process_group)
|
||||||
|
@ -340,14 +337,17 @@ class ColCaiQuantLinear(CaiQuantLinear, ParallelModule):
|
||||||
|
|
||||||
if in_features % tp_size != 0:
|
if in_features % tp_size != 0:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"The size of in_features:{in_features} is not integer multiples of tensor parallel size: {tp_size}!")
|
f"The size of in_features:{in_features} is not integer multiples of tensor parallel size: {tp_size}!"
|
||||||
linear_1d = ColCaiQuantLinear(module.bits,
|
)
|
||||||
module.group_size,
|
linear_1d = ColCaiQuantLinear(
|
||||||
module.in_features,
|
module.bits,
|
||||||
module.out_features // tp_size,
|
module.group_size,
|
||||||
module.bias is not None,
|
module.in_features,
|
||||||
tp_size=tp_size,
|
module.out_features // tp_size,
|
||||||
tp_rank=tp_rank)
|
module.bias is not None,
|
||||||
|
tp_size=tp_size,
|
||||||
|
tp_rank=tp_rank,
|
||||||
|
)
|
||||||
linear_1d.process_group = process_group
|
linear_1d.process_group = process_group
|
||||||
|
|
||||||
split_column_copy(module, linear_1d, tp_rank=tp_rank, **kwargs)
|
split_column_copy(module, linear_1d, tp_rank=tp_rank, **kwargs)
|
||||||
|
|
|
@ -5,6 +5,7 @@ import torch
|
||||||
try:
|
try:
|
||||||
import triton
|
import triton
|
||||||
import triton.language as tl
|
import triton.language as tl
|
||||||
|
|
||||||
HAS_TRITON = True
|
HAS_TRITON = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
HAS_TRITON = False
|
HAS_TRITON = False
|
||||||
|
@ -16,6 +17,7 @@ if HAS_TRITON:
|
||||||
https://github.com/ModelTC/lightllm/blob/f093edc20683ac3ea1bca3fb5d8320a0dd36cf7b/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py#L10
|
https://github.com/ModelTC/lightllm/blob/f093edc20683ac3ea1bca3fb5d8320a0dd36cf7b/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py#L10
|
||||||
"""
|
"""
|
||||||
if triton.__version__ < "2.1.0":
|
if triton.__version__ < "2.1.0":
|
||||||
|
|
||||||
@triton.jit
|
@triton.jit
|
||||||
def _context_flash_attention_kernel(
|
def _context_flash_attention_kernel(
|
||||||
Q,
|
Q,
|
||||||
|
@ -131,29 +133,47 @@ if HAS_TRITON:
|
||||||
m_i = m_i_new
|
m_i = m_i_new
|
||||||
|
|
||||||
off_o = (
|
off_o = (
|
||||||
(cur_batch_start_index + offs_m[:, None]) * stride_obs + cur_head * stride_oh + offs_d[None, :] * stride_od
|
(cur_batch_start_index + offs_m[:, None]) * stride_obs
|
||||||
|
+ cur_head * stride_oh
|
||||||
|
+ offs_d[None, :] * stride_od
|
||||||
)
|
)
|
||||||
out_ptrs = Out + off_o
|
out_ptrs = Out + off_o
|
||||||
tl.store(out_ptrs, acc, mask=offs_m[:, None] < cur_batch_seq_len)
|
tl.store(out_ptrs, acc, mask=offs_m[:, None] < cur_batch_seq_len)
|
||||||
return
|
return
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# this function is modified from https://github.com/ModelTC/lightllm/blob/main/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py#L11
@triton.jit
def _context_flash_attention_kernel_2(
-   Q, K, V, sm_scale, Alibi, B_Start_Loc, B_Seqlen,
-   Out,
-   kv_group_num,
-   stride_qbs, stride_qh, stride_qd,
-   stride_kbs, stride_kh, stride_kd,
-   stride_vbs, stride_vh, stride_vd,
-   stride_obs, stride_oh, stride_od,
-   BLOCK_M: tl.constexpr, BLOCK_DMODEL: tl.constexpr,
+   Q,
+   K,
+   V,
+   sm_scale,
+   Alibi,
+   B_Start_Loc,
+   B_Seqlen,
+   Out,
+   kv_group_num,
+   stride_qbs,
+   stride_qh,
+   stride_qd,
+   stride_kbs,
+   stride_kh,
+   stride_kd,
+   stride_vbs,
+   stride_vh,
+   stride_vd,
+   stride_obs,
+   stride_oh,
+   stride_od,
+   BLOCK_M: tl.constexpr,
+   BLOCK_DMODEL: tl.constexpr,
    BLOCK_N: tl.constexpr,
):
    cur_batch = tl.program_id(0)
    cur_head = tl.program_id(1)
    start_m = tl.program_id(2)

    if kv_group_num is not None:
        cur_kv_head = cur_head // kv_group_num

@@ -166,7 +186,11 @@ if HAS_TRITON:
    offs_n = tl.arange(0, BLOCK_N)
    offs_d = tl.arange(0, BLOCK_DMODEL)
    offs_m = start_m * BLOCK_M + tl.arange(0, BLOCK_M)
-   off_q = (cur_batch_in_all_start_index + offs_m[:, None]) * stride_qbs + cur_head * stride_qh + offs_d[None, :] * stride_qd
+   off_q = (
+       (cur_batch_in_all_start_index + offs_m[:, None]) * stride_qbs
+       + cur_head * stride_qh
+       + offs_d[None, :] * stride_qd
+   )
    if kv_group_num is None or kv_group_num == 1:
        off_k = offs_n[None, :] * stride_kbs + cur_head * stride_kh + offs_d[:, None] * stride_kd
        off_v = offs_n[:, None] * stride_vbs + cur_head * stride_vh + offs_d[None, :] * stride_vd

@@ -191,8 +215,11 @@ if HAS_TRITON:
    for start_n in range(0, block_mask * (start_m + 1) * BLOCK_M, BLOCK_N):
        start_n = tl.multiple_of(start_n, BLOCK_N)
        # -- compute qk ----
-       k = tl.load(k_ptrs + (cur_batch_in_all_start_index + start_n) * stride_kbs,
-                   mask=(start_n + offs_n[None, :]) < cur_batch_seq_len, other=0.0)
+       k = tl.load(
+           k_ptrs + (cur_batch_in_all_start_index + start_n) * stride_kbs,
+           mask=(start_n + offs_n[None, :]) < cur_batch_seq_len,
+           other=0.0,
+       )

        qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32)
        qk += tl.dot(q, k)

@@ -220,8 +247,11 @@ if HAS_TRITON:
        acc_scale = l_i / l_i_new * alpha
        acc = acc * acc_scale[:, None]
        # update acc
-       v = tl.load(v_ptrs + (cur_batch_in_all_start_index + start_n) * stride_vbs,
-                   mask=(start_n + offs_n[:, None]) < cur_batch_seq_len, other=0.0)
+       v = tl.load(
+           v_ptrs + (cur_batch_in_all_start_index + start_n) * stride_vbs,
+           mask=(start_n + offs_n[:, None]) < cur_batch_seq_len,
+           other=0.0,
+       )

        p = p.to(v.dtype)
        acc += tl.dot(p, v)

@@ -229,7 +259,11 @@ if HAS_TRITON:
        l_i = l_i_new
        m_i = m_i_new
    # initialize pointers to output
-   off_o = (cur_batch_in_all_start_index + offs_m[:, None]) * stride_obs + cur_head * stride_oh + offs_d[None, :] * stride_od
+   off_o = (
+       (cur_batch_in_all_start_index + offs_m[:, None]) * stride_obs
+       + cur_head * stride_oh
+       + offs_d[None, :] * stride_od
+   )
    out_ptrs = Out + off_o
    tl.store(out_ptrs, acc, mask=offs_m[:, None] < cur_batch_seq_len)
    return

@@ -249,7 +283,7 @@ if HAS_TRITON:
    grid = (batch, head, triton.cdiv(max_input_len, BLOCK))

    num_warps = 4 if Lk <= 64 else 8

    if triton.__version__ < "2.1.0":
        tmp = torch.empty((batch, head, max_input_len + 256), device=q.device, dtype=torch.float32)
        _context_flash_attention_kernel[grid](

@@ -286,20 +320,26 @@ if HAS_TRITON:
        )
    else:
        _context_flash_attention_kernel_2[grid](
-           q, k, v, sm_scale, alibi, b_start_loc, b_seq_len,
+           q,
+           k,
+           v,
+           sm_scale,
+           alibi,
+           b_start_loc,
+           b_seq_len,
            o,
            None,
            q.stride(0),
            q.stride(1),
            q.stride(2),
            k.stride(0),
            k.stride(1),
            k.stride(2),
            v.stride(0),
            v.stride(1),
            v.stride(2),
            o.stride(0),
            o.stride(1),
            o.stride(2),
            BLOCK_M=BLOCK,
            BLOCK_DMODEL=Lk,

@@ -307,7 +347,7 @@ if HAS_TRITON:
            num_warps=num_warps,
            num_stages=1,
        )

    return

@torch.no_grad()

@@ -327,7 +367,7 @@ if HAS_TRITON:
    tmp = torch.empty((batch, head, max_input_len + 256), device=q.device, dtype=torch.float32)
    num_warps = 4 if Lk <= 64 else 8
    # num_warps = 4

    if triton.__version__ < "2.1.0":
        _context_flash_attention_kernel[grid](
            q,

@@ -337,7 +377,7 @@ if HAS_TRITON:
            b_start_loc,
            b_seq_len,
            tmp,
            None,
            o,
            q.stride(0),
            q.stride(1),

@@ -362,32 +402,33 @@ if HAS_TRITON:
        )
    else:
        kv_group_num = q.shape[1] // k.shape[1]
        _context_flash_attention_kernel_2[grid](
            q,
            k,
            v,
            sm_scale,
            None,
            b_start_loc,
            b_seq_len,
            o,
            kv_group_num,
            q.stride(0),
            q.stride(1),
            q.stride(2),
            k.stride(0),
            k.stride(1),
            k.stride(2),
            v.stride(0),
            v.stride(1),
            v.stride(2),
            o.stride(0),
            o.stride(1),
            o.stride(2),
            BLOCK_M=BLOCK,
            BLOCK_DMODEL=Lk,
            BLOCK_N=BLOCK,
            num_warps=num_warps,
-           num_stages=1,)
+           num_stages=1,
+       )
+
    return
@@ -1,8 +1,10 @@
# adepted from https://github.com/ModelTC/lightllm/blob/ece7b43f8a6dfa74027adc77c2c176cff28c76c8/lightllm/models/llama/triton_kernel/flash_decoding.py
import torch

try:
    from lightllm.models.llama.triton_kernel.flash_decoding_stage1 import flash_decode_stage1
    from lightllm.models.llama.triton_kernel.flash_decoding_stage2 import flash_decode_stage2

    HAS_LIGHTLLM_KERNEL = True
except:
    print("install lightllm from https://github.com/ModelTC/lightllm/blob/ece7b43f8a6dfa74027adc77c2c176cff28c76c8")

@@ -10,41 +12,36 @@ except:

if HAS_LIGHTLLM_KERNEL:

    def token_flash_decoding(q, o_tensor, infer_state, q_head_num, head_dim, cache_k, cache_v):
        BLOCK_SEQ = 256
        batch_size = infer_state.batch_size
        max_len_in_batch = infer_state.max_len_in_batch

        calcu_shape1 = (batch_size, q_head_num, head_dim)

-       if getattr(infer_state, 'mid_o', None) is None:
-           infer_state.mid_o = torch.empty([batch_size,
-                                            q_head_num,
-                                            max_len_in_batch // BLOCK_SEQ + 1,
-                                            head_dim],
-                                           dtype=torch.float32,
-                                           device="cuda")
-           infer_state.mid_o_logexpsum = torch.empty([batch_size,
-                                                      q_head_num,
-                                                      max_len_in_batch // BLOCK_SEQ + 1],
-                                                     dtype=torch.float32,
-                                                     device="cuda")
+       if getattr(infer_state, "mid_o", None) is None:
+           infer_state.mid_o = torch.empty(
+               [batch_size, q_head_num, max_len_in_batch // BLOCK_SEQ + 1, head_dim],
+               dtype=torch.float32,
+               device="cuda",
+           )
+           infer_state.mid_o_logexpsum = torch.empty(
+               [batch_size, q_head_num, max_len_in_batch // BLOCK_SEQ + 1], dtype=torch.float32, device="cuda"
+           )

        mid_o = infer_state.mid_o
        mid_o_logexpsum = infer_state.mid_o_logexpsum

-       flash_decode_stage1(q.view(calcu_shape1),
-                           cache_k,
-                           cache_v,
-                           infer_state.block_loc,
-                           infer_state.seq_len,
-                           infer_state.max_len_in_batch,
-                           mid_o,
-                           mid_o_logexpsum,
-                           BLOCK_SEQ)
-       flash_decode_stage2(mid_o,
-                           mid_o_logexpsum,
-                           infer_state.seq_len,
-                           o_tensor.view(calcu_shape1),
-                           BLOCK_SEQ)
+       flash_decode_stage1(
+           q.view(calcu_shape1),
+           cache_k,
+           cache_v,
+           infer_state.block_loc,
+           infer_state.seq_len,
+           infer_state.max_len_in_batch,
+           mid_o,
+           mid_o_logexpsum,
+           BLOCK_SEQ,
+       )
+       flash_decode_stage2(mid_o, mid_o_logexpsum, infer_state.seq_len, o_tensor.view(calcu_shape1), BLOCK_SEQ)
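For orientation, the two lightllm kernels used above split decoding attention into per-block partial results (`mid_o`, `mid_o_logexpsum`) and a log-sum-exp reduction. The following is a minimal, unoptimized PyTorch sketch of that two-stage idea only; the function name, shapes, and block size here are assumptions for illustration, not the lightllm kernel API.

```python
import torch


def two_stage_decode_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, block_seq: int = 256) -> torch.Tensor:
    """Reference two-stage decoding attention for one query token.

    q: (heads, dim); k, v: (seq_len, heads, dim). Illustrative only.
    """
    heads, dim = q.shape
    scale = dim**-0.5
    partial_out, partial_lse = [], []
    # Stage 1: block-local softmax statistics (analogous to flash_decode_stage1).
    for start in range(0, k.shape[0], block_seq):
        kb, vb = k[start : start + block_seq], v[start : start + block_seq]
        logits = torch.einsum("hd,shd->hs", q, kb) * scale          # (heads, block)
        partial_lse.append(torch.logsumexp(logits, dim=-1))         # (heads,)
        probs = torch.softmax(logits, dim=-1)
        partial_out.append(torch.einsum("hs,shd->hd", probs, vb))   # (heads, dim)
    out = torch.stack(partial_out)   # (blocks, heads, dim)
    lse = torch.stack(partial_lse)   # (blocks, heads)
    # Stage 2: combine blocks with a global log-sum-exp (analogous to flash_decode_stage2).
    weights = torch.exp(lse - torch.logsumexp(lse, dim=0, keepdim=True))  # sums to 1 over blocks
    return (weights.unsqueeze(-1) * out).sum(dim=0)                       # (heads, dim)
```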
@@ -8,6 +8,7 @@ from torch.cuda.amp import custom_bwd, custom_fwd
try:
    import triton
    import triton.language as tl

    HAS_TRITON = True
except ImportError:
    HAS_TRITON = False

@@ -26,8 +27,8 @@ if HAS_TRITON:
    X_GATE2,
    X_UP,
    Y,
    stride,  # how much to increase the pointer when moving by 1 row
    N,  # number of columns in X
    BLOCK_SIZE: tl.constexpr,
):
    # Map the program id to the row of X and Y it should compute.

@@ -41,9 +42,9 @@ if HAS_TRITON:
    for off in range(0, N, BLOCK_SIZE):
        cols = off + tl.arange(0, BLOCK_SIZE)
        mask = cols < N
-       x_gate1 = tl.load(X_GATE1 + cols, mask=mask, other=0.)
-       x_gate2 = tl.load(X_GATE2 + cols, mask=mask, other=0.)
-       x_up = tl.load(X_UP + cols, mask=mask, other=0.)
+       x_gate1 = tl.load(X_GATE1 + cols, mask=mask, other=0.0)
+       x_gate2 = tl.load(X_GATE2 + cols, mask=mask, other=0.0)
+       x_up = tl.load(X_UP + cols, mask=mask, other=0.0)
        x_gate2_sigmoid = tl.sigmoid(x_gate2.to(tl.float32)).to(x_gate2.dtype)
        y = x_gate1 * x_gate2 * x_gate2_sigmoid * x_up
        # Write output

@@ -58,8 +59,8 @@ if HAS_TRITON:
    X_GATE2_GRAD,
    X_UP_GRAD,
    Y_GRAD,
    stride,  # how much to increase the pointer when moving by 1 row
    N,  # number of columns in X
    BLOCK_SIZE: tl.constexpr,
):
    # Map the program id to the row of X and Y it should compute.

@@ -76,10 +77,10 @@ if HAS_TRITON:
    for off in range(0, N, BLOCK_SIZE):
        cols = off + tl.arange(0, BLOCK_SIZE)
        mask = cols < N
-       x_gate1 = tl.load(X_GATE1 + cols, mask=mask, other=0.)
-       x_gate2 = tl.load(X_GATE2 + cols, mask=mask, other=0.)
-       x_up = tl.load(X_UP + cols, mask=mask, other=0.)
-       y_grad = tl.load(Y_GRAD + cols, mask=mask, other=0.)
+       x_gate1 = tl.load(X_GATE1 + cols, mask=mask, other=0.0)
+       x_gate2 = tl.load(X_GATE2 + cols, mask=mask, other=0.0)
+       x_up = tl.load(X_UP + cols, mask=mask, other=0.0)
+       y_grad = tl.load(Y_GRAD + cols, mask=mask, other=0.0)

        # forward: y = x_gate1 * x_gate2 * tl.sigmoid(x_gate2) * x_up
        x_gate2_sigmoid = tl.sigmoid(x_gate2.to(tl.float32)).to(x_gate2.dtype)

@@ -147,14 +148,9 @@ if HAS_TRITON:
        # restore setting
        ctx.M, ctx.N, ctx.BLOCK_SIZE, ctx.num_warps = M, N, BLOCK_SIZE, num_warps
        # enqueue kernel
-       _llama_act_combine_forward[(M,)](x_gate1,
-                                        x_gate2,
-                                        x_up,
-                                        y,
-                                        x_up.stride(-2),
-                                        N,
-                                        BLOCK_SIZE=BLOCK_SIZE,
-                                        num_warps=num_warps)
+       _llama_act_combine_forward[(M,)](
+           x_gate1, x_gate2, x_up, y, x_up.stride(-2), N, BLOCK_SIZE=BLOCK_SIZE, num_warps=num_warps
+       )
        return y

    @staticmethod

@@ -166,20 +162,25 @@ if HAS_TRITON:

        # init grad
        y_grad = grad_outputs[0]
-       x_gate1_grad, x_gate2_grad, x_up_grad = torch.empty_like(x_gate1), torch.empty_like(
-           x_gate2), torch.empty_like(x_up)
+       x_gate1_grad, x_gate2_grad, x_up_grad = (
+           torch.empty_like(x_gate1),
+           torch.empty_like(x_gate2),
+           torch.empty_like(x_up),
+       )

        # enqueue kernel
-       _llama_act_combine_backward[(M,)](x_gate1,
-                                         x_gate2,
-                                         x_up,
-                                         x_gate1_grad,
-                                         x_gate2_grad,
-                                         x_up_grad,
-                                         y_grad,
-                                         x_up.stride(-2),
-                                         N,
-                                         BLOCK_SIZE=BLOCK_SIZE,
-                                         num_warps=num_warps)
+       _llama_act_combine_backward[(M,)](
+           x_gate1,
+           x_gate2,
+           x_up,
+           x_gate1_grad,
+           x_gate2_grad,
+           x_up_grad,
+           y_grad,
+           x_up.stride(-2),
+           N,
+           BLOCK_SIZE=BLOCK_SIZE,
+           num_warps=num_warps,
+       )
        x_gate_grad = torch.cat([x_gate1_grad, x_gate2_grad], dim=-1)
        return x_gate_grad, x_up_grad, None, None
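The Triton kernels above fuse the element-wise combine `y = x_gate1 * x_gate2 * sigmoid(x_gate2) * x_up` and its backward. A plain PyTorch restatement of that same formula can serve as a correctness oracle when testing the fused kernels; the shapes and harness below are hypothetical, only the expression comes from the diff above.

```python
import torch


def llama_act_combine_ref(x_gate1: torch.Tensor, x_gate2: torch.Tensor, x_up: torch.Tensor) -> torch.Tensor:
    # Same expression as the Triton forward kernel: y = x_gate1 * x_gate2 * sigmoid(x_gate2) * x_up
    return x_gate1 * x_gate2 * torch.sigmoid(x_gate2) * x_up


# Hypothetical shapes; autograd on the reference yields gradients to compare the Triton backward against.
x_gate1, x_gate2, x_up = (torch.randn(4, 128, requires_grad=True) for _ in range(3))
llama_act_combine_ref(x_gate1, x_gate2, x_up).sum().backward()
```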
@@ -13,10 +13,18 @@ except ImportError:
    print("please install triton from https://github.com/openai/triton")

try:
-   from lightllm.models.llama.triton_kernel.token_attention_nopad_reduceV import token_att_fwd2 as lightllm_llama_token_att_fwd2
-   from lightllm.models.llama.triton_kernel.token_attention_nopad_att1 import token_att_fwd as lightllm_llama_token_att_fwd
-   from lightllm.models.llama.triton_kernel.token_attention_nopad_softmax import token_softmax_fwd as lightllm_llama_token_softmax_fwd
-   from lightllm.models.bloom.triton_kernel.token_attention_nopad_att1 import token_att_fwd as lightllm_bloom_token_att_fwd
+   from lightllm.models.bloom.triton_kernel.token_attention_nopad_att1 import (
+       token_att_fwd as lightllm_bloom_token_att_fwd,
+   )
+   from lightllm.models.llama.triton_kernel.token_attention_nopad_att1 import (
+       token_att_fwd as lightllm_llama_token_att_fwd,
+   )
+   from lightllm.models.llama.triton_kernel.token_attention_nopad_reduceV import (
+       token_att_fwd2 as lightllm_llama_token_att_fwd2,
+   )
+   from lightllm.models.llama.triton_kernel.token_attention_nopad_softmax import (
+       token_softmax_fwd as lightllm_llama_token_softmax_fwd,
+   )

    HAS_TRITON_TOKEN_ATTENTION = True
except ImportError:

@@ -205,9 +213,7 @@ class Llama2TokenAttentionForwards:

    if triton.__version__ == "2.0.0":
        prob = torch.empty_like(att_m_tensor)
-       lightllm_llama_token_softmax_fwd(
-           att_m_tensor, kv_cache_start_loc, kv_cache_seq_len, prob, max_len_in_batch
-       )
+       lightllm_llama_token_softmax_fwd(att_m_tensor, kv_cache_start_loc, kv_cache_seq_len, prob, max_len_in_batch)
        att_m_tensor = None

        lightllm_llama_token_att_fwd2(
@@ -8,7 +8,9 @@ from transformers.models.llama.modeling_llama import LlamaAttention, LlamaDecode
from colossalai.inference.tensor_parallel.batch_infer_state import BatchInferState
from colossalai.kernel.triton import llama_context_attn_fwd, token_attention_fwd
from colossalai.kernel.triton.token_attention_kernel import Llama2TokenAttentionForwards

from ._utils import copy_kv_to_mem_cache

try:
    from lightllm.models.llama.triton_kernel.context_flashattention_nopad import (
        context_attention_fwd as lightllm_llama_context_attention_fwd,

@@ -90,7 +92,7 @@ def llama_triton_token_attention(query_states, attn_output, infer_state, num_key
            # infer_state.cache_manager.past_key_values_length,
            infer_state.max_len_in_batch,
        )

    else:
        Llama2TokenAttentionForwards.token_attn(
            query_states,
@@ -44,7 +44,7 @@ class Qwen2PipelineForwards:
        hidden_states: Optional[torch.FloatTensor] = None,
        stage_index: Optional[List[int]] = None,
        shard_config: ShardConfig = None,
-   )-> Union[Tuple, BaseModelOutputWithPast]:
+   ) -> Union[Tuple, BaseModelOutputWithPast]:
        logger = logging.get_logger(__name__)

        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

@@ -306,7 +306,7 @@ class Qwen2PipelineForwards:
        else:
            hidden_states = outputs.get("hidden_states")
            return {"hidden_states": hidden_states}

    @staticmethod
    def qwen2_for_sequence_classification_forward(
        self: Qwen2ForSequenceClassification,
@@ -2,13 +2,13 @@ from .api import (
    compute_global_numel,
    customized_distributed_tensor_to_param,
    distribute_tensor,
-   init_as_dtensor,
    distribute_tensor_with_customization,
-   init_tensor_as_customization_distributed,
    get_device_mesh,
    get_global_shape,
    get_layout,
    get_sharding_spec,
+   init_as_dtensor,
+   init_tensor_as_customization_distributed,
    is_customized_distributed_tensor,
    is_distributed_tensor,
    is_sharded,

@@ -128,7 +128,10 @@ def distribute_tensor(tensor: torch.Tensor, device_mesh: DeviceMesh, sharding_sp

    return sharded_tensor

-def init_as_dtensor(tensor: torch.Tensor, device_mesh: DeviceMesh, sharding_spec: ShardingSpec, global_shape: torch.Size) -> torch.Tensor:
+def init_as_dtensor(
+    tensor: torch.Tensor, device_mesh: DeviceMesh, sharding_spec: ShardingSpec, global_shape: torch.Size
+) -> torch.Tensor:
    assert not is_distributed_tensor(tensor), "The input tensor is already a distributed tensor."
    dist_layout = Layout(device_mesh=device_mesh, sharding_spec=sharding_spec, global_shape=global_shape)

@@ -140,6 +143,7 @@ def init_as_dtensor(tensor: torch.Tensor, device_mesh: DeviceMesh, sharding_spec

    return tensor


def redistribute(dtensor: torch.Tensor, device_mesh: DeviceMesh, sharding_spec: ShardingSpec) -> None:
    """
    Convert the layout of the tensor from source_spec to target_spec.

@@ -468,7 +472,6 @@ def init_tensor_as_customization_distributed(tensor: torch.Tensor, shard_fn, gat
    assert callable(gather_fn), "The gather_fn must be callable."
    assert not is_distributed_tensor(tensor), "The input tensor is already a distributed tensor."

    # set the shard_fn and gather_fn as attributes of the distributed tensor
    tensor.shard_fn = shard_fn
    tensor.gather_fn = gather_fn
@@ -220,7 +220,7 @@ model, optimizer, _criterion, train_dataloader, lr_scheduler = booster.boost(
)
```
## Training ViT with Hybrid Parallelism
Finally, we can train the model with the hybrid parallel strategy. We first define a training function that describes the training process. Note that when the pipeline parallel strategy is used, you need to call `booster.execute_pipeline` to run the model training; it invokes the `scheduler` to manage the model's forward and backward passes.
```python
def run_forward_backward(
    model: nn.Module,
@@ -119,9 +119,7 @@ def main():
    if hasattr(booster.plugin, "stage_manager") and booster.plugin.stage_manager is not None:
        # run pipeline forward backward
        batch = iter([batch])
-       outputs = booster.execute_pipeline(
-           batch, model, criterion, optimizer, return_loss=True
-       )
+       outputs = booster.execute_pipeline(batch, model, criterion, optimizer, return_loss=True)
    else:
        outputs = model(**batch)
        loss = criterion(outputs, None)
@@ -121,4 +121,4 @@ class RandomDataset(Dataset):
            "input_ids": self.input_ids[idx],
            "attention_mask": self.attention_mask[idx],
            "labels": self.input_ids[idx],
        }
@@ -270,9 +270,7 @@ def main():
    ) as pbar:
        for step in pbar:
            if use_pipeline:
-               outputs = booster.execute_pipeline(
-                   dataloader_iter, model, _criterion, optimizer, return_loss=True
-               )
+               outputs = booster.execute_pipeline(dataloader_iter, model, _criterion, optimizer, return_loss=True)
                loss = outputs["loss"]
            else:
                batch = next(dataloader_iter)
@@ -285,9 +285,7 @@ def main():
    ) as pbar:
        for step in pbar:
            if use_pipeline:
-               outputs = booster.execute_pipeline(
-                   dataloader_iter, model, _criterion, optimizer, return_loss=True
-               )
+               outputs = booster.execute_pipeline(dataloader_iter, model, _criterion, optimizer, return_loss=True)
                loss = outputs["loss"]
            else:
                batch = next(dataloader_iter)
@@ -50,7 +50,6 @@ def all_reduce_mean(x: float, world_size: int) -> float:


class Timer:

    def __init__(self) -> None:
        self.start_time: Optional[float] = None
        self.duration: float = 0.0

@@ -112,7 +111,7 @@ class PerformanceEvaluator:
        batch_size, seq_len = input_ids.shape

        self.num_samples += batch_size
-       self.flop += (batch_size * seq_len * self.model_numel * 2 * (3 + int(self.enable_grad_checkpoint)))
+       self.flop += batch_size * seq_len * self.model_numel * 2 * (3 + int(self.enable_grad_checkpoint))

    def on_fit_end(self) -> None:
        avg_duration = all_reduce_mean(self.timer.duration, self.world_size)

@@ -122,5 +121,6 @@ class PerformanceEvaluator:
        if dist.get_rank() == 0:
            print(
                f"num_samples: {self.num_samples}, dp_world_size: {self.dp_world_size}, flop: {self.flop}, avg_duration: {avg_duration}, "
-               f"avg_throughput: {avg_throughput}")
+               f"avg_throughput: {avg_throughput}"
+           )
            print(f"Throughput: {avg_throughput:.2f} samples/sec, TFLOPS per GPU: {avg_tflops_per_gpu:.2f}")
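For context on the `2 * (3 + int(self.enable_grad_checkpoint))` factor in the FLOP count above: a dense forward pass costs roughly 2*N FLOPs per token (one multiply and one add per parameter), the backward pass roughly twice the forward, and activation checkpointing replays the forward once more. A small sketch with made-up numbers shows the bookkeeping:

```python
# Illustrative only: how the 2 * (3 + grad_checkpoint) factor breaks down,
# with hypothetical numbers (7B parameters, batch 8, sequence length 4096, one step).
model_numel = 7e9
batch_size, seq_len = 8, 4096
enable_grad_checkpoint = True

# forward ~2*N FLOPs/token, backward ~2x forward, checkpointing replays the forward once:
# 2 * (1 + 2 + 1) = 2 * (3 + 1)
flop_per_step = batch_size * seq_len * model_numel * 2 * (3 + int(enable_grad_checkpoint))
print(f"{flop_per_step / 1e12:.1f} TFLOPs per step")  # ~1835 TFLOPs
```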
@@ -16,17 +16,15 @@ def inference(args):
    tokenizer = T5Tokenizer.from_pretrained("google/umt5-small")
    if args.model == "test":
        config = LlamaConfig.from_pretrained("hpcai-tech/openmoe-base")
-       set_openmoe_args(config,
-                        num_experts=config.num_experts,
-                        moe_layer_interval=config.moe_layer_interval,
-                        enable_kernel=True)
+       set_openmoe_args(
+           config, num_experts=config.num_experts, moe_layer_interval=config.moe_layer_interval, enable_kernel=True
+       )
        model = OpenMoeForCausalLM(config)
    else:
        config = LlamaConfig.from_pretrained(f"hpcai-tech/openmoe-{args.model}")
-       set_openmoe_args(config,
-                        num_experts=config.num_experts,
-                        moe_layer_interval=config.moe_layer_interval,
-                        enable_kernel=False)
+       set_openmoe_args(
+           config, num_experts=config.num_experts, moe_layer_interval=config.moe_layer_interval, enable_kernel=False
+       )
        model = OpenMoeForCausalLM.from_pretrained(f"hpcai-tech/openmoe-{args.model}", config=config)
    model = model.eval().bfloat16()
    model = model.to(torch.cuda.current_device())
@@ -172,9 +172,9 @@ def make_state_dict(converted_params):
def load_t5x_weights_in_t5(model, config, t5x_checkpoint_path):
    """Replaces the params in model witht the T5X converted params."""
    variables = checkpoints.load_t5x_checkpoint(t5x_checkpoint_path)
-   converted = convert_t5x_to_pytorch(variables,
-                                      num_layers=config.num_hidden_layers,
-                                      moe_interval=config.moe_layer_interval)
+   converted = convert_t5x_to_pytorch(
+       variables, num_layers=config.num_hidden_layers, moe_interval=config.moe_layer_interval
+   )
    state_dict = make_state_dict(converted)
    model.load_state_dict(state_dict, strict=True)

@@ -203,11 +203,9 @@ def convert_t5x_checkpoint_to_pytorch(t5x_checkpoint_path, config_file, pytorch_
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Converts a native T5X checkpoint into a PyTorch checkpoint.")
    # Required parameters
-   parser.add_argument("--t5x_checkpoint_path",
-                       default=None,
-                       type=str,
-                       required=True,
-                       help="Path to the T5X checkpoint.")
+   parser.add_argument(
+       "--t5x_checkpoint_path", default=None, type=str, required=True, help="Path to the T5X checkpoint."
+   )
    parser.add_argument(
        "--config_file",
        default=None,

@@ -215,10 +213,8 @@ if __name__ == "__main__":
        required=True,
        help="The config json file corresponding to the pre-trained T5 model.\nThis specifies the model architecture.",
    )
-   parser.add_argument("--pytorch_dump_path",
-                       default=None,
-                       type=str,
-                       required=True,
-                       help="Path to the output PyTorch model.")
+   parser.add_argument(
+       "--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
+   )
    args = parser.parse_args()
    convert_t5x_checkpoint_to_pytorch(args.t5x_checkpoint_path, args.config_file, args.pytorch_dump_path)
@@ -41,9 +41,7 @@ def train_epoch(epoch, model, optimizer, _criterion, lr_scheduler, dataloader, b
        # Forward pass
        for _ in pbar:
            if use_pipeline:
-               outputs = booster.execute_pipeline(
-                   dataloader, model, _criterion, optimizer, return_loss=True
-               )
+               outputs = booster.execute_pipeline(dataloader, model, _criterion, optimizer, return_loss=True)
                # Backward and optimize
                if is_pp_last_stage:
                    loss = outputs["loss"]
@@ -1,5 +1,4 @@
from .cpu_adam_arm import CpuAdamArmExtension
from .cpu_adam_x86 import CpuAdamX86Extension

-__all__ = ['CpuAdamArmExtension', 'CpuAdamX86Extension']
+__all__ = ["CpuAdamArmExtension", "CpuAdamX86Extension"]

@@ -1,3 +1,3 @@
from .layernorm_cuda import LayerNormCudaExtension

__all__ = ["LayerNormCudaExtension"]

@@ -1,3 +1,3 @@
from .moe_cuda import MoeCudaExtension

-__all__ = ['MoeCudaExtension']
+__all__ = ["MoeCudaExtension"]

@@ -1,3 +1,3 @@
from .fused_optimizer_cuda import FusedOptimizerCudaExtension

-__all__ = ['FusedOptimizerCudaExtension']
+__all__ = ["FusedOptimizerCudaExtension"]

@@ -1,4 +1,4 @@
from .scaled_masked_softmax_cuda import ScaledMaskedSoftmaxCudaExtension
from .scaled_upper_triangle_masked_softmax_cuda import ScaledUpperTriangleMaskedSoftmaxCudaExtension

-__all__ = ['ScaledMaskedSoftmaxCudaExtension', 'ScaledUpperTriangleMaskedSoftmaxCudaExtension']
+__all__ = ["ScaledMaskedSoftmaxCudaExtension", "ScaledUpperTriangleMaskedSoftmaxCudaExtension"]
@@ -1,33 +1,33 @@
import os

from . import custom, diffusers, timm, torchaudio, torchvision, transformers
from .executor import run_fwd, run_fwd_bwd
from .registry import model_zoo

# We pick a subset of models for fast testing in order to reduce the total testing time
COMMON_MODELS = [
-   'custom_hanging_param_model',
-   'custom_nested_model',
-   'custom_repeated_computed_layers',
-   'custom_simple_net',
-   'diffusers_clip_text_model',
-   'diffusers_auto_encoder_kl',
-   'diffusers_unet2d_model',
-   'timm_densenet',
-   'timm_resnet',
-   'timm_swin_transformer',
-   'torchaudio_wav2vec2_base',
-   'torchaudio_conformer',
-   'transformers_bert_for_masked_lm',
-   'transformers_bloom_for_causal_lm',
-   'transformers_falcon_for_causal_lm',
-   'transformers_chatglm_for_conditional_generation',
-   'transformers_llama_for_casual_lm',
-   'transformers_vit_for_masked_image_modeling',
-   'transformers_mistral_for_casual_lm'
+   "custom_hanging_param_model",
+   "custom_nested_model",
+   "custom_repeated_computed_layers",
+   "custom_simple_net",
+   "diffusers_clip_text_model",
+   "diffusers_auto_encoder_kl",
+   "diffusers_unet2d_model",
+   "timm_densenet",
+   "timm_resnet",
+   "timm_swin_transformer",
+   "torchaudio_wav2vec2_base",
+   "torchaudio_conformer",
+   "transformers_bert_for_masked_lm",
+   "transformers_bloom_for_causal_lm",
+   "transformers_falcon_for_causal_lm",
+   "transformers_chatglm_for_conditional_generation",
+   "transformers_llama_for_casual_lm",
+   "transformers_vit_for_masked_image_modeling",
+   "transformers_mistral_for_casual_lm",
]

-IS_FAST_TEST = os.environ.get('FAST_TEST', '0') == '1'
+IS_FAST_TEST = os.environ.get("FAST_TEST", "0") == "1"


-__all__ = ["model_zoo", "run_fwd", "run_fwd_bwd", 'COMMON_MODELS', 'IS_FAST_TEST']
+__all__ = ["model_zoo", "run_fwd", "run_fwd_bwd", "COMMON_MODELS", "IS_FAST_TEST"]

@@ -102,4 +102,4 @@ class ModelZooRegistry(dict):
        return new_dict


model_zoo = ModelZooRegistry()
@@ -2,6 +2,7 @@ import torch

from colossalai.shardformer.modeling.chatglm2_6b.configuration_chatglm import ChatGLMConfig
from colossalai.shardformer.modeling.chatglm2_6b.modeling_chatglm import ChatGLMForConditionalGeneration, ChatGLMModel

from ..registry import ModelAttribute, model_zoo

# ================================
@@ -74,9 +74,7 @@ def exam_state_dict(shard: bool, model_name: str, size_per_shard: int, test_conf
    data = data_gen_fn()
    model.train()
    if booster.plugin.stage_manager is not None:
-       booster.execute_pipeline(
-           _preprocess_data(data), model, _criterion, optimizer, return_loss=True
-       )
+       booster.execute_pipeline(_preprocess_data(data), model, _criterion, optimizer, return_loss=True)
    else:
        output = model(**_preprocess_data(data))
        loss = criterion(output)

@@ -108,9 +106,7 @@ def exam_state_dict(shard: bool, model_name: str, size_per_shard: int, test_conf
    data_for_shard = data_gen_fn()
    data_for_origin = data_gen_fn()
    if booster.plugin.stage_manager is not None:
-       booster.execute_pipeline(
-           _preprocess_data(data_for_shard), model, _criterion, optimizer, return_loss=True
-       )
+       booster.execute_pipeline(_preprocess_data(data_for_shard), model, _criterion, optimizer, return_loss=True)
        booster.execute_pipeline(
            _preprocess_data(data_for_origin),
            new_model,
@@ -113,6 +113,7 @@ def check_torch_fsdp_ckpt():
    full_osd = FSDP.full_optim_state_dict(optimizer.unwrap_model().unwrap(), optim=optimizer)

    import copy

    sharded_osd = copy.deepcopy(full_osd)

    run_model()
@@ -1,16 +1,8 @@
-import math
-import time
-
-import numpy as np
import pytest
import torch
-import torch.nn as nn
-import transformers
from packaging import version

try:
-   import triton
-   import triton.language as tl
    HAS_TRITON = True
except ImportError:
    HAS_TRITON = False

@@ -22,6 +14,7 @@ try:
    from exllama_kernels import prepare_buffers, set_tuning_params

    from colossalai.inference.quant.gptq import CaiQuantLinear

    HAS_AUTO_GPTQ = True
except:
    HAS_AUTO_GPTQ = False

@@ -32,13 +25,14 @@ import warnings
HAS_GPTQ_CUDA = False
try:
    from colossalai.kernel.op_builder.gptq import GPTQBuilder

    gptq_cuda = GPTQBuilder().load()
    HAS_GPTQ_CUDA = True
except ImportError:
-   warnings.warn('CUDA gptq is not installed')
+   warnings.warn("CUDA gptq is not installed")
    HAS_GPTQ_CUDA = False

-TRITON_CUDA_SUPPORT = version.parse(torch.version.cuda) > version.parse('11.4')
+TRITON_CUDA_SUPPORT = version.parse(torch.version.cuda) > version.parse("11.4")

max_inner_outer_dim = 1
max_input_len = 1

@@ -64,9 +58,9 @@ def init_buffer(cai_linear, use_act_order=False):
    max_input_len = 4096
    # The temp_state buffer is required to reorder X in the act-order case.
    # The temp_dq buffer is required to dequantize weights when using cuBLAS, typically for the prefill.
-   gptq_temp_state_buffer = torch.zeros((max_input_len, max_inner_outer_dim),
-                                        dtype=torch.float16,
-                                        device=torch.cuda.current_device())
+   gptq_temp_state_buffer = torch.zeros(
+       (max_input_len, max_inner_outer_dim), dtype=torch.float16, device=torch.cuda.current_device()
+   )
    gptq_temp_dq_buffer = torch.zeros((1, max_dq_buffer_size), dtype=torch.float16, device=torch.cuda.current_device())

    gptq_cuda.prepare_buffers(torch.device(torch.cuda.current_device()), gptq_temp_state_buffer, gptq_temp_dq_buffer)

@@ -77,10 +71,11 @@ def init_buffer(cai_linear, use_act_order=False):
    gptq_cuda.set_tuning_params(matmul_recons_thd, matmul_fused_remap, matmul_no_half2)


-@pytest.mark.skipif(not TRITON_CUDA_SUPPORT or not HAS_TRITON or not HAS_AUTO_GPTQ,
-                    reason="triton requires cuda version to be higher than 11.4 or not install auto-gptq")
+@pytest.mark.skipif(
+    not TRITON_CUDA_SUPPORT or not HAS_TRITON or not HAS_AUTO_GPTQ,
+    reason="triton requires cuda version to be higher than 11.4 or not install auto-gptq",
+)
def test_gptq_linear():

    infeature = 1024
    outfeature = 1024
    group_size = 128

@@ -120,7 +115,7 @@ def test_gptq_linear():
    max_input_len = 2048
    buffers = {
        "temp_state": torch.zeros((max_input_len, max_inner_outer_dim), dtype=torch.float16, device=device),
-       "temp_dq": torch.zeros((1, max_dq_buffer_size), dtype=torch.float16, device=device)
+       "temp_dq": torch.zeros((1, max_dq_buffer_size), dtype=torch.float16, device=device),
    }

    prepare_buffers(device, buffers["temp_state"], buffers["temp_dq"])

@@ -146,5 +141,4 @@ def test_gptq_linear():


if __name__ == "__main__":
    test_gptq_linear()
@@ -24,4 +24,4 @@ def test_torchvision_models_lazy_init(subset, default_device):


if __name__ == "__main__":
    test_torchvision_models_lazy_init("transformers", "cpu")
@@ -1,5 +1,5 @@
-import torch
import pytest
+import torch

from colossalai.nn.optimizer import CPUAdam, HybridAdam
from colossalai.testing import clear_cache_before_run, parameterize

@@ -17,6 +17,7 @@ def check_params_equal(model, torch_model):
    for p, torch_p in zip(model.parameters(), torch_model.parameters()):
        assert torch.allclose(p, torch_p, atol=1e-3), f"diff: {torch.abs(p - torch_p)}"


# TODO Something wrong with ci when running this test.
@pytest.mark.skip(reason="skip because of something wrong with CI")
@clear_cache_before_run()
@@ -103,9 +103,7 @@ def run_pp(
    torch_loss = criterion(torch_output)
    torch_loss.backward()

-   pp_ret = schedule.forward_backward_step(
-       sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True
-   )
+   pp_ret = schedule.forward_backward_step(sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True)

    # check loss
    if stage_manager.is_last_stage(ignore_chunk=True):

@@ -99,9 +99,7 @@ def examine_pp(num_microbatch: int, batch_size: int):
    torch_output = torch_model(input_list[0])
    torch_loss = criterion(torch_output)
    torch_loss.backward()
-   pp_ret = schedule.forward_backward_step(
-       sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True
-   )
+   pp_ret = schedule.forward_backward_step(sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True)

    # check loss
    if stage_manager.is_last_stage():