Browse Source

[devops] remove post commit ci (#5566)

* [devops] remove post commit ci

* [misc] run pre-commit on all files

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
pull/5578/head
Hongxin Liu 8 months ago committed by GitHub
parent
commit
641b1ee71a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 1
      .github/pull_request_template.md
  2. 97
      .github/workflows/post_commit.yml
  3. 2
      .gitignore
  4. 2
      LICENSE
  5. 3
      applications/Colossal-LLaMA-2/colossal_llama2/model/init_model.py
  6. 2
      applications/Colossal-LLaMA-2/colossal_llama2/utils/ckpt_io.py
  7. 119
      applications/Colossal-LLaMA-2/colossal_llama2/utils/stream_chat_patch.py
  8. 6
      applications/Colossal-LLaMA-2/docs/example_13b.md
  9. 2
      applications/Colossal-LLaMA-2/docs/example_7b.md
  10. 2
      applications/Colossal-LLaMA-2/hostfile.example
  11. 2
      applications/Colossal-LLaMA-2/inference_example.py
  12. 1
      applications/Colossal-LLaMA-2/requirements.txt
  13. 45
      applications/Colossal-LLaMA-2/stream_chat_example.py
  14. 2
      applications/Colossal-LLaMA-2/version.txt
  15. 4
      applications/ColossalChat/examples/training_scripts/train_dpo.py
  16. 1
      applications/ColossalEval/examples/dataset_evaluation/inference.py
  17. BIN
      applications/ColossalMoE/README.md
  18. 1
      applications/ColossalMoE/infer.py
  19. 18
      applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py
  20. 2
      applications/ColossalQA/colossalqa/data_loader/document_loader.py
  21. 75
      applications/ColossalQA/colossalqa/data_loader/table_dataloader.py
  22. 66
      applications/ColossalQA/colossalqa/local/colossalcloud_llm.py
  23. 1
      applications/ColossalQA/colossalqa/local/llm.py
  24. 1
      applications/ColossalQA/colossalqa/prompt/prompt.py
  25. 8
      applications/ColossalQA/colossalqa/retriever.py
  26. 6
      applications/ColossalQA/data/data_sample/companies.txt
  27. 2
      applications/ColossalQA/data/data_sample/companies_zh.txt
  28. 2
      applications/ColossalQA/data/data_sample/csv_organization_100.csv
  29. 2
      applications/ColossalQA/data/tests/64KB.json
  30. 202
      applications/ColossalQA/data/tests/companies.csv
  31. 48
      applications/ColossalQA/data/tests/test.html
  32. 4
      applications/ColossalQA/data/tests/test.md
  33. 76
      applications/ColossalQA/data/tests/test.txt
  34. 33
      applications/ColossalQA/examples/retrieval_conversation_universal.py
  35. 12
      applications/ColossalQA/examples/webui_demo/RAG_ChatBot.py
  36. 2
      applications/ColossalQA/examples/webui_demo/README.md
  37. 52
      applications/ColossalQA/examples/webui_demo/config.py
  38. 25
      applications/ColossalQA/examples/webui_demo/server.py
  39. 31
      applications/ColossalQA/examples/webui_demo/webui.py
  40. 2
      applications/ColossalQA/pytest.ini
  41. 14
      applications/ColossalQA/tests/test_document_loader.py
  42. 80
      applications/ColossalQA/tests/test_retrieval_qa.py
  43. 2
      applications/ColossalQA/version.txt
  44. 2
      colossalai/__init__.py
  45. 9
      colossalai/booster/plugin/torch_fsdp_plugin.py
  46. 3
      colossalai/checkpoint_io/utils.py
  47. 25
      colossalai/cluster/process_group_mesh.py
  48. 49
      colossalai/inference/engine/modeling/llama.py
  49. 202
      colossalai/inference/quant/gptq/cai_gptq/cai_quant_linear.py
  50. 133
      colossalai/kernel/triton/context_attention.py
  51. 51
      colossalai/kernel/triton/flash_decoding.py
  52. 65
      colossalai/kernel/triton/llama_act_combine_kernel.py
  53. 20
      colossalai/kernel/triton/token_attention_kernel.py
  54. 4
      colossalai/legacy/inference/tensor_parallel/modeling/llama.py
  55. 2
      colossalai/shardformer/layer/__init__.py
  56. 4
      colossalai/tensor/d_tensor/__init__.py
  57. 7
      colossalai/tensor/d_tensor/api.py
  58. 1
      colossalai/zero/low_level/_utils.py
  59. 2
      docs/source/zh-Hans/advanced_tutorials/train_vit_with_hybrid_parallelism.md
  60. 4
      examples/images/vit/vit_benchmark.py
  61. 2
      examples/language/data_utils.py
  62. 4
      examples/language/llama2/finetune.py
  63. 4
      examples/language/llama2/pretrain.py
  64. 6
      examples/language/openmoe/benchmark/utils.py
  65. 14
      examples/language/openmoe/infer.py
  66. 22
      examples/language/openmoe/model/convert_openmoe_ckpt.py
  67. 4
      examples/language/opt/opt_train_demo.py
  68. 3
      extensions/cpu_adam/__init__.py
  69. 2
      extensions/layernorm/__init__.py
  70. 2
      extensions/moe/__init__.py
  71. 2
      extensions/optimizer/__init__.py
  72. 2
      extensions/softmax/__init__.py
  73. 44
      tests/kit/model_zoo/__init__.py
  74. 2
      tests/kit/model_zoo/registry.py
  75. 1
      tests/kit/model_zoo/transformers/chatglm2.py
  76. 8
      tests/test_checkpoint_io/test_hybrid_parallel_plugin_checkpoint_io.py
  77. 1
      tests/test_checkpoint_io/test_torch_fsdp_checkpoint_io.py
  78. 30
      tests/test_gptq/test_gptq_linear.py
  79. 2
      tests/test_lazy/test_models.py
  80. 3
      tests/test_optimizer/test_nvme.py
  81. 4
      tests/test_pipeline/test_schedule/test_interleaved.py
  82. 4
      tests/test_pipeline/test_schedule/test_oneF_oneB.py

1
.github/pull_request_template.md

@ -3,6 +3,7 @@
- [ ] I have created an issue for this PR for traceability
- [ ] The title follows the standard format: `[doc/gemini/tensor/...]: A concise description`
- [ ] I have added relevant tags if possible for us to better distinguish different PRs
- [ ] I have installed pre-commit: `pip install pre-commit && pre-commit install`
## 🚨 Issue number

97
.github/workflows/post_commit.yml

@ -1,97 +0,0 @@
name: post-commit
on:
pull_request:
types:
- closed
jobs:
# this job will run after a PR is merged to run pre-commit on any changed file
# so that the user does not need to learn pre-commit and pre-commit can still
# be auto-executed by the workflow
pre-commit:
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true && github.repository == 'hpcaitech/ColossalAI'
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
# the PR branch and the hpcaitech/colossal-ai main branch
# must share a common commit, we need to locate that commit,
# which is the commit checked-out or forked when the PR branch is created
# such that we can look for files changed since that commit
- name: Locate base commit
id: locate-base-sha
run: |
curBranch=$(git rev-parse --abbrev-ref HEAD)
commonCommit=$(git merge-base origin/main $curBranch)
echo $commonCommit
echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT
- name: Find the changed files
id: find-changed-files
uses: tj-actions/changed-files@v35
with:
base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
- name: List all changed files
run: |
for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
echo "$file was changed"
done
# check out the main branch
- uses: actions/checkout@v2
with:
ref: 'main'
- uses: actions/setup-python@v3
- name: Cache pre-commit hooks
uses: actions/cache@v3
with:
path: ~/.cache/pre-commit
key: ${{ runner.os }}-pre-commit-hooks
- name: Set up pre-commit
run: |
pip install pre-commit
pre-commit install
# run pre-commit on changed files
- name: Run Pre-commit
run: |
for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
pre-commit run --files $file || true
done
# create commit for pre-commit
# when all files are well formatted, there is no need to create a commit
# therefore, this step will produce an error, which should be allowed
- name: Create commits
id: commit
continue-on-error: true
run: |
git config --global user.name 'github-actions'
git config --global user.email 'github-actions@github.com'
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
git add -A
git commit -am "[format] applied code formatting on changed files in pull request ${{ github.event.pull_request.number }}"
# create pull request
- name: Create Pull Request
if: steps.commit.outcome == 'success'
id: cpr
uses: peter-evans/create-pull-request@v4
with:
branch: pre-commit-${{ github.event.pull_request.number }}
title: "[format] applied code formatting on changed files in PR ${{ github.event.pull_request.number }}"
- name: Enable Auto-merge for the New PR
if: steps.commit.outcome == 'success'
uses: peter-evans/enable-pull-request-automerge@v2
with:
pull-request-number: ${{ steps.cpr.outputs.pull-request-number }}
merge-method: squash

2
.gitignore vendored

@ -162,4 +162,4 @@ coverage.xml
# log, test files - ColossalChat
applications/ColossalChat/logs
applications/ColossalChat/tests/logs
applications/ColossalChat/tests/logs

2
LICENSE

@ -551,4 +551,4 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
THE SOFTWARE.

3
applications/Colossal-LLaMA-2/colossal_llama2/model/init_model.py

@ -8,11 +8,10 @@ import argparse
import numpy as np
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
from transformers import LlamaForCausalLM, LlamaTokenizer
from colossalai.logging import get_dist_logger
logger = get_dist_logger()

2
applications/Colossal-LLaMA-2/colossal_llama2/utils/ckpt_io.py

@ -10,8 +10,8 @@ import os
from typing import Any, Dict, Tuple, Union
import torch
from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.optimizer import Optimizer
from colossalai.booster import Booster
from colossalai.cluster import DistCoordinator

119
applications/Colossal-LLaMA-2/colossal_llama2/utils/stream_chat_patch.py

@ -1,20 +1,19 @@
from copy import deepcopy
from typing import Optional, List, Dict, Tuple, Callable, Any
from typing import Any, Callable, Dict, List, Optional, Tuple
import torch
from torch import nn
from transformers import PreTrainedTokenizer
from transformers.utils import logging
from transformers.generation.utils import GenerationConfig, LogitsProcessorList, StoppingCriteriaList
from transformers.utils import logging
logger = logging.get_logger(__name__)
def get_prompt_template(
input_query:str,
history:List[Dict]= None,
roles:list = ["", "Human", "Assistant"],
input_query: str,
history: List[Dict] = None,
roles: list = ["", "Human", "Assistant"],
) -> str:
"""
Generates a prompt template for chat models based on input and history.
@ -32,7 +31,7 @@ def get_prompt_template(
new_history = []
else:
new_history = deepcopy(history)
new_history.append({"role": roles[1], "message": input_query.strip()})
new_history.append({"role": roles[2], "message": None})
@ -48,22 +47,23 @@ def get_prompt_template(
prompt += f"{role}: <s>"
return prompt
@torch.inference_mode()
def streaming_chat(
model: Any,
model: Any,
tokenizer: PreTrainedTokenizer,
input_query: str,
history: List[Dict] = None,
roles: list = ["", "Human", "Assistant"],
past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
temperature: float = 0.8,
top_p: float = 0.95,
top_k: int = 50,
do_sample: bool = True,
input_query: str,
history: List[Dict] = None,
roles: list = ["", "Human", "Assistant"],
past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
temperature: float = 0.8,
top_p: float = 0.95,
top_k: int = 50,
do_sample: bool = True,
length_penalty: float = 1.2,
max_new_tokens: int = 512,
logits_processor: LogitsProcessorList = None,
return_past_key_values: bool = False,
max_new_tokens: int = 512,
logits_processor: LogitsProcessorList = None,
return_past_key_values: bool = False,
**kwargs,
):
"""
@ -87,7 +87,7 @@ def streaming_chat(
**kwargs: Additional keyword arguments for generation.
Yields:
Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
optionally the updated past key values if `return_past_key_values` is True.
Ensures padding is on the left side for the tokenizer.
@ -97,32 +97,37 @@ def streaming_chat(
history = []
if logits_processor is None:
logits_processor = LogitsProcessorList()
generation_kwargs = {
'temperature': temperature,
'top_p': top_p,
'top_k': top_k,
'do_sample': do_sample,
'max_new_tokens': max_new_tokens,
'length_penalty': length_penalty,
'use_cache': True,
**kwargs
"temperature": temperature,
"top_p": top_p,
"top_k": top_k,
"do_sample": do_sample,
"max_new_tokens": max_new_tokens,
"length_penalty": length_penalty,
"use_cache": True,
**kwargs,
}
prompt_str = get_prompt_template(input_query, history=history, roles=roles)
eos_token_id = [tokenizer.eos_token_id]
inputs = tokenizer(prompt_str, return_tensors="pt").to(model.device)
history.append({"role": roles[1], "message": input_query.strip()})
history.append({"role": roles[2], "message": None})
for outputs in stream_generate(model, **inputs, past_key_values=past_key_values,
eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
**generation_kwargs):
for outputs in stream_generate(
model,
**inputs,
past_key_values=past_key_values,
eos_token_id=eos_token_id,
return_past_key_values=return_past_key_values,
**generation_kwargs,
):
if return_past_key_values:
outputs, past_key_values = outputs
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) : -1]
response = tokenizer.decode(outputs)
history[-1]["message"] = response.strip()
@ -130,30 +135,30 @@ def streaming_chat(
yield response, history, past_key_values
else:
yield response, history
@torch.inference_mode()
def stream_generate(
model: Any,
input_ids: torch.Tensor,
model: Any,
input_ids: torch.Tensor,
generation_config: Optional[GenerationConfig] = None,
logits_processor: Optional[LogitsProcessorList] = None,
stopping_criteria: Optional[StoppingCriteriaList] = None,
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
return_past_key_values: bool = False,
return_past_key_values: bool = False,
**kwargs,
):
"""
Generates sequences of token ids using the specified model and generation parameters.
Adapted from https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py
Args:
model (Any): The model used for generating sequences of token ids.
input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
generation_config (Optional[GenerationConfig]): The generation configuration to be used as base parametrization for the generation call.
logits_processor (Optional[LogitsProcessorList]): Custom logits processors that complement the default logits processors built from arguments
and generation config.
stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
and a generation config.
prefix_allowed_tokens_fn (Optional[Callable[[int, torch.Tensor], List[int]]]): Function to constrain token generation.
return_past_key_values (bool): Whether to return past key values for further incremental decoding, defaults to False.
@ -169,7 +174,7 @@ def stream_generate(
generation_config = model.generation_config
generation_config = deepcopy(generation_config)
model_kwargs = generation_config.update(**kwargs)
eos_token_id = generation_config.eos_token_id
if isinstance(eos_token_id, int):
eos_token_id = [eos_token_id]
@ -177,25 +182,25 @@ def stream_generate(
if generation_config.max_new_tokens is not None:
generation_config.max_length = generation_config.max_new_tokens + input_ids_len
if input_ids_len >= generation_config.max_length:
input_ids_string = "decoder_input_ids" if model.config.is_encoder_decoder else "input_ids"
logger.warning(
f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
" increasing `max_new_tokens`."
)
f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
" increasing `max_new_tokens`."
)
logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()
# prepare distribution pre_processing samplers
logits_processor = model._get_logits_processor(
generation_config=generation_config,
input_ids_seq_length=input_ids_len,
encoder_input_ids=input_ids,
prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
logits_processor=logits_processor,
)
generation_config=generation_config,
input_ids_seq_length=input_ids_len,
encoder_input_ids=input_ids,
prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
logits_processor=logits_processor,
)
# prepare stopping criteria
stopping_criteria = model._get_stopping_criteria(
@ -205,7 +210,7 @@ def stream_generate(
logits_warper = model._get_logits_warper(generation_config)
unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
scores = None
while True:
model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
# forward pass to get next token
@ -244,4 +249,4 @@ def stream_generate(
yield input_ids
# stop when each sentence is finished, or if exceed the maximum length
if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
break
break

6
applications/Colossal-LLaMA-2/docs/example_13b.md

@ -43,7 +43,7 @@ if __name__ == '__main__':
model.to(device)
tokenizer = AutoTokenizer.from_pretrained("hpcai-tech/Colossal-LLaMA-2-13b-base")
question = "xxx" # Your question.
prompt = (f"请对下面的问题,给出准确、可靠、安全的回答\n"
f"{question}"
@ -111,7 +111,7 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value
| 整车热管理主要包括什么内容 | 整车热管理主要包括:1、发动机冷却系统;2、散热系统;3、空调冷却系统;4、采暖系统;5、动力转向器冷却系统;6、润滑油冷却系统;7、变速箱冷却系统;8、燃油冷却系统;9、冷却系统排放液系统。 |
| 简述下汽车空调-热泵循环系统的原理 | 汽车空调系统主要有三种形式,其中热泵循环系统是其中的一种。其原理是利用发动机排放的热量,通过压缩机将冷媒压缩成高温高压的状态,然后通过冷却器释放到空气中,吸收周围的热量,再通过膨胀阀释放到蒸发器中,蒸发器在压缩机的作用下将冷媒压缩成高温高压的状态,再次释放到空气中,从而实现制冷的效果。 |
#### Finance
#### Finance
| Question | <center>Colossal-LLaMA-2-13b-base</center> |
| :------: | :----------------------- |
| 如何看待当前提前还贷热 | 提前还贷是一种常见的还贷方式,在市场中存在着许多的因素和影响,需要具体分析当前的经济形势和人们心理状况的影响。提前还贷可以帮助借款人早日摆脱债务负担,但也存在一些风险和不确定性。从经济形势来看,当前利率下探、还款压力减轻等因素可能会导致一部分借款人提前还贷。此外,一些购房者可能会受到市场不景气的影响,担心未来房产价值会下降,因此提前还贷以避免资产贬值。从借款人心理状况来看,提前还贷也可能会由于借款人对未来经济预期的不确定性而产生压力和不满。因此,提前还贷并不能一概而论,需要根据具体情况进行分析。如果提前还贷对借款人的经济情况和未来规划造成不利影响,建议谨慎考虑是否要提前还贷。|
@ -150,4 +150,4 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value
## Conclusion
The Colossal-AI team's advanced 13B model, compared to the 7B version, features a refined data structure categorizing information into informative, functional, and memory replay data. Informative data is intricately subdivided into major categories, each further segmented for precise control. Concurrently, data scale across domains is expanded. Tailored enhancements meet community demands for large model capabilities in natural language processing tasks, ensuring proficiency during pre-training and cost-effective fine-tuning. Addressing security and values concerns, multidimensional controls are implemented, securing the baseline model and aligning it with correct values.
The Colossal-AI team's advanced 13B model, compared to the 7B version, features a refined data structure categorizing information into informative, functional, and memory replay data. Informative data is intricately subdivided into major categories, each further segmented for precise control. Concurrently, data scale across domains is expanded. Tailored enhancements meet community demands for large model capabilities in natural language processing tasks, ensuring proficiency during pre-training and cost-effective fine-tuning. Addressing security and values concerns, multidimensional controls are implemented, securing the baseline model and aligning it with correct values.

2
applications/Colossal-LLaMA-2/docs/example_7b.md

@ -242,4 +242,4 @@ To comprehensively assess the performance of the Colossal-LLaMA-2-7B-base model,
## Conclusion
In general, the Colossal-LLaMA-2-7B-base model not only enhances its understanding of English but also exhibits significant improvements in its comprehension of Chinese. It boasts a broad spectrum of general knowledge, encompassing various fields such as food, sports, technology, literature, games, and more. Regarding text generation tasks, the Colossal-LLaMA-2-7B-base model excels in writing performance; however, its ability to generate specific formats like code, emails, tables, etc., needs enhancement due to the scarcity of relevant training data during our training phase. When compared to the Qwen-7b-base model, the Colossal-LLaMA-2-7B-base model outperforms it in answering most English questions and some Chinese questions, as demonstrated in the examples above.
Presently, the Colossal-LLaMA-2-7B-base model already exhibits some capabilities in sentiment analysis, logical reasoning, information extraction, role-play, classification, and rewriting. These capabilities are poised for further improvement in the future as part of our ongoing enhancements.
Presently, the Colossal-LLaMA-2-7B-base model already exhibits some capabilities in sentiment analysis, logical reasoning, information extraction, role-play, classification, and rewriting. These capabilities are poised for further improvement in the future as part of our ongoing enhancements.

2
applications/Colossal-LLaMA-2/hostfile.example

@ -1,2 +1,2 @@
hostname1
hostname2
hostname2

2
applications/Colossal-LLaMA-2/inference_example.py

@ -15,7 +15,7 @@ def load_model(model_path, device="cuda", **kwargs):
model.to(device)
try:
tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side='left')
tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left")
except OSError:
raise ImportError("Tokenizer not found. Please check if the tokenizer exists or the model path is correct.")

1
applications/Colossal-LLaMA-2/requirements.txt

@ -12,4 +12,3 @@ flash-attn>=2.0.0,<=2.0.5
tqdm
sentencepiece==0.1.99
protobuf<=3.20.0

45
applications/Colossal-LLaMA-2/stream_chat_example.py

@ -1,11 +1,11 @@
import os
import argparse
from transformers import AutoTokenizer, AutoModelForCausalLM
from colossal_llama2.utils.stream_chat_patch import streaming_chat
from transformers import AutoModelForCausalLM, AutoTokenizer
SYSTEM = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
def main(args):
model = AutoModelForCausalLM.from_pretrained(args.model_path).cuda().eval()
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)
@ -27,29 +27,34 @@ def main(args):
print(f"\n{roles[2]}: ", end="")
gen_len = 0
for response, history, past_key_values in streaming_chat(
model, tokenizer, input_query, history=history, roles=roles,
temperature = args.temperature,
top_p = args.top_p,
top_k = args.top_k,
do_sample = args.do_sample,
length_penalty = args.length_penalty,
max_new_tokens = args.max_new_tokens,
model,
tokenizer,
input_query,
history=history,
roles=roles,
temperature=args.temperature,
top_p=args.top_p,
top_k=args.top_k,
do_sample=args.do_sample,
length_penalty=args.length_penalty,
max_new_tokens=args.max_new_tokens,
past_key_values=past_key_values,
return_past_key_values=True):
return_past_key_values=True,
):
output = response[gen_len:]
print(output, end="", flush=True)
gen_len = len(response)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str, default=None, help="path to chat version model")
parser.add_argument('--tokenizer_path', type=str, default=None, help="path to chat version tokenizer")
parser.add_argument('--temperature', type=float, default=0.8, help="set temperature")
parser.add_argument('--top_p', type=float, default=0.95, help="set top p value")
parser.add_argument('--top_k', type=int, default=50, help="set top k value")
parser.add_argument('--do_sample', type=bool, default=True, help="whether turn on do_sample or not")
parser.add_argument('--length_penalty', type=float, default=1.2, help="set length penalty")
parser.add_argument('--max_new_tokens', type=int, default=512, help="set max new tokens")
parser.add_argument("--model_path", type=str, default=None, help="path to chat version model")
parser.add_argument("--tokenizer_path", type=str, default=None, help="path to chat version tokenizer")
parser.add_argument("--temperature", type=float, default=0.8, help="set temperature")
parser.add_argument("--top_p", type=float, default=0.95, help="set top p value")
parser.add_argument("--top_k", type=int, default=50, help="set top k value")
parser.add_argument("--do_sample", type=bool, default=True, help="whether turn on do_sample or not")
parser.add_argument("--length_penalty", type=float, default=1.2, help="set length penalty")
parser.add_argument("--max_new_tokens", type=int, default=512, help="set max new tokens")
args = parser.parse_args()
main(args)
main(args)

2
applications/Colossal-LLaMA-2/version.txt

@ -1 +1 @@
0.0.1
0.0.1

4
applications/ColossalChat/examples/training_scripts/train_dpo.py

@ -20,13 +20,13 @@ import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import GeminiPlugin, HybridParallelPlugin, LowLevelZeroPlugin
from colossalai.cluster import DistCoordinator
from colossalai.logging import get_dist_logger
from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
from colossalai.nn.optimizer import HybridAdam
from colossalai.utils import get_current_device
from colossalai.logging import get_dist_logger
logger = get_dist_logger()
def train(args):
# check lora compatibility
if "gemini" in args.plugin and args.lora_rank > 0:

1
applications/ColossalEval/examples/dataset_evaluation/inference.py

@ -3,7 +3,6 @@ import copy
import os
from typing import Dict, List
import torch
import torch.distributed as dist
from colossal_eval import dataset, models, utils

BIN
applications/ColossalMoE/README.md

Binary file not shown.

1
applications/ColossalMoE/infer.py

@ -106,6 +106,5 @@ def main():
print(f"[{coordinator.rank}] {outputs}")
if __name__ == "__main__":
main()

18
applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py

@ -24,6 +24,7 @@ from langchain.pydantic_v1 import Field
from langchain.schema import BaseRetriever, Document
from langchain.schema.language_model import BaseLanguageModel
class CustomBaseRetrievalQA(BaseRetrievalQA):
"""Base class for question-answering chains."""
@ -98,7 +99,6 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
for k, v in inputs.items()
if k in ["stop", "temperature", "top_k", "top_p", "max_new_tokens", "doc_prefix"]
}
answers = []
if self.combine_documents_chain.memory is not None:
buffered_history_backup, summarized_history_temp_backup = copy.deepcopy(
self.combine_documents_chain.memory.buffered_history
@ -117,10 +117,10 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
) = copy.deepcopy(buffered_history_backup), copy.deepcopy(summarized_history_temp_backup)
# if rejection_trigger_keywords is not given, return the response from LLM directly
rejection_trigger_keywords = inputs.get('rejection_trigger_keywords', [])
rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
answer = answer if all([rej not in answer for rej in rejection_trigger_keywords]) else None
if answer is None:
answer = inputs.get('rejection_answer', "抱歉,根据提供的信息无法回答该问题。")
if answer is None:
answer = inputs.get("rejection_answer", "抱歉,根据提供的信息无法回答该问题。")
if self.combine_documents_chain.memory is not None:
self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})
@ -161,10 +161,14 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
input_documents=docs, question=question, callbacks=_run_manager.get_child(), **kwargs
)
# if rejection_trigger_keywords is not given, return the response from LLM directly
rejection_trigger_keywords = inputs.get('rejection_trigger_keywords', [])
answer = answer if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords)==0 else None
rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
answer = (
answer
if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords) == 0
else None
)
if answer is None:
answer = inputs.get('rejection_answer', "抱歉,根据提供的信息无法回答该问题。")
answer = inputs.get("rejection_answer", "抱歉,根据提供的信息无法回答该问题。")
self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})
if self.return_source_documents:

2
applications/ColossalQA/colossalqa/data_loader/document_loader.py

@ -126,7 +126,7 @@ class DocumentLoader:
else:
# May ba a directory, we strictly follow the glob path and will not load files in subdirectories
pass
def clear(self):
"""
Clear loaded data.

75
applications/ColossalQA/colossalqa/data_loader/table_dataloader.py

@ -1,39 +1,40 @@
'''
"""
Class for loading table type data. please refer to Pandas-Input/Output for file format details.
'''
"""
import os
import glob
import os
import pandas as pd
from sqlalchemy import create_engine
from colossalqa.utils import drop_table
from colossalqa.mylogging import get_logger
from colossalqa.utils import drop_table
from sqlalchemy import create_engine
logger = get_logger()
SUPPORTED_DATA_FORMAT = ['.csv','.xlsx', '.xls','.json','.html','.h5', '.hdf5','.parquet','.feather','.dta']
SUPPORTED_DATA_FORMAT = [".csv", ".xlsx", ".xls", ".json", ".html", ".h5", ".hdf5", ".parquet", ".feather", ".dta"]
class TableLoader:
'''
"""
Load tables from different files and serve a sql database for database operations
'''
def __init__(self, files: str,
sql_path:str='sqlite:///mydatabase.db',
verbose=False, **kwargs) -> None:
'''
"""
def __init__(self, files: str, sql_path: str = "sqlite:///mydatabase.db", verbose=False, **kwargs) -> None:
"""
Args:
files: list of files (list[file path, name])
sql_path: how to serve the sql database
**kwargs: keyword type arguments, useful for certain document types
'''
**kwargs: keyword type arguments, useful for certain document types
"""
self.data = {}
self.verbose = verbose
self.sql_path = sql_path
self.kwargs = kwargs
self.sql_engine = create_engine(self.sql_path)
drop_table(self.sql_engine)
self.sql_engine = create_engine(self.sql_path)
for item in files:
path = item[0]
@ -42,68 +43,68 @@ class TableLoader:
raise FileNotFoundError(f"{path} doesn't exists")
if not any([path.endswith(i) for i in SUPPORTED_DATA_FORMAT]):
raise TypeError(f"{path} not supported. Supported type {SUPPORTED_DATA_FORMAT}")
logger.info("loading data", verbose=self.verbose)
self.load_data(path)
logger.info("data loaded", verbose=self.verbose)
self.to_sql(path, dataset_name)
def load_data(self, path):
'''
"""
Load data and serve the data as sql database.
Data must be in pandas format
'''
"""
files = []
# Handle glob expression
try:
files = glob.glob(path)
except Exception as e:
logger.error(e)
if len(files)==0:
if len(files) == 0:
raise ValueError("Unsupported file/directory format. For directories, please use glob expression")
elif len(files)==1:
elif len(files) == 1:
path = files[0]
else:
for file in files:
self.load_data(file)
if path.endswith('.csv'):
if path.endswith(".csv"):
# Load csv
self.data[path] = pd.read_csv(path)
elif path.endswith('.xlsx') or path.endswith('.xls'):
elif path.endswith(".xlsx") or path.endswith(".xls"):
# Load excel
self.data[path] = pd.read_excel(path) # You can adjust the sheet_name as needed
elif path.endswith('.json'):
elif path.endswith(".json"):
# Load json
self.data[path] = pd.read_json(path)
elif path.endswith('.html'):
elif path.endswith(".html"):
# Load html
html_tables = pd.read_html(path)
# Choose the desired table from the list of DataFrame objects
self.data[path] = html_tables[0] # You may need to adjust this index
elif path.endswith('.h5') or path.endswith('.hdf5'):
elif path.endswith(".h5") or path.endswith(".hdf5"):
# Load h5
self.data[path] = pd.read_hdf(path, key=self.kwargs.get('key', 'data')) # You can adjust the key as needed
elif path.endswith('.parquet'):
self.data[path] = pd.read_hdf(path, key=self.kwargs.get("key", "data")) # You can adjust the key as needed
elif path.endswith(".parquet"):
# Load parquet
self.data[path] = pd.read_parquet(path, engine='fastparquet')
elif path.endswith('.feather'):
self.data[path] = pd.read_parquet(path, engine="fastparquet")
elif path.endswith(".feather"):
# Load feather
self.data[path] = pd.read_feather(path)
elif path.endswith('.dta'):
elif path.endswith(".dta"):
# Load dta
self.data[path] = pd.read_stata(path)
else:
raise ValueError("Unsupported file format")
def to_sql(self, path, table_name):
'''
"""
Serve the data as sql database.
'''
self.data[path].to_sql(table_name, con=self.sql_engine, if_exists='replace', index=False)
"""
self.data[path].to_sql(table_name, con=self.sql_engine, if_exists="replace", index=False)
logger.info(f"Loaded to Sqlite3\nPath: {path}", verbose=self.verbose)
return self.sql_path
def get_sql_path(self):
return self.sql_path
@ -113,7 +114,3 @@ class TableLoader:
self.sql_engine.dispose()
del self.data
del self.sql_engine

66
applications/ColossalQA/colossalqa/local/colossalcloud_llm.py

@ -21,7 +21,7 @@ print(resp) # super-heavyweight awesome-natured yawning Australian creature!
"""
import json
from typing import Any, List, Mapping, Optional
from typing import Any, Mapping
import requests
from langchain.llms.base import LLM
@ -31,31 +31,31 @@ from langchain.utils import get_from_dict_or_env
class ColossalCloudLLM(LLM):
"""
A custom LLM class that integrates LLMs running on the ColossalCloud Platform
"""
n: int
gen_config: dict = None
n: int
gen_config: dict = None
auth_config: dict = None
valid_gen_para: list = ['max_new_tokens', 'top_k',
'top_p', 'temperature', 'repetition_penalty']
valid_gen_para: list = ["max_new_tokens", "top_k", "top_p", "temperature", "repetition_penalty"]
def __init__(self, gen_config=None, **kwargs):
"""
Args:
gen_config: config for generation,
max_new_tokens: 50 by default
top_k: (1, vocab_size)
top_k: (1, vocab_size)
top_p: (0, 1) if not None
temperature: (0, inf) if not None
temperature: (0, inf) if not None
repetition_penalty: (1, inf) if not None
"""
super(ColossalCloudLLM, self).__init__(**kwargs)
if gen_config is None:
self.gen_config = {"max_new_tokens": 50}
else:
if gen_config is None:
self.gen_config = {"max_new_tokens": 50}
else:
assert "max_new_tokens" in gen_config, "max_new_tokens is a compulsory key in the gen config"
self.gen_config = gen_config
@property
def _identifying_params(self) -> Mapping[str, Any]:
"""Get the identifying parameters."""
@ -63,17 +63,17 @@ class ColossalCloudLLM(LLM):
@property
def _llm_type(self) -> str:
return 'ColossalCloudLLM'
return "ColossalCloudLLM"
def set_auth_config(self, **kwargs):
url = get_from_dict_or_env(kwargs, "url", "URL")
host = get_from_dict_or_env(kwargs, "host", "HOST")
auth_config = {}
auth_config['endpoint'] = url
auth_config['Host'] = host
auth_config["endpoint"] = url
auth_config["Host"] = host
self.auth_config = auth_config
def _call(self, prompt: str, stop=None, **kwargs: Any) -> str:
"""
Args:
@ -81,15 +81,17 @@ class ColossalCloudLLM(LLM):
stop: A list of strings to stop generation when encountered
Returns:
The string generated by the model
The string generated by the model
"""
# Update the generation arguments
for key, value in kwargs.items():
if key not in self.valid_gen_para:
raise KeyError(f"Invalid generation parameter: '{key}'. Valid keys are: {', '.join(self.valid_gen_para)}")
raise KeyError(
f"Invalid generation parameter: '{key}'. Valid keys are: {', '.join(self.valid_gen_para)}"
)
if key in self.gen_config:
self.gen_config[key] = value
resp_text = self.text_completion(prompt, self.gen_config, self.auth_config)
# TODO: This may cause excessive tokens count
if stop is not None:
@ -97,29 +99,19 @@ class ColossalCloudLLM(LLM):
if stopping_words in resp_text:
resp_text = resp_text.split(stopping_words)[0]
return resp_text
def text_completion(self, prompt, gen_config, auth_config):
# Required Parameters
endpoint = auth_config.pop('endpoint')
max_new_tokens = gen_config.pop('max_new_tokens')
endpoint = auth_config.pop("endpoint")
max_new_tokens = gen_config.pop("max_new_tokens")
# Optional Parameters
optional_params = ['top_k', 'top_p', 'temperature', 'repetition_penalty'] # Self.optional
optional_params = ["top_k", "top_p", "temperature", "repetition_penalty"] # Self.optional
gen_config = {key: gen_config[key] for key in optional_params if key in gen_config}
# Define the data payload
data = {
"max_new_tokens": max_new_tokens,
"history": [
{"instruction": prompt, "response": ""}
],
**gen_config
}
headers = {
"Content-Type": "application/json",
**auth_config # 'Host',
}
data = {"max_new_tokens": max_new_tokens, "history": [{"instruction": prompt, "response": ""}], **gen_config}
headers = {"Content-Type": "application/json", **auth_config} # 'Host',
# Make the POST request
response = requests.post(endpoint, headers=headers, data=json.dumps(data))
response.raise_for_status() # raise error if return code is not 200(success)
response.raise_for_status() # raise error if return code is not 200(success)
# Check the response
return response.text

1
applications/ColossalQA/colossalqa/local/llm.py

@ -193,4 +193,3 @@ class VllmLLM(LLM):
def _identifying_params(self) -> Mapping[str, int]:
"""Get the identifying parameters."""
return {"n": self.n}

1
applications/ColossalQA/colossalqa/prompt/prompt.py

@ -4,7 +4,6 @@ All custom prompt templates are defined here.
from langchain.prompts.prompt import PromptTemplate
# Below are Chinese retrieval qa prompts
_CUSTOM_SUMMARIZER_TEMPLATE_ZH = """请递进式地总结所提供的当前对话,将当前对话的摘要内容添加到先前已有的摘要上,返回一个融合了当前对话的新的摘要。

8
applications/ColossalQA/colossalqa/retriever.py

@ -99,13 +99,7 @@ class CustomRetriever(BaseRetriever):
def clear_documents(self):
"""Clear all document vectors from database"""
for source in self.vector_stores:
index(
[],
self.record_managers[source],
self.vector_stores[source],
cleanup="full",
source_id_key="source"
)
index([], self.record_managers[source], self.vector_stores[source], cleanup="full", source_id_key="source")
self.vector_stores = {}
self.sql_index_database = {}
self.record_managers = {}

6
applications/ColossalQA/data/data_sample/companies.txt

@ -1,6 +1,6 @@
Overview The Straits Times is the English flagship daily of SPH Media, one of the leading media companies in Asia. Launched on July 15, 1845, its comprehensive coverage of news from home and around the world makes The Straits Times the most-read newspaper in Singapore. Quality news, in-depth analyses, impactful commentaries and breaking stories are packaged to give readers riveting accounts of events in Singapore, the region, and beyond. The most read newspaper in Singapore, both in terms of print and digital, it reaches 1.33 million people every day. The Straits Times' key strength is in its world class coverage of news outside Singapore. With 20 bureaus in major cities around the world, The Straits Times correspondents bring world news to readers on a Singapore platter, helping readers to appreciate world events from a Singaporean perspective. Website http://www.straitstimes.com Phone 63196319Phone number is 63196319 Industry Newspaper Publishing Company size 1,001-5,000 employees 183 on LinkedIn Includes members with current employer listed as The Straits Times, including part-time roles. Headquarters Singapore, Singapore Founded 1845 Specialties News and Digital media
About With over 500 properties worldwide, Marriott Hotels has reimagined hospitality to exceed the expectations of business, group, and leisure travelers.
About With over 500 properties worldwide, Marriott Hotels has reimagined hospitality to exceed the expectations of business, group, and leisure travelers.
Marriott Hotels, Marriott’s flagship brand of quality-tier, full-service hotels and resorts, provides consistent, dependable and genuinely caring experiences to guests on their terms. Marriott is a brilliant host to guests who effortlessly blend life and work, and who are inspired by how modern travel enhances them both. Our hotels offer warm, professional service; sophisticated yet functional guest room design; lobby spaces that facilitate working, dining and socializing; restaurants and bars serving international cuisine prepared simply and from the freshest ingredients; meeting and event spaces and services that are gold standard; and expansive, 24-hour fitness facilities.
Overview AERCO International, Inc. is a recognized leader in delivering cost-effective, condensing commercial boilers, high-efficiency water heaters across a variety of markets including education, lodging, government, office buildings, healthcare, industrial and multifamily housing. AERCO's system design approach provides customer-specific solutions that deliver superior building performance at a lower operating cost while assuring uptime reliability. When AERCO was founded in 1949, it introduced a revolutionary design for an indirect-fired water heater that heated water on demand, and without storage, at a controlled temperature. This innovation became today's standard for water heaters, maximizing the recovery of latent heat energy and significantly increasing operating efficiency. AERCO continued to innovate and in 1988, introduced the first condensing and fully modulating boiler and water heater to the commercial market. The modulating capability of these products, still unsurpassed more than 25 years later, matches the equipment's output to real-time heating demand, ensuring the units draw no more fuel to operate than is absolutely necessary. This not only saves precious energy, but also ensures money doesn't needlessly disappear "up the stack." AERCO differentiates itself through a solution-based model, leveraging decades of engineering experience and industry application expertise to understand each customer’s unique needs. By partnering directly with customers and end-users to understand their project-specific requirements, AERCO provides tailored application solutions that are comprised of original product technologies including high efficiency condensing products, compact footprints, high turndown ratios, unique fuel delivery, leading control systems and proprietary design elements that combine to deliver up to 99% efficiency. Website http://www.aerco.com Phone 845-580-8000Phone number is 845-580-8000 Industry Industrial Machinery Manufacturing Company size 51-200 employees 119 on LinkedIn Includes members with current employer listed as AERCO International, Inc., including part-time roles. Headquarters Blauvelt, NY Founded 1949 Specialties Leading manufacturer of condensing boilers, water heating and energy recovery products and The originator of semi-instantaneous water heating
Overview AERCO International, Inc. is a recognized leader in delivering cost-effective, condensing commercial boilers, high-efficiency water heaters across a variety of markets including education, lodging, government, office buildings, healthcare, industrial and multifamily housing. AERCO's system design approach provides customer-specific solutions that deliver superior building performance at a lower operating cost while assuring uptime reliability. When AERCO was founded in 1949, it introduced a revolutionary design for an indirect-fired water heater that heated water on demand, and without storage, at a controlled temperature. This innovation became today's standard for water heaters, maximizing the recovery of latent heat energy and significantly increasing operating efficiency. AERCO continued to innovate and in 1988, introduced the first condensing and fully modulating boiler and water heater to the commercial market. The modulating capability of these products, still unsurpassed more than 25 years later, matches the equipment's output to real-time heating demand, ensuring the units draw no more fuel to operate than is absolutely necessary. This not only saves precious energy, but also ensures money doesn't needlessly disappear "up the stack." AERCO differentiates itself through a solution-based model, leveraging decades of engineering experience and industry application expertise to understand each customer’s unique needs. By partnering directly with customers and end-users to understand their project-specific requirements, AERCO provides tailored application solutions that are comprised of original product technologies including high efficiency condensing products, compact footprints, high turndown ratios, unique fuel delivery, leading control systems and proprietary design elements that combine to deliver up to 99% efficiency. Website http://www.aerco.com Phone 845-580-8000Phone number is 845-580-8000 Industry Industrial Machinery Manufacturing Company size 51-200 employees 119 on LinkedIn Includes members with current employer listed as AERCO International, Inc., including part-time roles. Headquarters Blauvelt, NY Founded 1949 Specialties Leading manufacturer of condensing boilers, water heating and energy recovery products and The originator of semi-instantaneous water heating
Prince PLC: Overview We are a global leader of quality water solutions for residential, industrial, municipal, and commercial settings. Our family of brands offers one of the most varied product lines in the world, with world-class, water-related solutions focused on: • Plumbing & Flow Control • Water Quality & Conditioning • Water Reuse & Drainage • HVAC • Municipal Waterworks Strategic Goals Watts Water is traded on the New York Stock Exchange under the symbol “WTS.” As a public company, growing shareholder value is critical. To that end, we focus on a five-part Global Strategy: Growth, Commercial Excellence, Operational Excellence, “One Watts Water,” and a Talent & Performance Culture. Follow us on all social media platforms @WattsWater Website http://www.watts.com/ Industry Wholesale Building Materials Company size 5,001-10,000 employees 2,248 on LinkedIn Includes members with current employer listed as Watts Water Technologies, including part-time roles. Headquarters North Andover, MA Specialties Plumbing, HVAC, Water Quality, Gas, Conditioning, Waterworks, and Drainage
About Courtyard Hotels is Marriott International’s largest hotel brand, with more than 1,100 hotels in over 50 countries worldwide. So, no matter where passion takes you, you’ll find us there to help you follow it. Proud members of Marriott Bonvoy.
About Courtyard Hotels is Marriott International’s largest hotel brand, with more than 1,100 hotels in over 50 countries worldwide. So, no matter where passion takes you, you’ll find us there to help you follow it. Proud members of Marriott Bonvoy.

2
applications/ColossalQA/data/data_sample/companies_zh.txt

@ -3,4 +3,4 @@
万豪酒店(Marriott Hotels)是万豪旗下优质、全方位服务酒店和度假村的旗舰品牌,为客人提供始终如一、可靠和真诚关怀的体验。万豪是一个出色的主人,客人可以轻松地将生活和工作融合在一起,并受到现代旅行如何增强两者的启发。我们的酒店提供热情、专业的服务;精致而实用的客房设计;大堂空间,方便工作、餐饮和社交;餐厅和酒吧提供简单的国际美食和最新鲜的食材;会议及活动场地及服务均属黄金标准;还有宽敞的24小时健身设施。
AERCO International, Inc.是公认的领导者,为教育、住宿、政府、办公楼、医疗保健、工业和多户住宅等各种市场提供具有成本效益的冷凝商用锅炉和高效热水器。AERCO的系统设计方法为客户提供特定的解决方案,以较低的运营成本提供卓越的建筑性能,同时确保正常运行时间的可靠性。AERCO成立于1949年,它推出了一种革命性的设计,用于间接燃烧热水器,在控制温度下按需加热水,而无需储存。这一创新成为当今热水器的标准,最大限度地回收潜热能量,显著提高运行效率。AERCO不断创新,并于1988年向商业市场推出了第一台冷凝和全调制锅炉和热水器。这些产品的调制能力,在超过25年后仍然无与伦比,使设备的输出与实时加热需求相匹配,确保机组不会消耗更多的燃料来运行,除非绝对必要。这不仅节省了宝贵的能源,还确保了钱不会不必要地消失在“堆栈”上。AERCO通过基于解决方案的模式脱颖而出,利用数十年的工程经验和行业应用专业知识来了解每个客户的独特需求。通过与客户和最终用户直接合作,了解他们的项目具体要求,AERCO提供量身定制的应用解决方案,这些解决方案由原创产品技术组成,包括高效冷凝产品,紧凑的足迹,高降压比,独特的燃料输送,领先的控制系统和专有设计元素,结合起来可提供高达99%的效率。网址http://www.aerco.com电话845-580- 8000电话号码845-580-8000工业工业机械制造公司规模51-200名员工LinkedIn上包括当前雇主AERCO International, Inc的成员,包括兼职职位。总部成立于1949年,纽约州布劳维尔特,专长:冷凝锅炉,水加热和能源回收产品的领先制造商,半瞬时水加热的鼻祖
Prince PLC:概述Prince PLC是为住宅、工业、市政和商业环境提供优质水解决方案的全球领导者。我们的品牌家族提供世界上最多样化的产品线之一,拥有世界级的水相关解决方案,专注于:•管道和流量控制•水质和调理•水再利用和排水•hvac•市政水务战略目标瓦茨水务在纽约证券交易所上市,代码为“WTS”。作为一家上市公司,股东价值的增长至关重要。为此,我们将重点放在五部分全球战略上:增长、卓越商业、卓越运营、“一瓦茨水”以及人才与绩效文化。在所有社交媒体平台关注我们@WattsWater网站http://www.watts.com/行业批发建材公司规模5,001-10,000名员工领英2,248名包括目前雇主为WattsWater Technologies的成员,包括兼职职位。总部北安多弗,MA专业管道,暖通空调,水质,气体,空调,自来水厂和排水
万怡酒店是万豪国际最大的酒店品牌,在全球50多个国家拥有1100多家酒店。所以,无论你的激情带你去哪里,你都会发现我们会帮助你追随它。万豪酒店的骄傲会员。
万怡酒店是万豪国际最大的酒店品牌,在全球50多个国家拥有1100多家酒店。所以,无论你的激情带你去哪里,你都会发现我们会帮助你追随它。万豪酒店的骄傲会员。

2
applications/ColossalQA/data/data_sample/csv_organization_100.csv

@ -98,4 +98,4 @@ Index,Organization Id,Company Name,Website,Country,Description,Founded,Industry,
97,BA6Cd9Dae2Efd62,Good Ltd,http://duffy.com/,Anguilla,Reverse-engineered composite moratorium,1971,Consumer Services,4292
98,E7df80C60Abd7f9,Clements-Espinoza,http://www.flowers.net/,Falkland Islands (Malvinas),Progressive modular hub,1991,Broadcast Media,236
99,AFc285dbE2fEd24,Mendez Inc,https://www.burke.net/,Kyrgyz Republic,User-friendly exuding migration,1993,Education Management,339
100,e9eB5A60Cef8354,Watkins-Kaiser,http://www.herring.com/,Togo,Synergistic background access,2009,Financial Services,2785
100,e9eB5A60Cef8354,Watkins-Kaiser,http://www.herring.com/,Togo,Synergistic background access,2009,Financial Services,2785

1 Index Organization Id Company Name Website Country Description Founded Industry Number of employees
98 97 BA6Cd9Dae2Efd62 Good Ltd http://duffy.com/ Anguilla Reverse-engineered composite moratorium 1971 Consumer Services 4292
99 98 E7df80C60Abd7f9 Clements-Espinoza http://www.flowers.net/ Falkland Islands (Malvinas) Progressive modular hub 1991 Broadcast Media 236
100 99 AFc285dbE2fEd24 Mendez Inc https://www.burke.net/ Kyrgyz Republic User-friendly exuding migration 1993 Education Management 339
101 100 e9eB5A60Cef8354 Watkins-Kaiser http://www.herring.com/ Togo Synergistic background access 2009 Financial Services 2785

2
applications/ColossalQA/data/tests/64KB.json

@ -4,4 +4,4 @@
{"content":"Aliquam sollicitudin ante ligula, eget malesuada nibh efficitur et. Pellentesque massa sem, scelerisque sit amet odio id, cursus tempor urna. Etiam congue dignissim volutpat. Vestibulum pharetra libero et velit gravida euismod."}
],
"name":"player"
}
}

202
applications/ColossalQA/data/tests/companies.csv

@ -1,101 +1,101 @@
Index,Organization Id,Name,Website,Country,Description,Founded,Industry,Number of employees
1,FAB0d41d5b5d22c,Ferrell LLC,https://price.net/,Papua New Guinea,Horizontal empowering knowledgebase,1990,Plastics,3498
2,6A7EdDEA9FaDC52,"Mckinney, Riley and Day",http://www.hall-buchanan.info/,Finland,User-centric system-worthy leverage,2015,Glass / Ceramics / Concrete,4952
3,0bFED1ADAE4bcC1,Hester Ltd,http://sullivan-reed.com/,China,Switchable scalable moratorium,1971,Public Safety,5287
4,2bFC1Be8a4ce42f,Holder-Sellers,https://becker.com/,Turkmenistan,De-engineered systemic artificial intelligence,2004,Automotive,921
5,9eE8A6a4Eb96C24,Mayer Group,http://www.brewer.com/,Mauritius,Synchronized needs-based challenge,1991,Transportation,7870
6,cC757116fe1C085,Henry-Thompson,http://morse.net/,Bahamas,Face-to-face well-modulated customer loyalty,1992,Primary / Secondary Education,4914
7,219233e8aFF1BC3,Hansen-Everett,https://www.kidd.org/,Pakistan,Seamless disintermediate collaboration,2018,Publishing Industry,7832
8,ccc93DCF81a31CD,Mcintosh-Mora,https://www.brooks.com/,Heard Island and McDonald Islands,Centralized attitude-oriented capability,1970,Import / Export,4389
9,0B4F93aA06ED03e,Carr Inc,http://ross.com/,Kuwait,Distributed impactful customer loyalty,1996,Plastics,8167
10,738b5aDe6B1C6A5,Gaines Inc,http://sandoval-hooper.com/,Uzbekistan,Multi-lateral scalable protocol,1997,Outsourcing / Offshoring,9698
11,AE61b8Ffebbc476,Kidd Group,http://www.lyons.com/,Bouvet Island (Bouvetoya),Proactive foreground paradigm,2001,Primary / Secondary Education,7473
12,eb3B7D06cCdD609,Crane-Clarke,https://www.sandoval.com/,Denmark,Front-line clear-thinking encryption,2014,Food / Beverages,9011
13,8D0c29189C9798B,"Keller, Campos and Black",https://www.garner.info/,Liberia,Ameliorated directional emulation,2020,Museums / Institutions,2862
14,D2c91cc03CA394c,Glover-Pope,http://www.silva.biz/,United Arab Emirates,Persevering contextually-based approach,2013,Medical Practice,9079
15,C8AC1eaf9C036F4,Pacheco-Spears,https://aguilar.com/,Sweden,Secured logistical synergy,1984,Maritime,769
16,b5D10A14f7a8AfE,Hodge-Ayers,http://www.archer-elliott.com/,Honduras,Future-proofed radical implementation,1990,Facilities Services,8508
17,68139b5C4De03B4,"Bowers, Guerra and Krause",http://www.carrillo-nicholson.com/,Uganda,De-engineered transitional strategy,1972,Primary / Secondary Education,6986
18,5c2EffEfdba2BdF,Mckenzie-Melton,http://montoya-thompson.com/,Hong Kong,Reverse-engineered heuristic alliance,1998,Investment Management / Hedge Fund / Private Equity,4589
19,ba179F19F7925f5,Branch-Mann,http://www.lozano.com/,Botswana,Adaptive intangible frame,1999,Architecture / Planning,7961
20,c1Ce9B350BAc66b,Weiss and Sons,https://barrett.com/,Korea,Sharable optimal functionalities,2011,Plastics,5984
21,8de40AC4e6EaCa4,"Velez, Payne and Coffey",http://burton.com/,Luxembourg,Mandatory coherent synergy,1986,Wholesale,5010
22,Aad86a4F0385F2d,Harrell LLC,http://www.frey-rosario.com/,Guadeloupe,Reverse-engineered mission-critical moratorium,2018,Construction,2185
23,22aC3FFd64fD703,"Eaton, Reynolds and Vargas",http://www.freeman.biz/,Monaco,Self-enabling multi-tasking process improvement,2014,Luxury Goods / Jewelry,8987
24,5Ec4C272bCf085c,Robbins-Cummings,http://donaldson-wilkins.com/,Belgium,Organic non-volatile hierarchy,1991,Pharmaceuticals,5038
25,5fDBeA8BB91a000,Jenkins Inc,http://www.kirk.biz/,South Africa,Front-line systematic help-desk,2002,Insurance,1215
26,dFfD6a6F9AC2d9C,"Greene, Benjamin and Novak",http://www.kent.net/,Romania,Centralized leadingedge moratorium,2012,Museums / Institutions,4941
27,4B217cC5a0674C5,"Dickson, Richmond and Clay",http://everett.com/,Czech Republic,Team-oriented tangible complexity,1980,Real Estate / Mortgage,3122
28,88b1f1cDcf59a37,Prince-David,http://thompson.com/,Christmas Island,Virtual holistic methodology,1970,Banking / Mortgage,1046
29,f9F7bBCAEeC360F,Ayala LLC,http://www.zhang.com/,Philippines,Open-source zero administration hierarchy,2021,Legal Services,7664
30,7Cb3AeFcE4Ba31e,Rivas Group,https://hebert.org/,Australia,Open-architected well-modulated capacity,1998,Logistics / Procurement,4155
31,ccBcC32adcbc530,"Sloan, Mays and Whitehead",http://lawson.com/,Chad,Face-to-face high-level conglomeration,1997,Civil Engineering,365
32,f5afd686b3d05F5,"Durham, Allen and Barnes",http://chan-stafford.org/,Zimbabwe,Synergistic web-enabled framework,1993,Mechanical or Industrial Engineering,6135
33,38C6cfC5074Fa5e,Fritz-Franklin,http://www.lambert.com/,Nepal,Automated 4thgeneration website,1972,Hospitality,4516
34,5Cd7efccCcba38f,Burch-Ewing,http://cline.net/,Taiwan,User-centric 4thgeneration system engine,1981,Venture Capital / VC,7443
35,9E6Acb51e3F9d6F,"Glass, Barrera and Turner",https://dunlap.com/,Kyrgyz Republic,Multi-channeled 3rdgeneration open system,2020,Utilities,2610
36,4D4d7E18321eaeC,Pineda-Cox,http://aguilar.org/,Bolivia,Fundamental asynchronous capability,2010,Human Resources / HR,1312
37,485f5d06B938F2b,"Baker, Mccann and Macdonald",http://www.anderson-barker.com/,Kenya,Cross-group user-facing focus group,2013,Legislative Office,1638
38,19E3a5Bf6dBDc4F,Cuevas-Moss,https://dodson-castaneda.net/,Guatemala,Extended human-resource intranet,1994,Music,9995
39,6883A965c7b68F7,Hahn PLC,http://newman.com/,Belarus,Organic logistical leverage,2012,Electrical / Electronic Manufacturing,3715
40,AC5B7AA74Aa4A2E,"Valentine, Ferguson and Kramer",http://stuart.net/,Jersey,Centralized secondary time-frame,1997,Non - Profit / Volunteering,3585
41,decab0D5027CA6a,Arroyo Inc,https://www.turner.com/,Grenada,Managed demand-driven website,2006,Writing / Editing,9067
42,dF084FbBb613eea,Walls LLC,http://www.reese-vasquez.biz/,Cape Verde,Self-enabling fresh-thinking installation,1989,Investment Management / Hedge Fund / Private Equity,1678
43,A2D89Ab9bCcAd4e,"Mitchell, Warren and Schneider",https://fox.biz/,Trinidad and Tobago,Enhanced intangible time-frame,2021,Capital Markets / Hedge Fund / Private Equity,3816
44,77aDc905434a49f,Prince PLC,https://www.watts.com/,Sweden,Profit-focused coherent installation,2016,Individual / Family Services,7645
45,235fdEFE2cfDa5F,Brock-Blackwell,http://www.small.com/,Benin,Secured foreground emulation,1986,Online Publishing,7034
46,1eD64cFe986BBbE,Walton-Barnett,https://ashley-schaefer.com/,Western Sahara,Right-sized clear-thinking flexibility,2001,Luxury Goods / Jewelry,1746
47,CbBbFcdd0eaE2cF,Bartlett-Arroyo,https://cruz.com/,Northern Mariana Islands,Realigned didactic function,1976,Civic / Social Organization,3987
48,49aECbDaE6aBD53,"Wallace, Madden and Morris",http://www.blevins-fernandez.biz/,Germany,Persistent real-time customer loyalty,2016,Pharmaceuticals,9443
49,7b3fe6e7E72bFa4,Berg-Sparks,https://cisneros-love.com/,Canada,Stand-alone static implementation,1974,Arts / Crafts,2073
50,c6DedA82A8aef7E,Gonzales Ltd,http://bird.com/,Tonga,Managed human-resource policy,1988,Consumer Goods,9069
51,7D9FBF85cdC3871,Lawson and Sons,https://www.wong.com/,French Southern Territories,Compatible analyzing intranet,2021,Arts / Crafts,3527
52,7dd18Fb7cB07b65,"Mcguire, Mcconnell and Olsen",https://melton-briggs.com/,Korea,Profound client-server frame,1988,Printing,8445
53,EF5B55FadccB8Fe,Charles-Phillips,https://bowman.com/,Cote d'Ivoire,Monitored client-server implementation,2012,Mental Health Care,3450
54,f8D4B99e11fAF5D,Odom Ltd,https://www.humphrey-hess.com/,Cote d'Ivoire,Advanced static process improvement,2012,Management Consulting,1825
55,e24D21BFd3bF1E5,Richard PLC,https://holden-coleman.net/,Mayotte,Object-based optimizing model,1971,Broadcast Media,4942
56,B9BdfEB6D3Ca44E,Sampson Ltd,https://blevins.com/,Cayman Islands,Intuitive local adapter,2005,Farming,1418
57,2a74D6f3D3B268e,"Cherry, Le and Callahan",https://waller-delacruz.biz/,Nigeria,Universal human-resource collaboration,2017,Entertainment / Movie Production,7202
58,Bf3F3f62c8aBC33,Cherry PLC,https://www.avila.info/,Marshall Islands,Persistent tertiary website,1980,Plastics,8245
59,aeBe26B80a7a23c,Melton-Nichols,https://kennedy.com/,Palau,User-friendly clear-thinking productivity,2021,Legislative Office,8741
60,aAeb29ad43886C6,Potter-Walsh,http://thomas-french.org/,Turkey,Optional non-volatile open system,2008,Human Resources / HR,6923
61,bD1bc6bB6d1FeD3,Freeman-Chen,https://mathis.com/,Timor-Leste,Phased next generation adapter,1973,International Trade / Development,346
62,EB9f456e8b7022a,Soto Group,https://norris.info/,Vietnam,Enterprise-wide executive installation,1988,Business Supplies / Equipment,9097
63,Dfef38C51D8DAe3,"Poole, Cruz and Whitney",https://reed.info/,Reunion,Balanced analyzing groupware,1978,Marketing / Advertising / Sales,2992
64,055ffEfB2Dd95B0,Riley Ltd,http://wiley.com/,Brazil,Optional exuding superstructure,1986,Textiles,9315
65,cBfe4dbAE1699da,"Erickson, Andrews and Bailey",https://www.hobbs-grant.com/,Eritrea,Vision-oriented secondary project,2014,Consumer Electronics,7829
66,fdFbecbadcdCdf1,"Wilkinson, Charles and Arroyo",http://hunter-mcfarland.com/,United States Virgin Islands,Assimilated 24/7 archive,1996,Building Materials,602
67,5DCb8A5a5ca03c0,Floyd Ltd,http://www.whitney.com/,Falkland Islands (Malvinas),Function-based fault-tolerant concept,2017,Public Relations / PR,2911
68,ce57DCbcFD6d618,Newman-Galloway,https://www.scott.com/,Luxembourg,Enhanced foreground collaboration,1987,Information Technology / IT,3934
69,5aaD187dc929371,Frazier-Butler,https://www.daugherty-farley.info/,Northern Mariana Islands,Persistent interactive circuit,1972,Outsourcing / Offshoring,5130
70,902D7Ac8b6d476b,Newton Inc,https://www.richmond-manning.info/,Netherlands Antilles,Fundamental stable info-mediaries,1976,Military Industry,563
71,32BB9Ff4d939788,Duffy-Levy,https://www.potter.com/,Guernsey,Diverse exuding installation,1982,Wireless,6146
72,adcB0afbE58bAe3,Wagner LLC,https://decker-esparza.com/,Uruguay,Reactive attitude-oriented toolset,1987,International Affairs,6874
73,dfcA1c84AdB61Ac,Mccall-Holmes,http://www.dean.com/,Benin,Object-based value-added database,2009,Legal Services,696
74,208044AC2fe52F3,Massey LLC,https://frazier.biz/,Suriname,Configurable zero administration Graphical User Interface,1986,Accounting,5004
75,f3C365f0c1A0623,Hicks LLC,http://alvarez.biz/,Pakistan,Quality-focused client-server Graphical User Interface,1970,Computer Software / Engineering,8480
76,ec5Bdd3CBAfaB93,"Cole, Russell and Avery",http://www.blankenship.com/,Mongolia,De-engineered fault-tolerant challenge,2000,Law Enforcement,7012
77,DDB19Be7eeB56B4,Cummings-Rojas,https://simon-pearson.com/,Svalbard & Jan Mayen Islands,User-centric modular customer loyalty,2012,Financial Services,7529
78,dd6CA3d0bc3cAfc,"Beasley, Greene and Mahoney",http://www.petersen-lawrence.com/,Togo,Extended content-based methodology,1976,Religious Institutions,869
79,A0B9d56e61070e3,"Beasley, Sims and Allison",http://burke.info/,Latvia,Secured zero tolerance hub,1972,Facilities Services,6182
80,cBa7EFe5D05Adaf,Crawford-Rivera,https://black-ramirez.org/,Cuba,Persevering exuding budgetary management,1999,Online Publishing,7805
81,Ea3f6D52Ec73563,Montes-Hensley,https://krueger.org/,Liechtenstein,Multi-tiered secondary productivity,2009,Printing,8433
82,bC0CEd48A8000E0,Velazquez-Odom,https://stokes.com/,Djibouti,Streamlined 6thgeneration function,2002,Alternative Dispute Resolution,4044
83,c89b9b59BC4baa1,Eaton-Morales,https://www.reeves-graham.com/,Micronesia,Customer-focused explicit frame,1990,Capital Markets / Hedge Fund / Private Equity,7013
84,FEC51bce8421a7b,"Roberson, Pennington and Palmer",http://www.keith-fisher.com/,Cameroon,Adaptive bi-directional hierarchy,1993,Telecommunications,5571
85,e0E8e27eAc9CAd5,"George, Russo and Guerra",https://drake.com/,Sweden,Centralized non-volatile capability,1989,Military Industry,2880
86,B97a6CF9bf5983C,Davila Inc,https://mcconnell.info/,Cocos (Keeling) Islands,Profit-focused dedicated frame,2017,Consumer Electronics,2215
87,a0a6f9b3DbcBEb5,Mays-Preston,http://www.browning-key.com/,Mali,User-centric heuristic focus group,2006,Military Industry,5786
88,8cC1bDa330a5871,Pineda-Morton,https://www.carr.com/,United States Virgin Islands,Grass-roots methodical info-mediaries,1991,Printing,6168
89,ED889CB2FE9cbd3,Huang and Sons,https://www.bolton.com/,Eritrea,Re-contextualized dynamic hierarchy,1981,Semiconductors,7484
90,F4Dc1417BC6cb8f,Gilbert-Simon,https://www.bradford.biz/,Burundi,Grass-roots radical parallelism,1973,Newspapers / Journalism,1927
91,7ABc3c7ecA03B34,Sampson-Griffith,http://hendricks.org/,Benin,Multi-layered composite paradigm,1972,Textiles,3881
92,4e0719FBE38e0aB,Miles-Dominguez,http://www.turner.com/,Gibraltar,Organized empowering forecast,1996,Civic / Social Organization,897
93,dEbDAAeDfaed00A,Rowe and Sons,https://www.simpson.org/,El Salvador,Balanced multimedia knowledgebase,1978,Facilities Services,8172
94,61BDeCfeFD0cEF5,"Valenzuela, Holmes and Rowland",https://www.dorsey.net/,Taiwan,Persistent tertiary focus group,1999,Transportation,1483
95,4e91eD25f486110,"Best, Wade and Shepard",https://zimmerman.com/,Zimbabwe,Innovative background definition,1991,Gambling / Casinos,4873
96,0a0bfFbBbB8eC7c,Holmes Group,https://mcdowell.org/,Ethiopia,Right-sized zero tolerance focus group,1975,Photography,2988
97,BA6Cd9Dae2Efd62,Good Ltd,http://duffy.com/,Anguilla,Reverse-engineered composite moratorium,1971,Consumer Services,4292
98,E7df80C60Abd7f9,Clements-Espinoza,http://www.flowers.net/,Falkland Islands (Malvinas),Progressive modular hub,1991,Broadcast Media,236
99,AFc285dbE2fEd24,Mendez Inc,https://www.burke.net/,Kyrgyz Republic,User-friendly exuding migration,1993,Education Management,339
100,e9eB5A60Cef8354,Watkins-Kaiser,http://www.herring.com/,Togo,Synergistic background access,2009,Financial Services,2785
Index,Organization Id,Name,Website,Country,Description,Founded,Industry,Number of employees
1,FAB0d41d5b5d22c,Ferrell LLC,https://price.net/,Papua New Guinea,Horizontal empowering knowledgebase,1990,Plastics,3498
2,6A7EdDEA9FaDC52,"Mckinney, Riley and Day",http://www.hall-buchanan.info/,Finland,User-centric system-worthy leverage,2015,Glass / Ceramics / Concrete,4952
3,0bFED1ADAE4bcC1,Hester Ltd,http://sullivan-reed.com/,China,Switchable scalable moratorium,1971,Public Safety,5287
4,2bFC1Be8a4ce42f,Holder-Sellers,https://becker.com/,Turkmenistan,De-engineered systemic artificial intelligence,2004,Automotive,921
5,9eE8A6a4Eb96C24,Mayer Group,http://www.brewer.com/,Mauritius,Synchronized needs-based challenge,1991,Transportation,7870
6,cC757116fe1C085,Henry-Thompson,http://morse.net/,Bahamas,Face-to-face well-modulated customer loyalty,1992,Primary / Secondary Education,4914
7,219233e8aFF1BC3,Hansen-Everett,https://www.kidd.org/,Pakistan,Seamless disintermediate collaboration,2018,Publishing Industry,7832
8,ccc93DCF81a31CD,Mcintosh-Mora,https://www.brooks.com/,Heard Island and McDonald Islands,Centralized attitude-oriented capability,1970,Import / Export,4389
9,0B4F93aA06ED03e,Carr Inc,http://ross.com/,Kuwait,Distributed impactful customer loyalty,1996,Plastics,8167
10,738b5aDe6B1C6A5,Gaines Inc,http://sandoval-hooper.com/,Uzbekistan,Multi-lateral scalable protocol,1997,Outsourcing / Offshoring,9698
11,AE61b8Ffebbc476,Kidd Group,http://www.lyons.com/,Bouvet Island (Bouvetoya),Proactive foreground paradigm,2001,Primary / Secondary Education,7473
12,eb3B7D06cCdD609,Crane-Clarke,https://www.sandoval.com/,Denmark,Front-line clear-thinking encryption,2014,Food / Beverages,9011
13,8D0c29189C9798B,"Keller, Campos and Black",https://www.garner.info/,Liberia,Ameliorated directional emulation,2020,Museums / Institutions,2862
14,D2c91cc03CA394c,Glover-Pope,http://www.silva.biz/,United Arab Emirates,Persevering contextually-based approach,2013,Medical Practice,9079
15,C8AC1eaf9C036F4,Pacheco-Spears,https://aguilar.com/,Sweden,Secured logistical synergy,1984,Maritime,769
16,b5D10A14f7a8AfE,Hodge-Ayers,http://www.archer-elliott.com/,Honduras,Future-proofed radical implementation,1990,Facilities Services,8508
17,68139b5C4De03B4,"Bowers, Guerra and Krause",http://www.carrillo-nicholson.com/,Uganda,De-engineered transitional strategy,1972,Primary / Secondary Education,6986
18,5c2EffEfdba2BdF,Mckenzie-Melton,http://montoya-thompson.com/,Hong Kong,Reverse-engineered heuristic alliance,1998,Investment Management / Hedge Fund / Private Equity,4589
19,ba179F19F7925f5,Branch-Mann,http://www.lozano.com/,Botswana,Adaptive intangible frame,1999,Architecture / Planning,7961
20,c1Ce9B350BAc66b,Weiss and Sons,https://barrett.com/,Korea,Sharable optimal functionalities,2011,Plastics,5984
21,8de40AC4e6EaCa4,"Velez, Payne and Coffey",http://burton.com/,Luxembourg,Mandatory coherent synergy,1986,Wholesale,5010
22,Aad86a4F0385F2d,Harrell LLC,http://www.frey-rosario.com/,Guadeloupe,Reverse-engineered mission-critical moratorium,2018,Construction,2185
23,22aC3FFd64fD703,"Eaton, Reynolds and Vargas",http://www.freeman.biz/,Monaco,Self-enabling multi-tasking process improvement,2014,Luxury Goods / Jewelry,8987
24,5Ec4C272bCf085c,Robbins-Cummings,http://donaldson-wilkins.com/,Belgium,Organic non-volatile hierarchy,1991,Pharmaceuticals,5038
25,5fDBeA8BB91a000,Jenkins Inc,http://www.kirk.biz/,South Africa,Front-line systematic help-desk,2002,Insurance,1215
26,dFfD6a6F9AC2d9C,"Greene, Benjamin and Novak",http://www.kent.net/,Romania,Centralized leadingedge moratorium,2012,Museums / Institutions,4941
27,4B217cC5a0674C5,"Dickson, Richmond and Clay",http://everett.com/,Czech Republic,Team-oriented tangible complexity,1980,Real Estate / Mortgage,3122
28,88b1f1cDcf59a37,Prince-David,http://thompson.com/,Christmas Island,Virtual holistic methodology,1970,Banking / Mortgage,1046
29,f9F7bBCAEeC360F,Ayala LLC,http://www.zhang.com/,Philippines,Open-source zero administration hierarchy,2021,Legal Services,7664
30,7Cb3AeFcE4Ba31e,Rivas Group,https://hebert.org/,Australia,Open-architected well-modulated capacity,1998,Logistics / Procurement,4155
31,ccBcC32adcbc530,"Sloan, Mays and Whitehead",http://lawson.com/,Chad,Face-to-face high-level conglomeration,1997,Civil Engineering,365
32,f5afd686b3d05F5,"Durham, Allen and Barnes",http://chan-stafford.org/,Zimbabwe,Synergistic web-enabled framework,1993,Mechanical or Industrial Engineering,6135
33,38C6cfC5074Fa5e,Fritz-Franklin,http://www.lambert.com/,Nepal,Automated 4thgeneration website,1972,Hospitality,4516
34,5Cd7efccCcba38f,Burch-Ewing,http://cline.net/,Taiwan,User-centric 4thgeneration system engine,1981,Venture Capital / VC,7443
35,9E6Acb51e3F9d6F,"Glass, Barrera and Turner",https://dunlap.com/,Kyrgyz Republic,Multi-channeled 3rdgeneration open system,2020,Utilities,2610
36,4D4d7E18321eaeC,Pineda-Cox,http://aguilar.org/,Bolivia,Fundamental asynchronous capability,2010,Human Resources / HR,1312
37,485f5d06B938F2b,"Baker, Mccann and Macdonald",http://www.anderson-barker.com/,Kenya,Cross-group user-facing focus group,2013,Legislative Office,1638
38,19E3a5Bf6dBDc4F,Cuevas-Moss,https://dodson-castaneda.net/,Guatemala,Extended human-resource intranet,1994,Music,9995
39,6883A965c7b68F7,Hahn PLC,http://newman.com/,Belarus,Organic logistical leverage,2012,Electrical / Electronic Manufacturing,3715
40,AC5B7AA74Aa4A2E,"Valentine, Ferguson and Kramer",http://stuart.net/,Jersey,Centralized secondary time-frame,1997,Non - Profit / Volunteering,3585
41,decab0D5027CA6a,Arroyo Inc,https://www.turner.com/,Grenada,Managed demand-driven website,2006,Writing / Editing,9067
42,dF084FbBb613eea,Walls LLC,http://www.reese-vasquez.biz/,Cape Verde,Self-enabling fresh-thinking installation,1989,Investment Management / Hedge Fund / Private Equity,1678
43,A2D89Ab9bCcAd4e,"Mitchell, Warren and Schneider",https://fox.biz/,Trinidad and Tobago,Enhanced intangible time-frame,2021,Capital Markets / Hedge Fund / Private Equity,3816
44,77aDc905434a49f,Prince PLC,https://www.watts.com/,Sweden,Profit-focused coherent installation,2016,Individual / Family Services,7645
45,235fdEFE2cfDa5F,Brock-Blackwell,http://www.small.com/,Benin,Secured foreground emulation,1986,Online Publishing,7034
46,1eD64cFe986BBbE,Walton-Barnett,https://ashley-schaefer.com/,Western Sahara,Right-sized clear-thinking flexibility,2001,Luxury Goods / Jewelry,1746
47,CbBbFcdd0eaE2cF,Bartlett-Arroyo,https://cruz.com/,Northern Mariana Islands,Realigned didactic function,1976,Civic / Social Organization,3987
48,49aECbDaE6aBD53,"Wallace, Madden and Morris",http://www.blevins-fernandez.biz/,Germany,Persistent real-time customer loyalty,2016,Pharmaceuticals,9443
49,7b3fe6e7E72bFa4,Berg-Sparks,https://cisneros-love.com/,Canada,Stand-alone static implementation,1974,Arts / Crafts,2073
50,c6DedA82A8aef7E,Gonzales Ltd,http://bird.com/,Tonga,Managed human-resource policy,1988,Consumer Goods,9069
51,7D9FBF85cdC3871,Lawson and Sons,https://www.wong.com/,French Southern Territories,Compatible analyzing intranet,2021,Arts / Crafts,3527
52,7dd18Fb7cB07b65,"Mcguire, Mcconnell and Olsen",https://melton-briggs.com/,Korea,Profound client-server frame,1988,Printing,8445
53,EF5B55FadccB8Fe,Charles-Phillips,https://bowman.com/,Cote d'Ivoire,Monitored client-server implementation,2012,Mental Health Care,3450
54,f8D4B99e11fAF5D,Odom Ltd,https://www.humphrey-hess.com/,Cote d'Ivoire,Advanced static process improvement,2012,Management Consulting,1825
55,e24D21BFd3bF1E5,Richard PLC,https://holden-coleman.net/,Mayotte,Object-based optimizing model,1971,Broadcast Media,4942
56,B9BdfEB6D3Ca44E,Sampson Ltd,https://blevins.com/,Cayman Islands,Intuitive local adapter,2005,Farming,1418
57,2a74D6f3D3B268e,"Cherry, Le and Callahan",https://waller-delacruz.biz/,Nigeria,Universal human-resource collaboration,2017,Entertainment / Movie Production,7202
58,Bf3F3f62c8aBC33,Cherry PLC,https://www.avila.info/,Marshall Islands,Persistent tertiary website,1980,Plastics,8245
59,aeBe26B80a7a23c,Melton-Nichols,https://kennedy.com/,Palau,User-friendly clear-thinking productivity,2021,Legislative Office,8741
60,aAeb29ad43886C6,Potter-Walsh,http://thomas-french.org/,Turkey,Optional non-volatile open system,2008,Human Resources / HR,6923
61,bD1bc6bB6d1FeD3,Freeman-Chen,https://mathis.com/,Timor-Leste,Phased next generation adapter,1973,International Trade / Development,346
62,EB9f456e8b7022a,Soto Group,https://norris.info/,Vietnam,Enterprise-wide executive installation,1988,Business Supplies / Equipment,9097
63,Dfef38C51D8DAe3,"Poole, Cruz and Whitney",https://reed.info/,Reunion,Balanced analyzing groupware,1978,Marketing / Advertising / Sales,2992
64,055ffEfB2Dd95B0,Riley Ltd,http://wiley.com/,Brazil,Optional exuding superstructure,1986,Textiles,9315
65,cBfe4dbAE1699da,"Erickson, Andrews and Bailey",https://www.hobbs-grant.com/,Eritrea,Vision-oriented secondary project,2014,Consumer Electronics,7829
66,fdFbecbadcdCdf1,"Wilkinson, Charles and Arroyo",http://hunter-mcfarland.com/,United States Virgin Islands,Assimilated 24/7 archive,1996,Building Materials,602
67,5DCb8A5a5ca03c0,Floyd Ltd,http://www.whitney.com/,Falkland Islands (Malvinas),Function-based fault-tolerant concept,2017,Public Relations / PR,2911
68,ce57DCbcFD6d618,Newman-Galloway,https://www.scott.com/,Luxembourg,Enhanced foreground collaboration,1987,Information Technology / IT,3934
69,5aaD187dc929371,Frazier-Butler,https://www.daugherty-farley.info/,Northern Mariana Islands,Persistent interactive circuit,1972,Outsourcing / Offshoring,5130
70,902D7Ac8b6d476b,Newton Inc,https://www.richmond-manning.info/,Netherlands Antilles,Fundamental stable info-mediaries,1976,Military Industry,563
71,32BB9Ff4d939788,Duffy-Levy,https://www.potter.com/,Guernsey,Diverse exuding installation,1982,Wireless,6146
72,adcB0afbE58bAe3,Wagner LLC,https://decker-esparza.com/,Uruguay,Reactive attitude-oriented toolset,1987,International Affairs,6874
73,dfcA1c84AdB61Ac,Mccall-Holmes,http://www.dean.com/,Benin,Object-based value-added database,2009,Legal Services,696
74,208044AC2fe52F3,Massey LLC,https://frazier.biz/,Suriname,Configurable zero administration Graphical User Interface,1986,Accounting,5004
75,f3C365f0c1A0623,Hicks LLC,http://alvarez.biz/,Pakistan,Quality-focused client-server Graphical User Interface,1970,Computer Software / Engineering,8480
76,ec5Bdd3CBAfaB93,"Cole, Russell and Avery",http://www.blankenship.com/,Mongolia,De-engineered fault-tolerant challenge,2000,Law Enforcement,7012
77,DDB19Be7eeB56B4,Cummings-Rojas,https://simon-pearson.com/,Svalbard & Jan Mayen Islands,User-centric modular customer loyalty,2012,Financial Services,7529
78,dd6CA3d0bc3cAfc,"Beasley, Greene and Mahoney",http://www.petersen-lawrence.com/,Togo,Extended content-based methodology,1976,Religious Institutions,869
79,A0B9d56e61070e3,"Beasley, Sims and Allison",http://burke.info/,Latvia,Secured zero tolerance hub,1972,Facilities Services,6182
80,cBa7EFe5D05Adaf,Crawford-Rivera,https://black-ramirez.org/,Cuba,Persevering exuding budgetary management,1999,Online Publishing,7805
81,Ea3f6D52Ec73563,Montes-Hensley,https://krueger.org/,Liechtenstein,Multi-tiered secondary productivity,2009,Printing,8433
82,bC0CEd48A8000E0,Velazquez-Odom,https://stokes.com/,Djibouti,Streamlined 6thgeneration function,2002,Alternative Dispute Resolution,4044
83,c89b9b59BC4baa1,Eaton-Morales,https://www.reeves-graham.com/,Micronesia,Customer-focused explicit frame,1990,Capital Markets / Hedge Fund / Private Equity,7013
84,FEC51bce8421a7b,"Roberson, Pennington and Palmer",http://www.keith-fisher.com/,Cameroon,Adaptive bi-directional hierarchy,1993,Telecommunications,5571
85,e0E8e27eAc9CAd5,"George, Russo and Guerra",https://drake.com/,Sweden,Centralized non-volatile capability,1989,Military Industry,2880
86,B97a6CF9bf5983C,Davila Inc,https://mcconnell.info/,Cocos (Keeling) Islands,Profit-focused dedicated frame,2017,Consumer Electronics,2215
87,a0a6f9b3DbcBEb5,Mays-Preston,http://www.browning-key.com/,Mali,User-centric heuristic focus group,2006,Military Industry,5786
88,8cC1bDa330a5871,Pineda-Morton,https://www.carr.com/,United States Virgin Islands,Grass-roots methodical info-mediaries,1991,Printing,6168
89,ED889CB2FE9cbd3,Huang and Sons,https://www.bolton.com/,Eritrea,Re-contextualized dynamic hierarchy,1981,Semiconductors,7484
90,F4Dc1417BC6cb8f,Gilbert-Simon,https://www.bradford.biz/,Burundi,Grass-roots radical parallelism,1973,Newspapers / Journalism,1927
91,7ABc3c7ecA03B34,Sampson-Griffith,http://hendricks.org/,Benin,Multi-layered composite paradigm,1972,Textiles,3881
92,4e0719FBE38e0aB,Miles-Dominguez,http://www.turner.com/,Gibraltar,Organized empowering forecast,1996,Civic / Social Organization,897
93,dEbDAAeDfaed00A,Rowe and Sons,https://www.simpson.org/,El Salvador,Balanced multimedia knowledgebase,1978,Facilities Services,8172
94,61BDeCfeFD0cEF5,"Valenzuela, Holmes and Rowland",https://www.dorsey.net/,Taiwan,Persistent tertiary focus group,1999,Transportation,1483
95,4e91eD25f486110,"Best, Wade and Shepard",https://zimmerman.com/,Zimbabwe,Innovative background definition,1991,Gambling / Casinos,4873
96,0a0bfFbBbB8eC7c,Holmes Group,https://mcdowell.org/,Ethiopia,Right-sized zero tolerance focus group,1975,Photography,2988
97,BA6Cd9Dae2Efd62,Good Ltd,http://duffy.com/,Anguilla,Reverse-engineered composite moratorium,1971,Consumer Services,4292
98,E7df80C60Abd7f9,Clements-Espinoza,http://www.flowers.net/,Falkland Islands (Malvinas),Progressive modular hub,1991,Broadcast Media,236
99,AFc285dbE2fEd24,Mendez Inc,https://www.burke.net/,Kyrgyz Republic,User-friendly exuding migration,1993,Education Management,339
100,e9eB5A60Cef8354,Watkins-Kaiser,http://www.herring.com/,Togo,Synergistic background access,2009,Financial Services,2785

1 Index Organization Id Name Website Country Description Founded Industry Number of employees
2 1 FAB0d41d5b5d22c Ferrell LLC https://price.net/ Papua New Guinea Horizontal empowering knowledgebase 1990 Plastics 3498
3 2 6A7EdDEA9FaDC52 Mckinney, Riley and Day http://www.hall-buchanan.info/ Finland User-centric system-worthy leverage 2015 Glass / Ceramics / Concrete 4952
4 3 0bFED1ADAE4bcC1 Hester Ltd http://sullivan-reed.com/ China Switchable scalable moratorium 1971 Public Safety 5287
5 4 2bFC1Be8a4ce42f Holder-Sellers https://becker.com/ Turkmenistan De-engineered systemic artificial intelligence 2004 Automotive 921
6 5 9eE8A6a4Eb96C24 Mayer Group http://www.brewer.com/ Mauritius Synchronized needs-based challenge 1991 Transportation 7870
7 6 cC757116fe1C085 Henry-Thompson http://morse.net/ Bahamas Face-to-face well-modulated customer loyalty 1992 Primary / Secondary Education 4914
8 7 219233e8aFF1BC3 Hansen-Everett https://www.kidd.org/ Pakistan Seamless disintermediate collaboration 2018 Publishing Industry 7832
9 8 ccc93DCF81a31CD Mcintosh-Mora https://www.brooks.com/ Heard Island and McDonald Islands Centralized attitude-oriented capability 1970 Import / Export 4389
10 9 0B4F93aA06ED03e Carr Inc http://ross.com/ Kuwait Distributed impactful customer loyalty 1996 Plastics 8167
11 10 738b5aDe6B1C6A5 Gaines Inc http://sandoval-hooper.com/ Uzbekistan Multi-lateral scalable protocol 1997 Outsourcing / Offshoring 9698
12 11 AE61b8Ffebbc476 Kidd Group http://www.lyons.com/ Bouvet Island (Bouvetoya) Proactive foreground paradigm 2001 Primary / Secondary Education 7473
13 12 eb3B7D06cCdD609 Crane-Clarke https://www.sandoval.com/ Denmark Front-line clear-thinking encryption 2014 Food / Beverages 9011
14 13 8D0c29189C9798B Keller, Campos and Black https://www.garner.info/ Liberia Ameliorated directional emulation 2020 Museums / Institutions 2862
15 14 D2c91cc03CA394c Glover-Pope http://www.silva.biz/ United Arab Emirates Persevering contextually-based approach 2013 Medical Practice 9079
16 15 C8AC1eaf9C036F4 Pacheco-Spears https://aguilar.com/ Sweden Secured logistical synergy 1984 Maritime 769
17 16 b5D10A14f7a8AfE Hodge-Ayers http://www.archer-elliott.com/ Honduras Future-proofed radical implementation 1990 Facilities Services 8508
18 17 68139b5C4De03B4 Bowers, Guerra and Krause http://www.carrillo-nicholson.com/ Uganda De-engineered transitional strategy 1972 Primary / Secondary Education 6986
19 18 5c2EffEfdba2BdF Mckenzie-Melton http://montoya-thompson.com/ Hong Kong Reverse-engineered heuristic alliance 1998 Investment Management / Hedge Fund / Private Equity 4589
20 19 ba179F19F7925f5 Branch-Mann http://www.lozano.com/ Botswana Adaptive intangible frame 1999 Architecture / Planning 7961
21 20 c1Ce9B350BAc66b Weiss and Sons https://barrett.com/ Korea Sharable optimal functionalities 2011 Plastics 5984
22 21 8de40AC4e6EaCa4 Velez, Payne and Coffey http://burton.com/ Luxembourg Mandatory coherent synergy 1986 Wholesale 5010
23 22 Aad86a4F0385F2d Harrell LLC http://www.frey-rosario.com/ Guadeloupe Reverse-engineered mission-critical moratorium 2018 Construction 2185
24 23 22aC3FFd64fD703 Eaton, Reynolds and Vargas http://www.freeman.biz/ Monaco Self-enabling multi-tasking process improvement 2014 Luxury Goods / Jewelry 8987
25 24 5Ec4C272bCf085c Robbins-Cummings http://donaldson-wilkins.com/ Belgium Organic non-volatile hierarchy 1991 Pharmaceuticals 5038
26 25 5fDBeA8BB91a000 Jenkins Inc http://www.kirk.biz/ South Africa Front-line systematic help-desk 2002 Insurance 1215
27 26 dFfD6a6F9AC2d9C Greene, Benjamin and Novak http://www.kent.net/ Romania Centralized leadingedge moratorium 2012 Museums / Institutions 4941
28 27 4B217cC5a0674C5 Dickson, Richmond and Clay http://everett.com/ Czech Republic Team-oriented tangible complexity 1980 Real Estate / Mortgage 3122
29 28 88b1f1cDcf59a37 Prince-David http://thompson.com/ Christmas Island Virtual holistic methodology 1970 Banking / Mortgage 1046
30 29 f9F7bBCAEeC360F Ayala LLC http://www.zhang.com/ Philippines Open-source zero administration hierarchy 2021 Legal Services 7664
31 30 7Cb3AeFcE4Ba31e Rivas Group https://hebert.org/ Australia Open-architected well-modulated capacity 1998 Logistics / Procurement 4155
32 31 ccBcC32adcbc530 Sloan, Mays and Whitehead http://lawson.com/ Chad Face-to-face high-level conglomeration 1997 Civil Engineering 365
33 32 f5afd686b3d05F5 Durham, Allen and Barnes http://chan-stafford.org/ Zimbabwe Synergistic web-enabled framework 1993 Mechanical or Industrial Engineering 6135
34 33 38C6cfC5074Fa5e Fritz-Franklin http://www.lambert.com/ Nepal Automated 4thgeneration website 1972 Hospitality 4516
35 34 5Cd7efccCcba38f Burch-Ewing http://cline.net/ Taiwan User-centric 4thgeneration system engine 1981 Venture Capital / VC 7443
36 35 9E6Acb51e3F9d6F Glass, Barrera and Turner https://dunlap.com/ Kyrgyz Republic Multi-channeled 3rdgeneration open system 2020 Utilities 2610
37 36 4D4d7E18321eaeC Pineda-Cox http://aguilar.org/ Bolivia Fundamental asynchronous capability 2010 Human Resources / HR 1312
38 37 485f5d06B938F2b Baker, Mccann and Macdonald http://www.anderson-barker.com/ Kenya Cross-group user-facing focus group 2013 Legislative Office 1638
39 38 19E3a5Bf6dBDc4F Cuevas-Moss https://dodson-castaneda.net/ Guatemala Extended human-resource intranet 1994 Music 9995
40 39 6883A965c7b68F7 Hahn PLC http://newman.com/ Belarus Organic logistical leverage 2012 Electrical / Electronic Manufacturing 3715
41 40 AC5B7AA74Aa4A2E Valentine, Ferguson and Kramer http://stuart.net/ Jersey Centralized secondary time-frame 1997 Non - Profit / Volunteering 3585
42 41 decab0D5027CA6a Arroyo Inc https://www.turner.com/ Grenada Managed demand-driven website 2006 Writing / Editing 9067
43 42 dF084FbBb613eea Walls LLC http://www.reese-vasquez.biz/ Cape Verde Self-enabling fresh-thinking installation 1989 Investment Management / Hedge Fund / Private Equity 1678
44 43 A2D89Ab9bCcAd4e Mitchell, Warren and Schneider https://fox.biz/ Trinidad and Tobago Enhanced intangible time-frame 2021 Capital Markets / Hedge Fund / Private Equity 3816
45 44 77aDc905434a49f Prince PLC https://www.watts.com/ Sweden Profit-focused coherent installation 2016 Individual / Family Services 7645
46 45 235fdEFE2cfDa5F Brock-Blackwell http://www.small.com/ Benin Secured foreground emulation 1986 Online Publishing 7034
47 46 1eD64cFe986BBbE Walton-Barnett https://ashley-schaefer.com/ Western Sahara Right-sized clear-thinking flexibility 2001 Luxury Goods / Jewelry 1746
48 47 CbBbFcdd0eaE2cF Bartlett-Arroyo https://cruz.com/ Northern Mariana Islands Realigned didactic function 1976 Civic / Social Organization 3987
49 48 49aECbDaE6aBD53 Wallace, Madden and Morris http://www.blevins-fernandez.biz/ Germany Persistent real-time customer loyalty 2016 Pharmaceuticals 9443
50 49 7b3fe6e7E72bFa4 Berg-Sparks https://cisneros-love.com/ Canada Stand-alone static implementation 1974 Arts / Crafts 2073
51 50 c6DedA82A8aef7E Gonzales Ltd http://bird.com/ Tonga Managed human-resource policy 1988 Consumer Goods 9069
52 51 7D9FBF85cdC3871 Lawson and Sons https://www.wong.com/ French Southern Territories Compatible analyzing intranet 2021 Arts / Crafts 3527
53 52 7dd18Fb7cB07b65 Mcguire, Mcconnell and Olsen https://melton-briggs.com/ Korea Profound client-server frame 1988 Printing 8445
54 53 EF5B55FadccB8Fe Charles-Phillips https://bowman.com/ Cote d'Ivoire Monitored client-server implementation 2012 Mental Health Care 3450
55 54 f8D4B99e11fAF5D Odom Ltd https://www.humphrey-hess.com/ Cote d'Ivoire Advanced static process improvement 2012 Management Consulting 1825
56 55 e24D21BFd3bF1E5 Richard PLC https://holden-coleman.net/ Mayotte Object-based optimizing model 1971 Broadcast Media 4942
57 56 B9BdfEB6D3Ca44E Sampson Ltd https://blevins.com/ Cayman Islands Intuitive local adapter 2005 Farming 1418
58 57 2a74D6f3D3B268e Cherry, Le and Callahan https://waller-delacruz.biz/ Nigeria Universal human-resource collaboration 2017 Entertainment / Movie Production 7202
59 58 Bf3F3f62c8aBC33 Cherry PLC https://www.avila.info/ Marshall Islands Persistent tertiary website 1980 Plastics 8245
60 59 aeBe26B80a7a23c Melton-Nichols https://kennedy.com/ Palau User-friendly clear-thinking productivity 2021 Legislative Office 8741
61 60 aAeb29ad43886C6 Potter-Walsh http://thomas-french.org/ Turkey Optional non-volatile open system 2008 Human Resources / HR 6923
62 61 bD1bc6bB6d1FeD3 Freeman-Chen https://mathis.com/ Timor-Leste Phased next generation adapter 1973 International Trade / Development 346
63 62 EB9f456e8b7022a Soto Group https://norris.info/ Vietnam Enterprise-wide executive installation 1988 Business Supplies / Equipment 9097
64 63 Dfef38C51D8DAe3 Poole, Cruz and Whitney https://reed.info/ Reunion Balanced analyzing groupware 1978 Marketing / Advertising / Sales 2992
65 64 055ffEfB2Dd95B0 Riley Ltd http://wiley.com/ Brazil Optional exuding superstructure 1986 Textiles 9315
66 65 cBfe4dbAE1699da Erickson, Andrews and Bailey https://www.hobbs-grant.com/ Eritrea Vision-oriented secondary project 2014 Consumer Electronics 7829
67 66 fdFbecbadcdCdf1 Wilkinson, Charles and Arroyo http://hunter-mcfarland.com/ United States Virgin Islands Assimilated 24/7 archive 1996 Building Materials 602
68 67 5DCb8A5a5ca03c0 Floyd Ltd http://www.whitney.com/ Falkland Islands (Malvinas) Function-based fault-tolerant concept 2017 Public Relations / PR 2911
69 68 ce57DCbcFD6d618 Newman-Galloway https://www.scott.com/ Luxembourg Enhanced foreground collaboration 1987 Information Technology / IT 3934
70 69 5aaD187dc929371 Frazier-Butler https://www.daugherty-farley.info/ Northern Mariana Islands Persistent interactive circuit 1972 Outsourcing / Offshoring 5130
71 70 902D7Ac8b6d476b Newton Inc https://www.richmond-manning.info/ Netherlands Antilles Fundamental stable info-mediaries 1976 Military Industry 563
72 71 32BB9Ff4d939788 Duffy-Levy https://www.potter.com/ Guernsey Diverse exuding installation 1982 Wireless 6146
73 72 adcB0afbE58bAe3 Wagner LLC https://decker-esparza.com/ Uruguay Reactive attitude-oriented toolset 1987 International Affairs 6874
74 73 dfcA1c84AdB61Ac Mccall-Holmes http://www.dean.com/ Benin Object-based value-added database 2009 Legal Services 696
75 74 208044AC2fe52F3 Massey LLC https://frazier.biz/ Suriname Configurable zero administration Graphical User Interface 1986 Accounting 5004
76 75 f3C365f0c1A0623 Hicks LLC http://alvarez.biz/ Pakistan Quality-focused client-server Graphical User Interface 1970 Computer Software / Engineering 8480
77 76 ec5Bdd3CBAfaB93 Cole, Russell and Avery http://www.blankenship.com/ Mongolia De-engineered fault-tolerant challenge 2000 Law Enforcement 7012
78 77 DDB19Be7eeB56B4 Cummings-Rojas https://simon-pearson.com/ Svalbard & Jan Mayen Islands User-centric modular customer loyalty 2012 Financial Services 7529
79 78 dd6CA3d0bc3cAfc Beasley, Greene and Mahoney http://www.petersen-lawrence.com/ Togo Extended content-based methodology 1976 Religious Institutions 869
80 79 A0B9d56e61070e3 Beasley, Sims and Allison http://burke.info/ Latvia Secured zero tolerance hub 1972 Facilities Services 6182
81 80 cBa7EFe5D05Adaf Crawford-Rivera https://black-ramirez.org/ Cuba Persevering exuding budgetary management 1999 Online Publishing 7805
82 81 Ea3f6D52Ec73563 Montes-Hensley https://krueger.org/ Liechtenstein Multi-tiered secondary productivity 2009 Printing 8433
83 82 bC0CEd48A8000E0 Velazquez-Odom https://stokes.com/ Djibouti Streamlined 6thgeneration function 2002 Alternative Dispute Resolution 4044
84 83 c89b9b59BC4baa1 Eaton-Morales https://www.reeves-graham.com/ Micronesia Customer-focused explicit frame 1990 Capital Markets / Hedge Fund / Private Equity 7013
85 84 FEC51bce8421a7b Roberson, Pennington and Palmer http://www.keith-fisher.com/ Cameroon Adaptive bi-directional hierarchy 1993 Telecommunications 5571
86 85 e0E8e27eAc9CAd5 George, Russo and Guerra https://drake.com/ Sweden Centralized non-volatile capability 1989 Military Industry 2880
87 86 B97a6CF9bf5983C Davila Inc https://mcconnell.info/ Cocos (Keeling) Islands Profit-focused dedicated frame 2017 Consumer Electronics 2215
88 87 a0a6f9b3DbcBEb5 Mays-Preston http://www.browning-key.com/ Mali User-centric heuristic focus group 2006 Military Industry 5786
89 88 8cC1bDa330a5871 Pineda-Morton https://www.carr.com/ United States Virgin Islands Grass-roots methodical info-mediaries 1991 Printing 6168
90 89 ED889CB2FE9cbd3 Huang and Sons https://www.bolton.com/ Eritrea Re-contextualized dynamic hierarchy 1981 Semiconductors 7484
91 90 F4Dc1417BC6cb8f Gilbert-Simon https://www.bradford.biz/ Burundi Grass-roots radical parallelism 1973 Newspapers / Journalism 1927
92 91 7ABc3c7ecA03B34 Sampson-Griffith http://hendricks.org/ Benin Multi-layered composite paradigm 1972 Textiles 3881
93 92 4e0719FBE38e0aB Miles-Dominguez http://www.turner.com/ Gibraltar Organized empowering forecast 1996 Civic / Social Organization 897
94 93 dEbDAAeDfaed00A Rowe and Sons https://www.simpson.org/ El Salvador Balanced multimedia knowledgebase 1978 Facilities Services 8172
95 94 61BDeCfeFD0cEF5 Valenzuela, Holmes and Rowland https://www.dorsey.net/ Taiwan Persistent tertiary focus group 1999 Transportation 1483
96 95 4e91eD25f486110 Best, Wade and Shepard https://zimmerman.com/ Zimbabwe Innovative background definition 1991 Gambling / Casinos 4873
97 96 0a0bfFbBbB8eC7c Holmes Group https://mcdowell.org/ Ethiopia Right-sized zero tolerance focus group 1975 Photography 2988
98 97 BA6Cd9Dae2Efd62 Good Ltd http://duffy.com/ Anguilla Reverse-engineered composite moratorium 1971 Consumer Services 4292
99 98 E7df80C60Abd7f9 Clements-Espinoza http://www.flowers.net/ Falkland Islands (Malvinas) Progressive modular hub 1991 Broadcast Media 236
100 99 AFc285dbE2fEd24 Mendez Inc https://www.burke.net/ Kyrgyz Republic User-friendly exuding migration 1993 Education Management 339
101 100 e9eB5A60Cef8354 Watkins-Kaiser http://www.herring.com/ Togo Synergistic background access 2009 Financial Services 2785

48
applications/ColossalQA/data/tests/test.html

@ -1,7 +1,7 @@
<!DOCTYPE html>
<!-- saved from url=(0046)https://docs.python.org/3/library/logging.html -->
<html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/">
<meta property="og:title" content="logging — Logging facility for Python">
<meta property="og:type" content="website">
@ -16,18 +16,18 @@
<meta name="theme-color" content="#3776ab">
<title>logging — Logging facility for Python — Python 3.11.5 documentation</title><meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" type="text/css" href="./test_files/pygments.css">
<link rel="stylesheet" type="text/css" href="./test_files/pydoctheme.css">
<link id="pygments_dark_css" media="(prefers-color-scheme: dark)" rel="stylesheet" type="text/css" href="./test_files/pygments_dark.css">
<script data-url_root="../" id="documentation_options" src="./test_files/documentation_options.js.download"></script>
<script src="./test_files/jquery.js.download"></script>
<script src="./test_files/underscore.js.download"></script>
<script src="./test_files/doctools.js.download"></script>
<script src="./test_files/sidebar.js.download"></script>
<link rel="search" type="application/opensearchdescription+xml" title="Search within Python 3.11.5 documentation" href="https://docs.python.org/3/_static/opensearch.xml">
<link rel="author" title="About these documents" href="https://docs.python.org/3/about.html">
<link rel="index" title="Index" href="https://docs.python.org/3/genindex.html">
@ -36,11 +36,11 @@
<link rel="next" title="logging.config — Logging configuration" href="https://docs.python.org/3/library/logging.config.html">
<link rel="prev" title="getopt — C-style parser for command line options" href="https://docs.python.org/3/library/getopt.html">
<link rel="canonical" href="https://docs.python.org/3/library/logging.html">
<style>
@media only screen {
table.full-width-table {
@ -52,7 +52,7 @@
<link rel="shortcut icon" type="image/png" href="./test_files/py.svg">
<script type="text/javascript" src="./test_files/copybutton.js.download"></script>
<script type="text/javascript" src="./test_files/menu.js.download"></script>
<script type="text/javascript" src="./test_files/themetoggle.js.download"></script>
<script type="text/javascript" src="./test_files/themetoggle.js.download"></script>
</head>
<body data-new-gr-c-s-check-loaded="14.1038.0" data-gr-ext-installed="">
@ -79,7 +79,7 @@
<div class="menu-wrapper">
<nav class="menu" role="navigation" aria-label="main navigation" tabindex="-1">
<div class="language_switcher_placeholder"><select id="language_select"><option value="en" selected="selected">English</option><option value="es">Spanish</option><option value="fr">French</option><option value="ja">Japanese</option><option value="ko">Korean</option><option value="pt-br">Brazilian Portuguese</option><option value="tr">Turkish</option><option value="zh-cn">Simplified Chinese</option><option value="zh-tw">Traditional Chinese</option></select></div>
<label class="theme-selector-label">
Theme
<select class="theme-selector" oninput="activateTheme(this.value)">
@ -131,7 +131,7 @@
</div>
</div>
<div class="related" role="navigation" aria-label="related navigation">
<h3>Navigation</h3>
<ul>
@ -151,7 +151,7 @@
<div class="version_switcher_placeholder"><select id="version_select"><option value="3.13">dev (3.13)</option><option value="3.12">pre (3.12)</option><option value="3.11" selected="selected">3.11.5</option><option value="3.10">3.10</option><option value="3.9">3.9</option><option value="3.8">3.8</option><option value="3.7">3.7</option><option value="3.6">3.6</option><option value="3.5">3.5</option><option value="2.7">2.7</option></select></div>
</li>
<li>
</li>
<li id="cpython-language-and-version">
<a href="https://docs.python.org/3/index.html">3.11.5 Documentation</a> »
@ -161,7 +161,7 @@
<li class="nav-item nav-item-2"><a href="https://docs.python.org/3/library/allos.html" accesskey="U">Generic Operating System Services</a> »</li>
<li class="nav-item nav-item-this"><a href="https://docs.python.org/3/library/logging.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code> — Logging facility for Python</a></li>
<li class="right">
<div class="inline-search" role="search">
<form class="inline-search" action="https://docs.python.org/3/search.html" method="get">
@ -180,15 +180,15 @@
<option value="dark">Dark</option>
</select>
</label> |</li>
</ul>
</div>
</div>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<section id="module-logging">
<span id="logging-logging-facility-for-python"></span><h1><a class="reference internal" href="https://docs.python.org/3/library/logging.html#module-logging" title="logging: Flexible event logging system for applications."><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code></a> — Logging facility for Python<a class="headerlink" href="https://docs.python.org/3/library/logging.html#module-logging" title="Permalink to this headline"></a></h1>
<p><strong>Source code:</strong> <a class="reference external" href="https://github.com/python/cpython/tree/3.11/Lib/logging/__init__.py">Lib/logging/__init__.py</a></p>
@ -1871,7 +1871,7 @@ library.</p>
</div>
<div id="sidebarbutton"><span>«</span></div></div>
<div class="clearer"></div>
</div>
</div>
<div class="related" role="navigation" aria-label="related navigation">
<h3>Navigation</h3>
<ul>
@ -1891,7 +1891,7 @@ library.</p>
<div class="version_switcher_placeholder"><select id="version_select"><option value="3.13">dev (3.13)</option><option value="3.12">pre (3.12)</option><option value="3.11" selected="selected">3.11.5</option><option value="3.10">3.10</option><option value="3.9">3.9</option><option value="3.8">3.8</option><option value="3.7">3.7</option><option value="3.6">3.6</option><option value="3.5">3.5</option><option value="2.7">2.7</option></select></div>
</li>
<li>
</li>
<li id="cpython-language-and-version">
<a href="https://docs.python.org/3/index.html">3.11.5 Documentation</a> »
@ -1901,7 +1901,7 @@ library.</p>
<li class="nav-item nav-item-2"><a href="https://docs.python.org/3/library/allos.html">Generic Operating System Services</a> »</li>
<li class="nav-item nav-item-this"><a href="https://docs.python.org/3/library/logging.html"><code class="xref py py-mod docutils literal notranslate"><span class="pre">logging</span></code> — Logging facility for Python</a></li>
<li class="right">
<div class="inline-search" role="search">
<form class="inline-search" action="https://docs.python.org/3/search.html" method="get">
@ -1920,9 +1920,9 @@ library.</p>
<option value="dark">Dark</option>
</select>
</label> |</li>
</ul>
</div>
</div>
<div class="footer">
© <a href="https://docs.python.org/3/copyright.html">Copyright</a> 2001-2023, Python Software Foundation.
<br>
@ -1946,7 +1946,7 @@ library.</p>
</div>
<script type="text/javascript" src="./test_files/switchers.js.download"></script>
<div id="hl-aria-live-message-container" aria-live="polite" class="visually-hidden"></div><div id="hl-aria-live-alert-container" role="alert" aria-live="assertive" class="visually-hidden"></div></body><grammarly-desktop-integration data-grammarly-shadow-root="true"><template shadowrootmode="open"><style>
div.grammarly-desktop-integration {
position: absolute;
@ -1967,4 +1967,4 @@ library.</p>
div.grammarly-desktop-integration:before {
content: attr(data-content);
}
</style><div aria-label="grammarly-integration" role="group" tabindex="-1" class="grammarly-desktop-integration" data-content="{&quot;mode&quot;:&quot;limited&quot;,&quot;isActive&quot;:false,&quot;isUserDisabled&quot;:false}"></div></template></grammarly-desktop-integration></html>
</style><div aria-label="grammarly-integration" role="group" tabindex="-1" class="grammarly-desktop-integration" data-content="{&quot;mode&quot;:&quot;limited&quot;,&quot;isActive&quot;:false,&quot;isUserDisabled&quot;:false}"></div></template></grammarly-desktop-integration></html>

4
applications/ColossalQA/data/tests/test.md

@ -34,9 +34,9 @@ python api_server.py --host localhost --port $PORT_NUMBER --model $PATH_TO_MODEL
### Collect your data
For ChatGPT based Agent we support document retrieval and simple sql search.
If you want to run the demo locally, we provided document retrieval based conversation system built upon langchain. It accept a wide range of documents.
If you want to run the demo locally, we provided document retrieval based conversation system built upon langchain. It accept a wide range of documents.
Read comments under ./colossalqa/data_loader for more detail
Read comments under ./colossalqa/data_loader for more detail
### Serving
Currently use vllm will replace with colossal inference when ready. Please refer class VllmLLM.

76
applications/ColossalQA/data/tests/test.txt

@ -1,38 +1,38 @@
Your Name
Lorem ipsum dolor sit amet, consectetuer adipiscing elit
123 Your Street
Your City, ST 12345
(123) 456-7890
no_reply@example.com
EXPERIENCE
Company, Location — Job Title
MONTH 20XX - PRESENT
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh.
Company, Location — Job Title
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh.
Company, Location — Job Title
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh.
EDUCATION
School Name, Location — Degree
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore.
School Name, Location — Degree
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam.
PROJECTS
Project Name — Detail
Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
SKILLS
* Lorem ipsum dolor sit amet.
* Consectetuer adipiscing elit.
* Sed diam nonummy nibh euismod tincidunt.
* Laoreet dolore magna aliquam erat volutpat.
AWARDS
Lorem ipsum dolor sit amet Consectetuer adipiscing elit, Sed diam nonummy
Nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.
Lorem ipsum dolor sit amet Consectetuer adipiscing elit, Sed diam nonummy
Nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.
LANGUAGES
Lorem ipsum, Dolor sit amet, Consectetuer
Your Name
Lorem ipsum dolor sit amet, consectetuer adipiscing elit
123 Your Street
Your City, ST 12345
(123) 456-7890
no_reply@example.com
EXPERIENCE
Company, Location — Job Title
MONTH 20XX - PRESENT
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh.
Company, Location — Job Title
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh.
Company, Location — Job Title
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh.
EDUCATION
School Name, Location — Degree
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore.
School Name, Location — Degree
MONTH 20XX - MONTH 20XX
Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam.
PROJECTS
Project Name — Detail
Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
SKILLS
* Lorem ipsum dolor sit amet.
* Consectetuer adipiscing elit.
* Sed diam nonummy nibh euismod tincidunt.
* Laoreet dolore magna aliquam erat volutpat.
AWARDS
Lorem ipsum dolor sit amet Consectetuer adipiscing elit, Sed diam nonummy
Nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.
Lorem ipsum dolor sit amet Consectetuer adipiscing elit, Sed diam nonummy
Nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.
LANGUAGES
Lorem ipsum, Dolor sit amet, Consectetuer

33
applications/ColossalQA/examples/retrieval_conversation_universal.py

@ -1,22 +1,27 @@
import argparse
from colossalqa.retrieval_conversation_universal import UniversalRetrievalConversation
if __name__ == '__main__':
if __name__ == "__main__":
# Parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('--en_model_path', type=str, default=None)
parser.add_argument('--zh_model_path', type=str, default=None)
parser.add_argument('--zh_model_name', type=str, default=None)
parser.add_argument('--en_model_name', type=str, default=None)
parser.add_argument('--sql_file_path', type=str, default=None, help='path to the a empty folder for storing sql files for indexing')
parser.add_argument("--en_model_path", type=str, default=None)
parser.add_argument("--zh_model_path", type=str, default=None)
parser.add_argument("--zh_model_name", type=str, default=None)
parser.add_argument("--en_model_name", type=str, default=None)
parser.add_argument(
"--sql_file_path", type=str, default=None, help="path to the a empty folder for storing sql files for indexing"
)
args = parser.parse_args()
# Will ask for documents path in running time
session = UniversalRetrievalConversation(files_en=None,
files_zh=None,
zh_model_path=args.zh_model_path, en_model_path=args.en_model_path,
zh_model_name=args.zh_model_name, en_model_name=args.en_model_name,
sql_file_path=args.sql_file_path
)
session = UniversalRetrievalConversation(
files_en=None,
files_zh=None,
zh_model_path=args.zh_model_path,
en_model_path=args.en_model_path,
zh_model_name=args.zh_model_name,
en_model_name=args.en_model_name,
sql_file_path=args.sql_file_path,
)
session.start_test_session()

12
applications/ColossalQA/examples/webui_demo/RAG_ChatBot.py

@ -5,13 +5,7 @@ from colossalqa.chain.retrieval_qa.base import RetrievalQA
from colossalqa.data_loader.document_loader import DocumentLoader
from colossalqa.memory import ConversationBufferWithSummary
from colossalqa.mylogging import get_logger
from colossalqa.prompt.prompt import (
PROMPT_DISAMBIGUATE_ZH,
PROMPT_RETRIEVAL_QA_ZH,
SUMMARY_PROMPT_ZH,
ZH_RETRIEVAL_QA_REJECTION_ANSWER,
ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS,
)
from colossalqa.prompt.prompt import ZH_RETRIEVAL_QA_REJECTION_ANSWER, ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS
from colossalqa.retriever import CustomRetriever
from langchain import LLMChain
from langchain.embeddings import HuggingFaceEmbeddings
@ -116,13 +110,13 @@ class RAG_ChatBot:
def split_docs(self, documents):
doc_splits = self.text_splitter.split_documents(documents)
return doc_splits
def clear_docs(self, **kwargs):
self.documents = []
self.docs_names = []
self.info_retriever.clear_documents()
self.memory.initiate_document_retrieval_chain(self.llm, kwargs["gen_qa_prompt"], self.info_retriever)
def reset_config(self, rag_config):
self.rag_config = rag_config
self.set_embed_model(**self.rag_config["embed"])

2
applications/ColossalQA/examples/webui_demo/README.md

@ -115,4 +115,4 @@ python webui.py --http_host "your-backend-api-host" --http_port "your-backend-ap
After launching the script, you can upload files and engage with the chatbot through your web browser.
![ColossalQA Demo](https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/colossalqa/new_ui.png)
![ColossalQA Demo](https://raw.githubusercontent.com/hpcaitech/public_assets/main/applications/colossalqa/new_ui.png)

52
applications/ColossalQA/examples/webui_demo/config.py

@ -1,58 +1,30 @@
from colossalqa.prompt.prompt import (
PROMPT_DISAMBIGUATE_ZH,
PROMPT_RETRIEVAL_QA_ZH,
SUMMARY_PROMPT_ZH,
ZH_RETRIEVAL_QA_REJECTION_ANSWER,
ZH_RETRIEVAL_QA_TRIGGER_KEYWORDS,
)
from colossalqa.prompt.prompt import PROMPT_DISAMBIGUATE_ZH, PROMPT_RETRIEVAL_QA_ZH, SUMMARY_PROMPT_ZH
from colossalqa.text_splitter import ChineseTextSplitter
ALL_CONFIG = {
"embed": {
"embed_name": "m3e", # embedding model name
"embed_model_name_or_path": "moka-ai/m3e-base", # path to embedding model, could be a local path or a huggingface path
"embed_model_device": {
"device": "cpu"
}
"embed_model_device": {"device": "cpu"},
},
"model": {
"mode": "api", # "local" for loading models, "api" for using model api
"model_name": "chatgpt_api", # local model name, "chatgpt_api" or "pangu_api"
"model_path": "", # path to the model, could be a local path or a huggingface path. don't need if using an api
"device": {
"device": "cuda"
}
},
"splitter": {
"name": ChineseTextSplitter
},
"retrieval": {
"retri_top_k": 3,
"retri_kb_file_path": "./", # path to store database files
"verbose": True
"model_path": "", # path to the model, could be a local path or a huggingface path. don't need if using an api
"device": {"device": "cuda"},
},
"splitter": {"name": ChineseTextSplitter},
"retrieval": {"retri_top_k": 3, "retri_kb_file_path": "./", "verbose": True}, # path to store database files
"chain": {
"mem_summary_prompt": SUMMARY_PROMPT_ZH, # summary prompt template
"mem_human_prefix": "用户",
"mem_ai_prefix": "Assistant",
"mem_max_tokens": 2000,
"mem_llm_kwargs": {
"max_new_tokens": 50,
"temperature": 1,
"do_sample": True
},
"mem_llm_kwargs": {"max_new_tokens": 50, "temperature": 1, "do_sample": True},
"disambig_prompt": PROMPT_DISAMBIGUATE_ZH, # disambiguate prompt template
"disambig_llm_kwargs": {
"max_new_tokens": 30,
"temperature": 1,
"do_sample": True
},
"gen_llm_kwargs": {
"max_new_tokens": 100,
"temperature": 1,
"do_sample": True
},
"disambig_llm_kwargs": {"max_new_tokens": 30, "temperature": 1, "do_sample": True},
"gen_llm_kwargs": {"max_new_tokens": 100, "temperature": 1, "do_sample": True},
"gen_qa_prompt": PROMPT_RETRIEVAL_QA_ZH, # generation prompt template
"verbose": True
}
}
"verbose": True,
},
}

25
applications/ColossalQA/examples/webui_demo/server.py

@ -1,27 +1,18 @@
import argparse
import os
from typing import List, Union
import config
import uvicorn
from colossalqa.local.llm import ColossalAPI, ColossalLLM
from colossalqa.data_loader.document_loader import DocumentLoader
from colossalqa.mylogging import get_logger
from colossalqa.retrieval_conversation_zh import ChineseRetrievalConversation
from colossalqa.retriever import CustomRetriever
from enum import Enum
from fastapi import FastAPI, Request
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pydantic import BaseModel, Field
import uvicorn
import config
from pydantic import BaseModel
from RAG_ChatBot import RAG_ChatBot
from utils import DocAction
logger = get_logger()
def parseArgs():
parser = argparse.ArgumentParser()
parser.add_argument("--http_host", default="0.0.0.0")
@ -36,6 +27,7 @@ class DocUpdateReq(BaseModel):
doc_files: Union[List[str], str, None] = None
action: DocAction = DocAction.ADD
class GenerationTaskReq(BaseModel):
user_input: str
@ -45,7 +37,7 @@ def update_docs(data: DocUpdateReq, request: Request):
if data.action == "add":
if isinstance(data.doc_files, str):
data.doc_files = [data.doc_files]
chatbot.load_doc_from_files(files = data.doc_files)
chatbot.load_doc_from_files(files=data.doc_files)
all_docs = ""
for doc in chatbot.docs_names:
all_docs += f"\t{doc}\n\n"
@ -79,17 +71,18 @@ if __name__ == "__main__":
elif all_config["model"]["mode"] == "api":
if model_name == "pangu_api":
from colossalqa.local.pangu_llm import Pangu
gen_config = {
"user": "User",
"max_tokens": all_config["chain"]["disambig_llm_kwargs"]["max_new_tokens"],
"temperature": all_config["chain"]["disambig_llm_kwargs"]["temperature"],
"n": 1 # the number of responses generated
"n": 1, # the number of responses generated
}
llm = Pangu(gen_config=gen_config)
llm.set_auth_config() # verify user's auth info here
elif model_name == "chatgpt_api":
from langchain.llms import OpenAI
llm = OpenAI()
else:
raise ValueError("Unsupported mode.")

31
applications/ColossalQA/examples/webui_demo/webui.py

@ -1,24 +1,26 @@
import argparse
import json
import os
import requests
import gradio as gr
import requests
from utils import DocAction
def parseArgs():
parser = argparse.ArgumentParser()
parser.add_argument("--http_host", default="0.0.0.0")
parser.add_argument("--http_port", type=int, default=13666)
return parser.parse_args()
def get_response(data, url):
headers = {"Content-type": "application/json"}
response = requests.post(url, json=data, headers=headers)
response = json.loads(response.content)
return response
def add_text(history, text):
history = history + [(text, None)]
return history, gr.update(value=None, interactive=True)
@ -28,35 +30,28 @@ def add_file(history, files):
files_string = "\n".join([os.path.basename(file.name) for file in files])
doc_files = [file.name for file in files]
data = {
"doc_files": doc_files,
"action": DocAction.ADD
}
data = {"doc_files": doc_files, "action": DocAction.ADD}
response = get_response(data, update_url)["response"]
history = history + [(files_string, response)]
return history
def bot(history):
data = {
"user_input": history[-1][0].strip()
}
def bot(history):
data = {"user_input": history[-1][0].strip()}
response = get_response(data, gen_url)
if response["error"] != "":
raise gr.Error(response["error"])
history[-1][1] = response["response"]
yield history
def restart(chatbot, txt):
# Reset the conversation state and clear the chat history
data = {
"doc_files": "",
"action": DocAction.CLEAR
}
response = get_response(data, update_url)
data = {"doc_files": "", "action": DocAction.CLEAR}
get_response(data, update_url)
return gr.update(value=None), gr.update(value=None, interactive=True)
@ -97,7 +92,7 @@ with gr.Blocks(css=CSS) as demo:
txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(bot, chatbot, chatbot)
# Clear the original textbox
txt_msg.then(lambda: gr.update(value=None, interactive=True), None, [txt], queue=False)
txt_msg.then(lambda: gr.update(value=None, interactive=True), None, [txt], queue=False)
# Click Upload Button: 1. upload files 2. send config to backend, initalize model 3. get response "conversation_ready" = True/False
file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False)

2
applications/ColossalQA/pytest.ini

@ -1,4 +1,4 @@
[pytest]
markers =
dist: tests which are run in a multi-GPU or multi-machine environment (at least 4 GPUs)
largedist: tests which are run in a multi-GPU or multi-machine environment (at least 8 GPUs)
largedist: tests which are run in a multi-GPU or multi-machine environment (at least 8 GPUs)

14
applications/ColossalQA/tests/test_document_loader.py

@ -1,21 +1,21 @@
import os
from colossalqa.data_loader.document_loader import DocumentLoader
def test_add_document():
PATH = os.environ.get('TEST_DOCUMENT_LOADER_DATA_PATH')
files = [[PATH, 'all data']]
PATH = os.environ.get("TEST_DOCUMENT_LOADER_DATA_PATH")
files = [[PATH, "all data"]]
document_loader = DocumentLoader(files)
documents = document_loader.all_data
all_files = []
for doc in documents:
assert isinstance(doc.page_content, str)==True
if doc.metadata['source'] not in all_files:
all_files.append(doc.metadata['source'])
assert isinstance(doc.page_content, str) == True
if doc.metadata["source"] not in all_files:
all_files.append(doc.metadata["source"])
print(all_files)
assert len(all_files) == 6
if __name__=='__main__':
if __name__ == "__main__":
test_add_document()

80
applications/ColossalQA/tests/test_retrieval_qa.py

@ -4,56 +4,44 @@ from colossalqa.retrieval_conversation_universal import UniversalRetrievalConver
def test_en_retrievalQA():
data_path_en = os.environ.get('TEST_DATA_PATH_EN')
data_path_zh = os.environ.get('TEST_DATA_PATH_ZH')
en_model_path = os.environ.get('EN_MODEL_PATH')
zh_model_path = os.environ.get('ZH_MODEL_PATH')
zh_model_name = os.environ.get('ZH_MODEL_NAME')
en_model_name = os.environ.get('EN_MODEL_NAME')
sql_file_path = os.environ.get('SQL_FILE_PATH')
qa_session = UniversalRetrievalConversation(files_en=[{
'data_path': data_path_en,
'name': 'company information',
'separator': '\n'
}],
files_zh=[{
'data_path': data_path_zh,
'name': 'company information',
'separator': '\n'
}],
zh_model_path=zh_model_path,
en_model_path=en_model_path,
zh_model_name=zh_model_name,
en_model_name=en_model_name,
sql_file_path=sql_file_path)
ans = qa_session.run("which company runs business in hotel industry?", which_language='en')
data_path_en = os.environ.get("TEST_DATA_PATH_EN")
data_path_zh = os.environ.get("TEST_DATA_PATH_ZH")
en_model_path = os.environ.get("EN_MODEL_PATH")
zh_model_path = os.environ.get("ZH_MODEL_PATH")
zh_model_name = os.environ.get("ZH_MODEL_NAME")
en_model_name = os.environ.get("EN_MODEL_NAME")
sql_file_path = os.environ.get("SQL_FILE_PATH")
qa_session = UniversalRetrievalConversation(
files_en=[{"data_path": data_path_en, "name": "company information", "separator": "\n"}],
files_zh=[{"data_path": data_path_zh, "name": "company information", "separator": "\n"}],
zh_model_path=zh_model_path,
en_model_path=en_model_path,
zh_model_name=zh_model_name,
en_model_name=en_model_name,
sql_file_path=sql_file_path,
)
ans = qa_session.run("which company runs business in hotel industry?", which_language="en")
print(ans)
def test_zh_retrievalQA():
data_path_en = os.environ.get('TEST_DATA_PATH_EN')
data_path_zh = os.environ.get('TEST_DATA_PATH_ZH')
en_model_path = os.environ.get('EN_MODEL_PATH')
zh_model_path = os.environ.get('ZH_MODEL_PATH')
zh_model_name = os.environ.get('ZH_MODEL_NAME')
en_model_name = os.environ.get('EN_MODEL_NAME')
sql_file_path = os.environ.get('SQL_FILE_PATH')
qa_session = UniversalRetrievalConversation(files_en=[{
'data_path': data_path_en,
'name': 'company information',
'separator': '\n'
}],
files_zh=[{
'data_path': data_path_zh,
'name': 'company information',
'separator': '\n'
}],
zh_model_path=zh_model_path,
en_model_path=en_model_path,
zh_model_name=zh_model_name,
en_model_name=en_model_name,
sql_file_path=sql_file_path)
ans = qa_session.run("哪家公司在经营酒店业务?", which_language='zh')
data_path_en = os.environ.get("TEST_DATA_PATH_EN")
data_path_zh = os.environ.get("TEST_DATA_PATH_ZH")
en_model_path = os.environ.get("EN_MODEL_PATH")
zh_model_path = os.environ.get("ZH_MODEL_PATH")
zh_model_name = os.environ.get("ZH_MODEL_NAME")
en_model_name = os.environ.get("EN_MODEL_NAME")
sql_file_path = os.environ.get("SQL_FILE_PATH")
qa_session = UniversalRetrievalConversation(
files_en=[{"data_path": data_path_en, "name": "company information", "separator": "\n"}],
files_zh=[{"data_path": data_path_zh, "name": "company information", "separator": "\n"}],
zh_model_path=zh_model_path,
en_model_path=en_model_path,
zh_model_name=zh_model_name,
en_model_name=en_model_name,
sql_file_path=sql_file_path,
)
ans = qa_session.run("哪家公司在经营酒店业务?", which_language="zh")
print(ans)

2
applications/ColossalQA/version.txt

@ -1 +1 @@
0.0.1
0.0.1

2
colossalai/__init__.py

@ -1,5 +1,5 @@
from .initialize import launch, launch_from_openmpi, launch_from_slurm, launch_from_torch
from . import accelerator
from .initialize import launch, launch_from_openmpi, launch_from_slurm, launch_from_torch
try:
# .version will be created by setup.py

9
colossalai/booster/plugin/torch_fsdp_plugin.py

@ -27,7 +27,7 @@ from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
from torch.utils.data import DataLoader
from colossalai.checkpoint_io import CheckpointIO, GeneralCheckpointIO, utils, CheckpointIndexFile
from colossalai.checkpoint_io import CheckpointIndexFile, CheckpointIO, GeneralCheckpointIO, utils
from colossalai.cluster import DistCoordinator
from colossalai.interface import ModelWrapper, OptimizerWrapper
@ -93,9 +93,7 @@ class TorchFSDPCheckpointIO(GeneralCheckpointIO):
Path(checkpoint_path).mkdir(parents=True, exist_ok=True)
with FSDP.state_dict_type(
model.unwrap(),
StateDictType.FULL_STATE_DICT,
FullStateDictConfig(offload_to_cpu=True, rank0_only=True)
model.unwrap(), StateDictType.FULL_STATE_DICT, FullStateDictConfig(offload_to_cpu=True, rank0_only=True)
):
state_dict = model.unwrap().state_dict()
@ -172,7 +170,7 @@ class TorchFSDPCheckpointIO(GeneralCheckpointIO):
with FSDP.state_dict_type(
optimizer.unwrap_model().unwrap(),
StateDictType.FULL_STATE_DICT,
FullStateDictConfig(offload_to_cpu=True, rank0_only=True)
FullStateDictConfig(offload_to_cpu=True, rank0_only=True),
):
fsdp_optim_state = FSDP.full_optim_state_dict(
optimizer.unwrap_model().unwrap(), optim=optimizer, rank0_only=True
@ -241,7 +239,6 @@ class TorchFSDPCheckpointIO(GeneralCheckpointIO):
)
optimizer.load_state_dict(fsdp_state)
def save_lr_scheduler(self, lr_scheduler: LRScheduler, checkpoint: str):
"""
Save model to checkpoint but only on master process.

3
colossalai/checkpoint_io/utils.py

@ -294,6 +294,7 @@ def shard_optimizer_checkpoint(state_dict: dict, max_shard_size: int = 1024) ->
# Helper functions for saving state dict
# ======================================
def save_state_dict(state_dict: dict, checkpoint_file_path: str, use_safetensors: bool) -> None:
"""
Save state dict to checkpoint.
@ -305,7 +306,7 @@ def save_state_dict(state_dict: dict, checkpoint_file_path: str, use_safetensors
"""
# Move all tensors in the state_dict to CPU before saving to avoid serialization issues
state_dict_cpu = tree_map(lambda x: x.cpu() if torch.is_tensor(x) else x, state_dict)
if use_safetensors:
assert is_safetensors_available(), "safetensors is not available."
assert checkpoint_file_path.endswith(

25
colossalai/cluster/process_group_mesh.py

@ -174,16 +174,20 @@ class ProcessGroupMesh:
List[Tuple[int, ...]]: Coordinates along the axis.
"""
if isinstance(axis, int):
axis = [axis,]
axis = [
axis,
]
assert isinstance(indices_at_axis[0], int)
indices_at_axis = [indices_at_axis,]
indices_at_axis = [
indices_at_axis,
]
def add_index(base_coord, axis, indices_at_axis):
coords_in_group = []
for idx in indices_at_axis:
coords_in_group.append(base_coord[:axis] + (idx,) + base_coord[axis + 1 :])
return coords_in_group
coords_in_group = [base_coord]
for ax, indices_at_ax in zip(axis, indices_at_axis):
new_coords_in_group = []
@ -194,7 +198,10 @@ class ProcessGroupMesh:
return coords_in_group
def create_group_along_axis(
self, axis: Union[int, List[int]], indices_at_axis: Optional[Union[List[int], List[List[int]]]] = None, backend: Optional[str] = None
self,
axis: Union[int, List[int]],
indices_at_axis: Optional[Union[List[int], List[List[int]]]] = None,
backend: Optional[str] = None,
) -> ProcessGroup:
"""Create all process groups along the given axis, and return the one which the current process belongs to.
@ -207,11 +214,15 @@ class ProcessGroupMesh:
ProcessGroup: The process group along the given axis which the current process belongs to.
"""
if isinstance(axis, int):
axis = [axis,]
axis = [
axis,
]
if indices_at_axis is not None:
assert isinstance(indices_at_axis[0], int)
indices_at_axis = [indices_at_axis,]
indices_at_axis = [
indices_at_axis,
]
indices_at_axis = indices_at_axis or [list(range(self._shape[ax])) for ax in axis]
reduced_shape = list(self._shape)
# the choices on the axis are reduced to 1, since it's determined by `indices_at_axis`

49
colossalai/inference/engine/modeling/llama.py

@ -29,13 +29,17 @@ except:
try:
from colossalai.kernel.triton.flash_decoding import token_flash_decoding
HAS_TRITON_FLASH_DECODING_KERNEL = True
except:
print("no triton flash decoding support, please install lightllm from https://github.com/ModelTC/lightllm/blob/ece7b43f8a6dfa74027adc77c2c176cff28c76c8")
print(
"no triton flash decoding support, please install lightllm from https://github.com/ModelTC/lightllm/blob/ece7b43f8a6dfa74027adc77c2c176cff28c76c8"
)
HAS_TRITON_FLASH_DECODING_KERNEL = False
try:
from flash_attn import flash_attn_with_kvcache
HAS_FLASH_KERNEL = True
except:
HAS_FLASH_KERNEL = False
@ -48,6 +52,7 @@ def rotate_half(x):
x2 = x[..., x.shape[-1] // 2 :]
return torch.cat((-x2, x1), dim=-1)
def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
# The first two dimensions of cos and sin are always 1, so we can `squeeze` them.
cos = cos.squeeze(1).squeeze(0) # [seq_len, dim]
@ -96,17 +101,22 @@ def llama_triton_context_attention(
infer_state.max_len_in_batch,
)
def llama_triton_token_attention(query_states, attn_output, infer_state, num_key_value_groups=1, q_head_num = -1, head_dim = -1):
def llama_triton_token_attention(
query_states, attn_output, infer_state, num_key_value_groups=1, q_head_num=-1, head_dim=-1
):
if HAS_TRITON_FLASH_DECODING_KERNEL and q_head_num != -1 and head_dim != -1:
token_flash_decoding(q = query_states,
o_tensor = attn_output,
infer_state = infer_state,
q_head_num = q_head_num,
head_dim = head_dim,
cache_k = infer_state.cache_manager.key_buffer[infer_state.decode_layer_id],
cache_v = infer_state.cache_manager.value_buffer[infer_state.decode_layer_id])
return
token_flash_decoding(
q=query_states,
o_tensor=attn_output,
infer_state=infer_state,
q_head_num=q_head_num,
head_dim=head_dim,
cache_k=infer_state.cache_manager.key_buffer[infer_state.decode_layer_id],
cache_v=infer_state.cache_manager.value_buffer[infer_state.decode_layer_id],
)
return
if num_key_value_groups == 1:
token_attention_fwd(
query_states,
@ -459,14 +469,15 @@ class LlamaInferenceForwards:
)
if HAS_LIGHTLLM_KERNEL:
attn_output = torch.empty_like(query_states)
llama_triton_token_attention(query_states = query_states,
attn_output = attn_output,
infer_state = infer_state,
num_key_value_groups = self.num_key_value_groups,
q_head_num = q_len * self.num_heads,
head_dim = self.head_dim)
llama_triton_token_attention(
query_states=query_states,
attn_output=attn_output,
infer_state=infer_state,
num_key_value_groups=self.num_key_value_groups,
q_head_num=q_len * self.num_heads,
head_dim=self.head_dim,
)
else:
self.num_heads // self.num_key_value_heads
cache_k = infer_state.cache_manager.key_buffer[infer_state.decode_layer_id]

202
colossalai/inference/quant/gptq/cai_gptq/cai_quant_linear.py

@ -18,15 +18,15 @@ from .gptq_op import CaiGPTQLinearOp
HAS_GPTQ_CUDA = False
try:
from colossalai.kernel.op_builder.gptq import GPTQBuilder
gptq_cuda = GPTQBuilder().load()
HAS_GPTQ_CUDA = True
except ImportError:
warnings.warn('CUDA gptq is not installed')
warnings.warn("CUDA gptq is not installed")
HAS_GPTQ_CUDA = False
class CaiQuantLinear(nn.Module):
def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_size=1, tp_rank=0, row_split=False):
super().__init__()
if bits not in [2, 4, 8]:
@ -37,23 +37,28 @@ class CaiQuantLinear(nn.Module):
self.maxq = 2**self.bits - 1
self.groupsize = groupsize if groupsize != -1 else infeatures
self.register_buffer('qweight', torch.zeros((infeatures // 32 * self.bits, outfeatures), dtype=torch.int32))
self.register_buffer("qweight", torch.zeros((infeatures // 32 * self.bits, outfeatures), dtype=torch.int32))
self.register_buffer(
"qzeros",
torch.zeros((math.ceil(infeatures / self.groupsize), outfeatures // 32 * self.bits), dtype=torch.int32),
)
self.register_buffer(
'qzeros',
torch.zeros((math.ceil(infeatures / self.groupsize), outfeatures // 32 * self.bits), dtype=torch.int32))
self.register_buffer('scales',
torch.zeros((math.ceil(infeatures / self.groupsize), outfeatures), dtype=torch.float16))
"scales", torch.zeros((math.ceil(infeatures / self.groupsize), outfeatures), dtype=torch.float16)
)
if row_split:
self.register_buffer(
'g_idx',
torch.tensor([(i + (tp_rank * self.infeatures)) // self.groupsize for i in range(infeatures)],
dtype=torch.int32))
"g_idx",
torch.tensor(
[(i + (tp_rank * self.infeatures)) // self.groupsize for i in range(infeatures)], dtype=torch.int32
),
)
else:
self.register_buffer('g_idx',
torch.tensor([i // self.groupsize for i in range(infeatures)], dtype=torch.int32))
self.register_buffer(
"g_idx", torch.tensor([i // self.groupsize for i in range(infeatures)], dtype=torch.int32)
)
if bias:
self.register_buffer('bias', torch.zeros((outfeatures), dtype=torch.float16))
self.register_buffer("bias", torch.zeros((outfeatures), dtype=torch.float16))
else:
self.bias = None
@ -66,9 +71,11 @@ class CaiQuantLinear(nn.Module):
self.row_split = row_split
def pack(self, linear, scales, zeros, g_idx=None):
g_idx = g_idx.clone() if g_idx is not None else torch.tensor(
[i // self.groupsize for i in range(self.infeatures)], dtype=torch.int32)
g_idx = (
g_idx.clone()
if g_idx is not None
else torch.tensor([i // self.groupsize for i in range(self.infeatures)], dtype=torch.int32)
)
scales = scales.t().contiguous()
zeros = zeros.t().contiguous()
@ -79,7 +86,6 @@ class CaiQuantLinear(nn.Module):
if linear.bias is not None:
self.bias = linear.bias.clone().half()
wn = 8
pbits = 32
ptype = torch.int32
unsign_type = np.uint32
@ -88,9 +94,10 @@ class CaiQuantLinear(nn.Module):
intweight = []
for idx in range(self.infeatures):
intweight.append(
torch.round(
(linear.weight.data[:, idx] + scale_zeros[g_idx[idx]]) / half_scales[g_idx[idx]]).to(ptype)[:,
None])
torch.round((linear.weight.data[:, idx] + scale_zeros[g_idx[idx]]) / half_scales[g_idx[idx]]).to(ptype)[
:, None
]
)
intweight = torch.cat(intweight, dim=1)
intweight = intweight.t().contiguous()
intweight = intweight.numpy().astype(unsign_type)
@ -109,7 +116,7 @@ class CaiQuantLinear(nn.Module):
raise NotImplementedError("Only 2,4,8 bits are supported.")
qweight = qweight.astype(sign_type)
qweight1 = torch.from_numpy(qweight)
qweight1 = qweight1.contiguous() #.to("cuda")
qweight1 = qweight1.contiguous() # .to("cuda")
self.qweight.data.copy_(qweight1)
qzeros = np.zeros((zeros.shape[0], zeros.shape[1] // pbits * self.bits), dtype=unsign_type)
@ -140,17 +147,20 @@ class CaiQuantLinear(nn.Module):
self.q4_width = self.qweight.shape[1]
if self.g_idx is not None:
if self.row_split and torch.equal(
self.g_idx,
torch.tensor(
[(i + (self.tp_rank * self.infeatures)) // self.groupsize for i in range(self.infeatures)],
dtype=torch.int32,
device=self.g_idx.device)):
self.g_idx,
torch.tensor(
[(i + (self.tp_rank * self.infeatures)) // self.groupsize for i in range(self.infeatures)],
dtype=torch.int32,
device=self.g_idx.device,
),
):
self.g_idx = None
elif torch.equal(
self.g_idx,
torch.tensor([i // self.groupsize for i in range(self.infeatures)],
dtype=torch.int32,
device=self.g_idx.device)):
self.g_idx,
torch.tensor(
[i // self.groupsize for i in range(self.infeatures)], dtype=torch.int32, device=self.g_idx.device
),
):
self.g_idx = None
if self.g_idx is not None:
@ -165,7 +175,6 @@ class CaiQuantLinear(nn.Module):
outshape = x.shape[:-1] + (self.outfeatures,)
if HAS_GPTQ_CUDA and self.bits == 4:
if self.q4 is None:
self.init_q4()
@ -191,7 +200,6 @@ class CaiQuantLinear(nn.Module):
def split_column_copy(gptq_linear, cai_linear, tp_size=1, tp_rank=0, split_num=1):
qweights = gptq_linear.qweight.split(gptq_linear.out_features // split_num, dim=-1)
qzeros = gptq_linear.qzeros.split(gptq_linear.out_features // (32 // cai_linear.bits) // split_num, dim=-1)
scales = gptq_linear.scales.split(gptq_linear.out_features // split_num, dim=-1)
@ -203,24 +211,24 @@ def split_column_copy(gptq_linear, cai_linear, tp_size=1, tp_rank=0, split_num=1
zero_split_block = cai_linear.outfeatures // (32 // cai_linear.bits) // split_num
for i in range(split_num):
cai_linear.qweight[:, i * cai_split_out_features:(i + 1) *
cai_split_out_features] = qweights[i][:, tp_rank * cai_split_out_features:(tp_rank + 1) *
cai_split_out_features]
cai_linear.qzeros[:, i * zero_split_block:(i + 1) *
zero_split_block] = qzeros[i][:, tp_rank * zero_split_block:(tp_rank + 1) * zero_split_block]
cai_linear.scales[:, i * cai_split_out_features:(i + 1) *
cai_split_out_features] = scales[i][:, tp_rank * cai_split_out_features:(tp_rank + 1) *
cai_split_out_features]
cai_linear.qweight[:, i * cai_split_out_features : (i + 1) * cai_split_out_features] = qweights[i][
:, tp_rank * cai_split_out_features : (tp_rank + 1) * cai_split_out_features
]
cai_linear.qzeros[:, i * zero_split_block : (i + 1) * zero_split_block] = qzeros[i][
:, tp_rank * zero_split_block : (tp_rank + 1) * zero_split_block
]
cai_linear.scales[:, i * cai_split_out_features : (i + 1) * cai_split_out_features] = scales[i][
:, tp_rank * cai_split_out_features : (tp_rank + 1) * cai_split_out_features
]
if cai_linear.bias is not None:
cai_linear.bias[i * cai_split_out_features:(i + 1) *
cai_split_out_features] = bias[i][tp_rank * cai_split_out_features:(tp_rank + 1) *
cai_split_out_features]
cai_linear.bias[i * cai_split_out_features : (i + 1) * cai_split_out_features] = bias[i][
tp_rank * cai_split_out_features : (tp_rank + 1) * cai_split_out_features
]
cai_linear.g_idx.copy_(g_idx)
def split_row_copy(gptq_linear, cai_linear, tp_rank=0, split_num=1):
qweights = gptq_linear.qweight.split(gptq_linear.in_features // split_num, dim=0)
qzeros = gptq_linear.qzeros.split(gptq_linear.in_features // split_num, dim=0)
scales = gptq_linear.scales.split(gptq_linear.in_features // split_num, dim=0)
@ -231,47 +239,40 @@ def split_row_copy(gptq_linear, cai_linear, tp_rank=0, split_num=1):
idx_split_features = cai_linear.infeatures // split_num
for i in range(split_num):
cai_linear.qweight[i * cai_split_in_features:(i + 1) *
cai_split_in_features, :] = qweights[i][tp_rank * cai_split_in_features:(tp_rank + 1) *
cai_split_in_features, :]
cai_linear.qzeros[i * zero_split_block:(i + 1) *
zero_split_block, :] = qzeros[i][tp_rank * zero_split_block:(tp_rank + 1) *
zero_split_block, :]
cai_linear.scales[i * zero_split_block:(i + 1) *
zero_split_block, :] = scales[i][tp_rank * zero_split_block:(tp_rank + 1) *
zero_split_block, :]
cai_linear.g_idx[i * idx_split_features:(i + 1) *
idx_split_features] = g_idxs[i][tp_rank * idx_split_features:(tp_rank + 1) *
idx_split_features]
cai_linear.qweight[i * cai_split_in_features : (i + 1) * cai_split_in_features, :] = qweights[i][
tp_rank * cai_split_in_features : (tp_rank + 1) * cai_split_in_features, :
]
cai_linear.qzeros[i * zero_split_block : (i + 1) * zero_split_block, :] = qzeros[i][
tp_rank * zero_split_block : (tp_rank + 1) * zero_split_block, :
]
cai_linear.scales[i * zero_split_block : (i + 1) * zero_split_block, :] = scales[i][
tp_rank * zero_split_block : (tp_rank + 1) * zero_split_block, :
]
cai_linear.g_idx[i * idx_split_features : (i + 1) * idx_split_features] = g_idxs[i][
tp_rank * idx_split_features : (tp_rank + 1) * idx_split_features
]
if cai_linear.bias is not None:
cai_linear.bias.copy_(gptq_linear.bias)
class RowCaiQuantLinear(CaiQuantLinear, ParallelModule):
def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_size=1, tp_rank=0, row_split=False):
super().__init__(bits,
groupsize,
infeatures,
outfeatures,
bias,
tp_size=tp_size,
tp_rank=tp_rank,
row_split=row_split)
super().__init__(
bits, groupsize, infeatures, outfeatures, bias, tp_size=tp_size, tp_rank=tp_rank, row_split=row_split
)
self.process_group = None
@staticmethod
def from_native_module(module: nn.Module, process_group: Union[ProcessGroup, List[ProcessGroup]], *args,
**kwargs) -> ParallelModule:
def from_native_module(
module: nn.Module, process_group: Union[ProcessGroup, List[ProcessGroup]], *args, **kwargs
) -> ParallelModule:
LazyInitContext.materialize(module)
# get the attributes
in_features = module.in_features
# ensure only one process group is passed
if isinstance(process_group, (list, tuple)):
assert len(process_group) == 1, \
f'Expected only one process group, got {len(process_group)}.'
assert len(process_group) == 1, f"Expected only one process group, got {len(process_group)}."
process_group = process_group[0]
tp_size = dist.get_world_size(process_group)
@ -282,15 +283,18 @@ class RowCaiQuantLinear(CaiQuantLinear, ParallelModule):
if in_features % tp_size != 0:
raise ValueError(
f"The size of in_features:{in_features} is not integer multiples of tensor parallel size: {tp_size}!")
linear_1d = RowCaiQuantLinear(module.bits,
module.group_size,
module.in_features // tp_size,
module.out_features,
module.bias is not None,
tp_size=tp_size,
tp_rank=tp_rank,
row_split=True)
f"The size of in_features:{in_features} is not integer multiples of tensor parallel size: {tp_size}!"
)
linear_1d = RowCaiQuantLinear(
module.bits,
module.group_size,
module.in_features // tp_size,
module.out_features,
module.bias is not None,
tp_size=tp_size,
tp_rank=tp_rank,
row_split=True,
)
linear_1d.process_group = process_group
split_row_copy(module, linear_1d, tp_rank=tp_rank, **kwargs)
@ -306,30 +310,23 @@ class RowCaiQuantLinear(CaiQuantLinear, ParallelModule):
class ColCaiQuantLinear(CaiQuantLinear, ParallelModule):
def __init__(self, bits, groupsize, infeatures, outfeatures, bias, tp_size=1, tp_rank=0, row_split=False):
super().__init__(bits,
groupsize,
infeatures,
outfeatures,
bias,
tp_size=tp_size,
tp_rank=tp_rank,
row_split=row_split)
super().__init__(
bits, groupsize, infeatures, outfeatures, bias, tp_size=tp_size, tp_rank=tp_rank, row_split=row_split
)
self.process_group = None
@staticmethod
def from_native_module(module: nn.Module, process_group: Union[ProcessGroup, List[ProcessGroup]], *args,
**kwargs) -> ParallelModule:
def from_native_module(
module: nn.Module, process_group: Union[ProcessGroup, List[ProcessGroup]], *args, **kwargs
) -> ParallelModule:
LazyInitContext.materialize(module)
# get the attributes
in_features = module.in_features
# ensure only one process group is passed
if isinstance(process_group, (list, tuple)):
assert len(process_group) == 1, \
f'Expected only one process group, got {len(process_group)}.'
assert len(process_group) == 1, f"Expected only one process group, got {len(process_group)}."
process_group = process_group[0]
tp_size = dist.get_world_size(process_group)
@ -340,14 +337,17 @@ class ColCaiQuantLinear(CaiQuantLinear, ParallelModule):
if in_features % tp_size != 0:
raise ValueError(
f"The size of in_features:{in_features} is not integer multiples of tensor parallel size: {tp_size}!")
linear_1d = ColCaiQuantLinear(module.bits,
module.group_size,
module.in_features,
module.out_features // tp_size,
module.bias is not None,
tp_size=tp_size,
tp_rank=tp_rank)
f"The size of in_features:{in_features} is not integer multiples of tensor parallel size: {tp_size}!"
)
linear_1d = ColCaiQuantLinear(
module.bits,
module.group_size,
module.in_features,
module.out_features // tp_size,
module.bias is not None,
tp_size=tp_size,
tp_rank=tp_rank,
)
linear_1d.process_group = process_group
split_column_copy(module, linear_1d, tp_rank=tp_rank, **kwargs)

133
colossalai/kernel/triton/context_attention.py

@ -5,6 +5,7 @@ import torch
try:
import triton
import triton.language as tl
HAS_TRITON = True
except ImportError:
HAS_TRITON = False
@ -16,6 +17,7 @@ if HAS_TRITON:
https://github.com/ModelTC/lightllm/blob/f093edc20683ac3ea1bca3fb5d8320a0dd36cf7b/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py#L10
"""
if triton.__version__ < "2.1.0":
@triton.jit
def _context_flash_attention_kernel(
Q,
@ -131,29 +133,47 @@ if HAS_TRITON:
m_i = m_i_new
off_o = (
(cur_batch_start_index + offs_m[:, None]) * stride_obs + cur_head * stride_oh + offs_d[None, :] * stride_od
(cur_batch_start_index + offs_m[:, None]) * stride_obs
+ cur_head * stride_oh
+ offs_d[None, :] * stride_od
)
out_ptrs = Out + off_o
tl.store(out_ptrs, acc, mask=offs_m[:, None] < cur_batch_seq_len)
return
else:
# this function is modified from https://github.com/ModelTC/lightllm/blob/main/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py#L11
@triton.jit
def _context_flash_attention_kernel_2(
Q, K, V, sm_scale, Alibi, B_Start_Loc, B_Seqlen,
Out,
kv_group_num,
stride_qbs, stride_qh, stride_qd,
stride_kbs, stride_kh, stride_kd,
stride_vbs, stride_vh, stride_vd,
stride_obs, stride_oh, stride_od,
BLOCK_M: tl.constexpr, BLOCK_DMODEL: tl.constexpr,
Q,
K,
V,
sm_scale,
Alibi,
B_Start_Loc,
B_Seqlen,
Out,
kv_group_num,
stride_qbs,
stride_qh,
stride_qd,
stride_kbs,
stride_kh,
stride_kd,
stride_vbs,
stride_vh,
stride_vd,
stride_obs,
stride_oh,
stride_od,
BLOCK_M: tl.constexpr,
BLOCK_DMODEL: tl.constexpr,
BLOCK_N: tl.constexpr,
):
cur_batch = tl.program_id(0)
cur_head = tl.program_id(1)
start_m = tl.program_id(2)
if kv_group_num is not None:
cur_kv_head = cur_head // kv_group_num
@ -166,7 +186,11 @@ if HAS_TRITON:
offs_n = tl.arange(0, BLOCK_N)
offs_d = tl.arange(0, BLOCK_DMODEL)
offs_m = start_m * BLOCK_M + tl.arange(0, BLOCK_M)
off_q = (cur_batch_in_all_start_index + offs_m[:, None]) * stride_qbs + cur_head * stride_qh + offs_d[None, :] * stride_qd
off_q = (
(cur_batch_in_all_start_index + offs_m[:, None]) * stride_qbs
+ cur_head * stride_qh
+ offs_d[None, :] * stride_qd
)
if kv_group_num is None or kv_group_num == 1:
off_k = offs_n[None, :] * stride_kbs + cur_head * stride_kh + offs_d[:, None] * stride_kd
off_v = offs_n[:, None] * stride_vbs + cur_head * stride_vh + offs_d[None, :] * stride_vd
@ -191,8 +215,11 @@ if HAS_TRITON:
for start_n in range(0, block_mask * (start_m + 1) * BLOCK_M, BLOCK_N):
start_n = tl.multiple_of(start_n, BLOCK_N)
# -- compute qk ----
k = tl.load(k_ptrs + (cur_batch_in_all_start_index + start_n) * stride_kbs,
mask=(start_n + offs_n[None, :]) < cur_batch_seq_len, other=0.0)
k = tl.load(
k_ptrs + (cur_batch_in_all_start_index + start_n) * stride_kbs,
mask=(start_n + offs_n[None, :]) < cur_batch_seq_len,
other=0.0,
)
qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32)
qk += tl.dot(q, k)
@ -220,8 +247,11 @@ if HAS_TRITON:
acc_scale = l_i / l_i_new * alpha
acc = acc * acc_scale[:, None]
# update acc
v = tl.load(v_ptrs + (cur_batch_in_all_start_index + start_n) * stride_vbs,
mask=(start_n + offs_n[:, None]) < cur_batch_seq_len, other=0.0)
v = tl.load(
v_ptrs + (cur_batch_in_all_start_index + start_n) * stride_vbs,
mask=(start_n + offs_n[:, None]) < cur_batch_seq_len,
other=0.0,
)
p = p.to(v.dtype)
acc += tl.dot(p, v)
@ -229,7 +259,11 @@ if HAS_TRITON:
l_i = l_i_new
m_i = m_i_new
# initialize pointers to output
off_o = (cur_batch_in_all_start_index + offs_m[:, None]) * stride_obs + cur_head * stride_oh + offs_d[None, :] * stride_od
off_o = (
(cur_batch_in_all_start_index + offs_m[:, None]) * stride_obs
+ cur_head * stride_oh
+ offs_d[None, :] * stride_od
)
out_ptrs = Out + off_o
tl.store(out_ptrs, acc, mask=offs_m[:, None] < cur_batch_seq_len)
return
@ -249,7 +283,7 @@ if HAS_TRITON:
grid = (batch, head, triton.cdiv(max_input_len, BLOCK))
num_warps = 4 if Lk <= 64 else 8
if triton.__version__ < "2.1.0":
tmp = torch.empty((batch, head, max_input_len + 256), device=q.device, dtype=torch.float32)
_context_flash_attention_kernel[grid](
@ -286,20 +320,26 @@ if HAS_TRITON:
)
else:
_context_flash_attention_kernel_2[grid](
q, k, v, sm_scale, alibi, b_start_loc, b_seq_len,
q,
k,
v,
sm_scale,
alibi,
b_start_loc,
b_seq_len,
o,
None,
q.stride(0),
q.stride(1),
q.stride(0),
q.stride(1),
q.stride(2),
k.stride(0),
k.stride(1),
k.stride(0),
k.stride(1),
k.stride(2),
v.stride(0),
v.stride(1),
v.stride(0),
v.stride(1),
v.stride(2),
o.stride(0),
o.stride(1),
o.stride(0),
o.stride(1),
o.stride(2),
BLOCK_M=BLOCK,
BLOCK_DMODEL=Lk,
@ -307,7 +347,7 @@ if HAS_TRITON:
num_warps=num_warps,
num_stages=1,
)
return
@torch.no_grad()
@ -327,7 +367,7 @@ if HAS_TRITON:
tmp = torch.empty((batch, head, max_input_len + 256), device=q.device, dtype=torch.float32)
num_warps = 4 if Lk <= 64 else 8
# num_warps = 4
if triton.__version__ < "2.1.0":
_context_flash_attention_kernel[grid](
q,
@ -337,7 +377,7 @@ if HAS_TRITON:
b_start_loc,
b_seq_len,
tmp,
None,
None,
o,
q.stride(0),
q.stride(1),
@ -362,32 +402,33 @@ if HAS_TRITON:
)
else:
kv_group_num = q.shape[1] // k.shape[1]
_context_flash_attention_kernel_2[grid](
q,
k,
v,
sm_scale,
_context_flash_attention_kernel_2[grid](
q,
k,
v,
sm_scale,
None,
b_start_loc,
b_start_loc,
b_seq_len,
o,
kv_group_num,
q.stride(0),
q.stride(1),
q.stride(0),
q.stride(1),
q.stride(2),
k.stride(0),
k.stride(1),
k.stride(0),
k.stride(1),
k.stride(2),
v.stride(0),
v.stride(1),
v.stride(0),
v.stride(1),
v.stride(2),
o.stride(0),
o.stride(1),
o.stride(0),
o.stride(1),
o.stride(2),
BLOCK_M=BLOCK,
BLOCK_DMODEL=Lk,
BLOCK_N=BLOCK,
num_warps=num_warps,
num_stages=1,)
return
num_stages=1,
)
return

51
colossalai/kernel/triton/flash_decoding.py

@ -1,8 +1,10 @@
# adepted from https://github.com/ModelTC/lightllm/blob/ece7b43f8a6dfa74027adc77c2c176cff28c76c8/lightllm/models/llama/triton_kernel/flash_decoding.py
import torch
try:
from lightllm.models.llama.triton_kernel.flash_decoding_stage1 import flash_decode_stage1
from lightllm.models.llama.triton_kernel.flash_decoding_stage2 import flash_decode_stage2
HAS_LIGHTLLM_KERNEL = True
except:
print("install lightllm from https://github.com/ModelTC/lightllm/blob/ece7b43f8a6dfa74027adc77c2c176cff28c76c8")
@ -10,41 +12,36 @@ except:
if HAS_LIGHTLLM_KERNEL:
def token_flash_decoding(q, o_tensor, infer_state, q_head_num, head_dim, cache_k, cache_v):
BLOCK_SEQ = 256
batch_size = infer_state.batch_size
max_len_in_batch = infer_state.max_len_in_batch
calcu_shape1 = (batch_size, q_head_num, head_dim)
if getattr(infer_state, 'mid_o', None) is None:
infer_state.mid_o = torch.empty([batch_size,
q_head_num,
max_len_in_batch // BLOCK_SEQ + 1,
head_dim],
dtype=torch.float32,
device="cuda")
infer_state.mid_o_logexpsum = torch.empty([batch_size,
q_head_num,
max_len_in_batch // BLOCK_SEQ + 1],
dtype=torch.float32,
device="cuda")
if getattr(infer_state, "mid_o", None) is None:
infer_state.mid_o = torch.empty(
[batch_size, q_head_num, max_len_in_batch // BLOCK_SEQ + 1, head_dim],
dtype=torch.float32,
device="cuda",
)
infer_state.mid_o_logexpsum = torch.empty(
[batch_size, q_head_num, max_len_in_batch // BLOCK_SEQ + 1], dtype=torch.float32, device="cuda"
)
mid_o = infer_state.mid_o
mid_o_logexpsum = infer_state.mid_o_logexpsum
flash_decode_stage1(q.view(calcu_shape1),
cache_k,
cache_v,
infer_state.block_loc,
infer_state.seq_len,
infer_state.max_len_in_batch,
mid_o,
mid_o_logexpsum,
BLOCK_SEQ)
flash_decode_stage2(mid_o,
mid_o_logexpsum,
infer_state.seq_len,
o_tensor.view(calcu_shape1),
BLOCK_SEQ)
flash_decode_stage1(
q.view(calcu_shape1),
cache_k,
cache_v,
infer_state.block_loc,
infer_state.seq_len,
infer_state.max_len_in_batch,
mid_o,
mid_o_logexpsum,
BLOCK_SEQ,
)
flash_decode_stage2(mid_o, mid_o_logexpsum, infer_state.seq_len, o_tensor.view(calcu_shape1), BLOCK_SEQ)

65
colossalai/kernel/triton/llama_act_combine_kernel.py

@ -8,6 +8,7 @@ from torch.cuda.amp import custom_bwd, custom_fwd
try:
import triton
import triton.language as tl
HAS_TRITON = True
except ImportError:
HAS_TRITON = False
@ -26,8 +27,8 @@ if HAS_TRITON:
X_GATE2,
X_UP,
Y,
stride, # how much to increase the pointer when moving by 1 row
N, # number of columns in X
stride, # how much to increase the pointer when moving by 1 row
N, # number of columns in X
BLOCK_SIZE: tl.constexpr,
):
# Map the program id to the row of X and Y it should compute.
@ -41,9 +42,9 @@ if HAS_TRITON:
for off in range(0, N, BLOCK_SIZE):
cols = off + tl.arange(0, BLOCK_SIZE)
mask = cols < N
x_gate1 = tl.load(X_GATE1 + cols, mask=mask, other=0.)
x_gate2 = tl.load(X_GATE2 + cols, mask=mask, other=0.)
x_up = tl.load(X_UP + cols, mask=mask, other=0.)
x_gate1 = tl.load(X_GATE1 + cols, mask=mask, other=0.0)
x_gate2 = tl.load(X_GATE2 + cols, mask=mask, other=0.0)
x_up = tl.load(X_UP + cols, mask=mask, other=0.0)
x_gate2_sigmoid = tl.sigmoid(x_gate2.to(tl.float32)).to(x_gate2.dtype)
y = x_gate1 * x_gate2 * x_gate2_sigmoid * x_up
# Write output
@ -58,8 +59,8 @@ if HAS_TRITON:
X_GATE2_GRAD,
X_UP_GRAD,
Y_GRAD,
stride, # how much to increase the pointer when moving by 1 row
N, # number of columns in X
stride, # how much to increase the pointer when moving by 1 row
N, # number of columns in X
BLOCK_SIZE: tl.constexpr,
):
# Map the program id to the row of X and Y it should compute.
@ -76,10 +77,10 @@ if HAS_TRITON:
for off in range(0, N, BLOCK_SIZE):
cols = off + tl.arange(0, BLOCK_SIZE)
mask = cols < N
x_gate1 = tl.load(X_GATE1 + cols, mask=mask, other=0.)
x_gate2 = tl.load(X_GATE2 + cols, mask=mask, other=0.)
x_up = tl.load(X_UP + cols, mask=mask, other=0.)
y_grad = tl.load(Y_GRAD + cols, mask=mask, other=0.)
x_gate1 = tl.load(X_GATE1 + cols, mask=mask, other=0.0)
x_gate2 = tl.load(X_GATE2 + cols, mask=mask, other=0.0)
x_up = tl.load(X_UP + cols, mask=mask, other=0.0)
y_grad = tl.load(Y_GRAD + cols, mask=mask, other=0.0)
# forward: y = x_gate1 * x_gate2 * tl.sigmoid(x_gate2) * x_up
x_gate2_sigmoid = tl.sigmoid(x_gate2.to(tl.float32)).to(x_gate2.dtype)
@ -147,14 +148,9 @@ if HAS_TRITON:
# restore setting
ctx.M, ctx.N, ctx.BLOCK_SIZE, ctx.num_warps = M, N, BLOCK_SIZE, num_warps
# enqueue kernel
_llama_act_combine_forward[(M,)](x_gate1,
x_gate2,
x_up,
y,
x_up.stride(-2),
N,
BLOCK_SIZE=BLOCK_SIZE,
num_warps=num_warps)
_llama_act_combine_forward[(M,)](
x_gate1, x_gate2, x_up, y, x_up.stride(-2), N, BLOCK_SIZE=BLOCK_SIZE, num_warps=num_warps
)
return y
@staticmethod
@ -166,20 +162,25 @@ if HAS_TRITON:
# init grad
y_grad = grad_outputs[0]
x_gate1_grad, x_gate2_grad, x_up_grad = torch.empty_like(x_gate1), torch.empty_like(
x_gate2), torch.empty_like(x_up)
x_gate1_grad, x_gate2_grad, x_up_grad = (
torch.empty_like(x_gate1),
torch.empty_like(x_gate2),
torch.empty_like(x_up),
)
# enqueue kernel
_llama_act_combine_backward[(M,)](x_gate1,
x_gate2,
x_up,
x_gate1_grad,
x_gate2_grad,
x_up_grad,
y_grad,
x_up.stride(-2),
N,
BLOCK_SIZE=BLOCK_SIZE,
num_warps=num_warps)
_llama_act_combine_backward[(M,)](
x_gate1,
x_gate2,
x_up,
x_gate1_grad,
x_gate2_grad,
x_up_grad,
y_grad,
x_up.stride(-2),
N,
BLOCK_SIZE=BLOCK_SIZE,
num_warps=num_warps,
)
x_gate_grad = torch.cat([x_gate1_grad, x_gate2_grad], dim=-1)
return x_gate_grad, x_up_grad, None, None

20
colossalai/kernel/triton/token_attention_kernel.py

@ -13,10 +13,18 @@ except ImportError:
print("please install triton from https://github.com/openai/triton")
try:
from lightllm.models.llama.triton_kernel.token_attention_nopad_reduceV import token_att_fwd2 as lightllm_llama_token_att_fwd2
from lightllm.models.llama.triton_kernel.token_attention_nopad_att1 import token_att_fwd as lightllm_llama_token_att_fwd
from lightllm.models.llama.triton_kernel.token_attention_nopad_softmax import token_softmax_fwd as lightllm_llama_token_softmax_fwd
from lightllm.models.bloom.triton_kernel.token_attention_nopad_att1 import token_att_fwd as lightllm_bloom_token_att_fwd
from lightllm.models.bloom.triton_kernel.token_attention_nopad_att1 import (
token_att_fwd as lightllm_bloom_token_att_fwd,
)
from lightllm.models.llama.triton_kernel.token_attention_nopad_att1 import (
token_att_fwd as lightllm_llama_token_att_fwd,
)
from lightllm.models.llama.triton_kernel.token_attention_nopad_reduceV import (
token_att_fwd2 as lightllm_llama_token_att_fwd2,
)
from lightllm.models.llama.triton_kernel.token_attention_nopad_softmax import (
token_softmax_fwd as lightllm_llama_token_softmax_fwd,
)
HAS_TRITON_TOKEN_ATTENTION = True
except ImportError:
@ -205,9 +213,7 @@ class Llama2TokenAttentionForwards:
if triton.__version__ == "2.0.0":
prob = torch.empty_like(att_m_tensor)
lightllm_llama_token_softmax_fwd(
att_m_tensor, kv_cache_start_loc, kv_cache_seq_len, prob, max_len_in_batch
)
lightllm_llama_token_softmax_fwd(att_m_tensor, kv_cache_start_loc, kv_cache_seq_len, prob, max_len_in_batch)
att_m_tensor = None
lightllm_llama_token_att_fwd2(

4
colossalai/legacy/inference/tensor_parallel/modeling/llama.py

@ -8,7 +8,9 @@ from transformers.models.llama.modeling_llama import LlamaAttention, LlamaDecode
from colossalai.inference.tensor_parallel.batch_infer_state import BatchInferState
from colossalai.kernel.triton import llama_context_attn_fwd, token_attention_fwd
from colossalai.kernel.triton.token_attention_kernel import Llama2TokenAttentionForwards
from ._utils import copy_kv_to_mem_cache
try:
from lightllm.models.llama.triton_kernel.context_flashattention_nopad import (
context_attention_fwd as lightllm_llama_context_attention_fwd,
@ -90,7 +92,7 @@ def llama_triton_token_attention(query_states, attn_output, infer_state, num_key
# infer_state.cache_manager.past_key_values_length,
infer_state.max_len_in_batch,
)
else:
Llama2TokenAttentionForwards.token_attn(
query_states,

2
colossalai/shardformer/layer/__init__.py

@ -1,5 +1,5 @@
from .attn import AttnMaskType, ColoAttention
from ._operation import all_to_all_comm
from .attn import AttnMaskType, ColoAttention
from .dropout import DropoutForParallelInput, DropoutForReplicatedInput
from .embedding import Embedding1D, VocabParallelEmbedding1D
from .linear import Linear1D_Col, Linear1D_Row

4
colossalai/tensor/d_tensor/__init__.py

@ -2,13 +2,13 @@ from .api import (
compute_global_numel,
customized_distributed_tensor_to_param,
distribute_tensor,
init_as_dtensor,
distribute_tensor_with_customization,
init_tensor_as_customization_distributed,
get_device_mesh,
get_global_shape,
get_layout,
get_sharding_spec,
init_as_dtensor,
init_tensor_as_customization_distributed,
is_customized_distributed_tensor,
is_distributed_tensor,
is_sharded,

7
colossalai/tensor/d_tensor/api.py

@ -128,7 +128,10 @@ def distribute_tensor(tensor: torch.Tensor, device_mesh: DeviceMesh, sharding_sp
return sharded_tensor
def init_as_dtensor(tensor: torch.Tensor, device_mesh: DeviceMesh, sharding_spec: ShardingSpec, global_shape: torch.Size) -> torch.Tensor:
def init_as_dtensor(
tensor: torch.Tensor, device_mesh: DeviceMesh, sharding_spec: ShardingSpec, global_shape: torch.Size
) -> torch.Tensor:
assert not is_distributed_tensor(tensor), "The input tensor is already a distributed tensor."
dist_layout = Layout(device_mesh=device_mesh, sharding_spec=sharding_spec, global_shape=global_shape)
@ -140,6 +143,7 @@ def init_as_dtensor(tensor: torch.Tensor, device_mesh: DeviceMesh, sharding_spec
return tensor
def redistribute(dtensor: torch.Tensor, device_mesh: DeviceMesh, sharding_spec: ShardingSpec) -> None:
"""
Convert the layout of the tensor from source_spec to target_spec.
@ -468,7 +472,6 @@ def init_tensor_as_customization_distributed(tensor: torch.Tensor, shard_fn, gat
assert callable(gather_fn), "The gather_fn must be callable."
assert not is_distributed_tensor(tensor), "The input tensor is already a distributed tensor."
# set the shard_fn and gather_fn as attributes of the distributed tensor
tensor.shard_fn = shard_fn
tensor.gather_fn = gather_fn

1
colossalai/zero/low_level/_utils.py

@ -190,6 +190,7 @@ def calculate_global_norm_from_list(norm_list):
total_norm += norm**2.0
return math.sqrt(total_norm)
def sync_tensor(flat_tensor, tensor_list):
"""
Synchronize the flattened tensor and unflattened tensor list. When

2
docs/source/zh-Hans/advanced_tutorials/train_vit_with_hybrid_parallelism.md

@ -220,7 +220,7 @@ model, optimizer, _criterion, train_dataloader, lr_scheduler = booster.boost(
)
```
## 使用混合并行训练 ViT
最后就可以使用混合并行策略来训练模型了。我们先定义一个训练函数,描述训练过程。需要注意的是,如果使用了管道并行策略,需要调用`booster.execute_pipeline`来执行模型的训练,它会调用`scheduler`管理模型的前后向操作。
最后就可以使用混合并行策略来训练模型了。我们先定义一个训练函数,描述训练过程。需要注意的是,如果使用了管道并行策略,需要调用`booster.execute_pipeline`来执行模型的训练,它会调用`scheduler`管理模型的前后向操作。
```python
def run_forward_backward(
model: nn.Module,

4
examples/images/vit/vit_benchmark.py

@ -119,9 +119,7 @@ def main():
if hasattr(booster.plugin, "stage_manager") and booster.plugin.stage_manager is not None:
# run pipeline forward backward
batch = iter([batch])
outputs = booster.execute_pipeline(
batch, model, criterion, optimizer, return_loss=True
)
outputs = booster.execute_pipeline(batch, model, criterion, optimizer, return_loss=True)
else:
outputs = model(**batch)
loss = criterion(outputs, None)

2
examples/language/data_utils.py

@ -121,4 +121,4 @@ class RandomDataset(Dataset):
"input_ids": self.input_ids[idx],
"attention_mask": self.attention_mask[idx],
"labels": self.input_ids[idx],
}
}

4
examples/language/llama2/finetune.py

@ -270,9 +270,7 @@ def main():
) as pbar:
for step in pbar:
if use_pipeline:
outputs = booster.execute_pipeline(
dataloader_iter, model, _criterion, optimizer, return_loss=True
)
outputs = booster.execute_pipeline(dataloader_iter, model, _criterion, optimizer, return_loss=True)
loss = outputs["loss"]
else:
batch = next(dataloader_iter)

4
examples/language/llama2/pretrain.py

@ -285,9 +285,7 @@ def main():
) as pbar:
for step in pbar:
if use_pipeline:
outputs = booster.execute_pipeline(
dataloader_iter, model, _criterion, optimizer, return_loss=True
)
outputs = booster.execute_pipeline(dataloader_iter, model, _criterion, optimizer, return_loss=True)
loss = outputs["loss"]
else:
batch = next(dataloader_iter)

6
examples/language/openmoe/benchmark/utils.py

@ -50,7 +50,6 @@ def all_reduce_mean(x: float, world_size: int) -> float:
class Timer:
def __init__(self) -> None:
self.start_time: Optional[float] = None
self.duration: float = 0.0
@ -112,7 +111,7 @@ class PerformanceEvaluator:
batch_size, seq_len = input_ids.shape
self.num_samples += batch_size
self.flop += (batch_size * seq_len * self.model_numel * 2 * (3 + int(self.enable_grad_checkpoint)))
self.flop += batch_size * seq_len * self.model_numel * 2 * (3 + int(self.enable_grad_checkpoint))
def on_fit_end(self) -> None:
avg_duration = all_reduce_mean(self.timer.duration, self.world_size)
@ -122,5 +121,6 @@ class PerformanceEvaluator:
if dist.get_rank() == 0:
print(
f"num_samples: {self.num_samples}, dp_world_size: {self.dp_world_size}, flop: {self.flop}, avg_duration: {avg_duration}, "
f"avg_throughput: {avg_throughput}")
f"avg_throughput: {avg_throughput}"
)
print(f"Throughput: {avg_throughput:.2f} samples/sec, TFLOPS per GPU: {avg_tflops_per_gpu:.2f}")

14
examples/language/openmoe/infer.py

@ -16,17 +16,15 @@ def inference(args):
tokenizer = T5Tokenizer.from_pretrained("google/umt5-small")
if args.model == "test":
config = LlamaConfig.from_pretrained("hpcai-tech/openmoe-base")
set_openmoe_args(config,
num_experts=config.num_experts,
moe_layer_interval=config.moe_layer_interval,
enable_kernel=True)
set_openmoe_args(
config, num_experts=config.num_experts, moe_layer_interval=config.moe_layer_interval, enable_kernel=True
)
model = OpenMoeForCausalLM(config)
else:
config = LlamaConfig.from_pretrained(f"hpcai-tech/openmoe-{args.model}")
set_openmoe_args(config,
num_experts=config.num_experts,
moe_layer_interval=config.moe_layer_interval,
enable_kernel=False)
set_openmoe_args(
config, num_experts=config.num_experts, moe_layer_interval=config.moe_layer_interval, enable_kernel=False
)
model = OpenMoeForCausalLM.from_pretrained(f"hpcai-tech/openmoe-{args.model}", config=config)
model = model.eval().bfloat16()
model = model.to(torch.cuda.current_device())

22
examples/language/openmoe/model/convert_openmoe_ckpt.py

@ -172,9 +172,9 @@ def make_state_dict(converted_params):
def load_t5x_weights_in_t5(model, config, t5x_checkpoint_path):
"""Replaces the params in model witht the T5X converted params."""
variables = checkpoints.load_t5x_checkpoint(t5x_checkpoint_path)
converted = convert_t5x_to_pytorch(variables,
num_layers=config.num_hidden_layers,
moe_interval=config.moe_layer_interval)
converted = convert_t5x_to_pytorch(
variables, num_layers=config.num_hidden_layers, moe_interval=config.moe_layer_interval
)
state_dict = make_state_dict(converted)
model.load_state_dict(state_dict, strict=True)
@ -203,11 +203,9 @@ def convert_t5x_checkpoint_to_pytorch(t5x_checkpoint_path, config_file, pytorch_
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Converts a native T5X checkpoint into a PyTorch checkpoint.")
# Required parameters
parser.add_argument("--t5x_checkpoint_path",
default=None,
type=str,
required=True,
help="Path to the T5X checkpoint.")
parser.add_argument(
"--t5x_checkpoint_path", default=None, type=str, required=True, help="Path to the T5X checkpoint."
)
parser.add_argument(
"--config_file",
default=None,
@ -215,10 +213,8 @@ if __name__ == "__main__":
required=True,
help="The config json file corresponding to the pre-trained T5 model.\nThis specifies the model architecture.",
)
parser.add_argument("--pytorch_dump_path",
default=None,
type=str,
required=True,
help="Path to the output PyTorch model.")
parser.add_argument(
"--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
)
args = parser.parse_args()
convert_t5x_checkpoint_to_pytorch(args.t5x_checkpoint_path, args.config_file, args.pytorch_dump_path)

4
examples/language/opt/opt_train_demo.py

@ -41,9 +41,7 @@ def train_epoch(epoch, model, optimizer, _criterion, lr_scheduler, dataloader, b
# Forward pass
for _ in pbar:
if use_pipeline:
outputs = booster.execute_pipeline(
dataloader, model, _criterion, optimizer, return_loss=True
)
outputs = booster.execute_pipeline(dataloader, model, _criterion, optimizer, return_loss=True)
# Backward and optimize
if is_pp_last_stage:
loss = outputs["loss"]

3
extensions/cpu_adam/__init__.py

@ -1,5 +1,4 @@
from .cpu_adam_arm import CpuAdamArmExtension
from .cpu_adam_x86 import CpuAdamX86Extension
__all__ = ['CpuAdamArmExtension', 'CpuAdamX86Extension']
__all__ = ["CpuAdamArmExtension", "CpuAdamX86Extension"]

2
extensions/layernorm/__init__.py

@ -1,3 +1,3 @@
from .layernorm_cuda import LayerNormCudaExtension
__all__ = ["LayerNormCudaExtension"]
__all__ = ["LayerNormCudaExtension"]

2
extensions/moe/__init__.py

@ -1,3 +1,3 @@
from .moe_cuda import MoeCudaExtension
__all__ = ['MoeCudaExtension']
__all__ = ["MoeCudaExtension"]

2
extensions/optimizer/__init__.py

@ -1,3 +1,3 @@
from .fused_optimizer_cuda import FusedOptimizerCudaExtension
__all__ = ['FusedOptimizerCudaExtension']
__all__ = ["FusedOptimizerCudaExtension"]

2
extensions/softmax/__init__.py

@ -1,4 +1,4 @@
from .scaled_masked_softmax_cuda import ScaledMaskedSoftmaxCudaExtension
from .scaled_upper_triangle_masked_softmax_cuda import ScaledUpperTriangleMaskedSoftmaxCudaExtension
__all__ = ['ScaledMaskedSoftmaxCudaExtension', 'ScaledUpperTriangleMaskedSoftmaxCudaExtension']
__all__ = ["ScaledMaskedSoftmaxCudaExtension", "ScaledUpperTriangleMaskedSoftmaxCudaExtension"]

44
tests/kit/model_zoo/__init__.py

@ -1,33 +1,33 @@
import os
from . import custom, diffusers, timm, torchaudio, torchvision, transformers
from .executor import run_fwd, run_fwd_bwd
from .registry import model_zoo
# We pick a subset of models for fast testing in order to reduce the total testing time
COMMON_MODELS = [
'custom_hanging_param_model',
'custom_nested_model',
'custom_repeated_computed_layers',
'custom_simple_net',
'diffusers_clip_text_model',
'diffusers_auto_encoder_kl',
'diffusers_unet2d_model',
'timm_densenet',
'timm_resnet',
'timm_swin_transformer',
'torchaudio_wav2vec2_base',
'torchaudio_conformer',
'transformers_bert_for_masked_lm',
'transformers_bloom_for_causal_lm',
'transformers_falcon_for_causal_lm',
'transformers_chatglm_for_conditional_generation',
'transformers_llama_for_casual_lm',
'transformers_vit_for_masked_image_modeling',
'transformers_mistral_for_casual_lm'
"custom_hanging_param_model",
"custom_nested_model",
"custom_repeated_computed_layers",
"custom_simple_net",
"diffusers_clip_text_model",
"diffusers_auto_encoder_kl",
"diffusers_unet2d_model",
"timm_densenet",
"timm_resnet",
"timm_swin_transformer",
"torchaudio_wav2vec2_base",
"torchaudio_conformer",
"transformers_bert_for_masked_lm",
"transformers_bloom_for_causal_lm",
"transformers_falcon_for_causal_lm",
"transformers_chatglm_for_conditional_generation",
"transformers_llama_for_casual_lm",
"transformers_vit_for_masked_image_modeling",
"transformers_mistral_for_casual_lm",
]
IS_FAST_TEST = os.environ.get('FAST_TEST', '0') == '1'
IS_FAST_TEST = os.environ.get("FAST_TEST", "0") == "1"
__all__ = ["model_zoo", "run_fwd", "run_fwd_bwd", 'COMMON_MODELS', 'IS_FAST_TEST']
__all__ = ["model_zoo", "run_fwd", "run_fwd_bwd", "COMMON_MODELS", "IS_FAST_TEST"]

2
tests/kit/model_zoo/registry.py

@ -102,4 +102,4 @@ class ModelZooRegistry(dict):
return new_dict
model_zoo = ModelZooRegistry()
model_zoo = ModelZooRegistry()

1
tests/kit/model_zoo/transformers/chatglm2.py

@ -2,6 +2,7 @@ import torch
from colossalai.shardformer.modeling.chatglm2_6b.configuration_chatglm import ChatGLMConfig
from colossalai.shardformer.modeling.chatglm2_6b.modeling_chatglm import ChatGLMForConditionalGeneration, ChatGLMModel
from ..registry import ModelAttribute, model_zoo
# ================================

8
tests/test_checkpoint_io/test_hybrid_parallel_plugin_checkpoint_io.py

@ -74,9 +74,7 @@ def exam_state_dict(shard: bool, model_name: str, size_per_shard: int, test_conf
data = data_gen_fn()
model.train()
if booster.plugin.stage_manager is not None:
booster.execute_pipeline(
_preprocess_data(data), model, _criterion, optimizer, return_loss=True
)
booster.execute_pipeline(_preprocess_data(data), model, _criterion, optimizer, return_loss=True)
else:
output = model(**_preprocess_data(data))
loss = criterion(output)
@ -108,9 +106,7 @@ def exam_state_dict(shard: bool, model_name: str, size_per_shard: int, test_conf
data_for_shard = data_gen_fn()
data_for_origin = data_gen_fn()
if booster.plugin.stage_manager is not None:
booster.execute_pipeline(
_preprocess_data(data_for_shard), model, _criterion, optimizer, return_loss=True
)
booster.execute_pipeline(_preprocess_data(data_for_shard), model, _criterion, optimizer, return_loss=True)
booster.execute_pipeline(
_preprocess_data(data_for_origin),
new_model,

1
tests/test_checkpoint_io/test_torch_fsdp_checkpoint_io.py

@ -113,6 +113,7 @@ def check_torch_fsdp_ckpt():
full_osd = FSDP.full_optim_state_dict(optimizer.unwrap_model().unwrap(), optim=optimizer)
import copy
sharded_osd = copy.deepcopy(full_osd)
run_model()

30
tests/test_gptq/test_gptq_linear.py

@ -1,16 +1,8 @@
import math
import time
import numpy as np
import pytest
import torch
import torch.nn as nn
import transformers
from packaging import version
try:
import triton
import triton.language as tl
HAS_TRITON = True
except ImportError:
HAS_TRITON = False
@ -22,6 +14,7 @@ try:
from exllama_kernels import prepare_buffers, set_tuning_params
from colossalai.inference.quant.gptq import CaiQuantLinear
HAS_AUTO_GPTQ = True
except:
HAS_AUTO_GPTQ = False
@ -32,13 +25,14 @@ import warnings
HAS_GPTQ_CUDA = False
try:
from colossalai.kernel.op_builder.gptq import GPTQBuilder
gptq_cuda = GPTQBuilder().load()
HAS_GPTQ_CUDA = True
except ImportError:
warnings.warn('CUDA gptq is not installed')
warnings.warn("CUDA gptq is not installed")
HAS_GPTQ_CUDA = False
TRITON_CUDA_SUPPORT = version.parse(torch.version.cuda) > version.parse('11.4')
TRITON_CUDA_SUPPORT = version.parse(torch.version.cuda) > version.parse("11.4")
max_inner_outer_dim = 1
max_input_len = 1
@ -64,9 +58,9 @@ def init_buffer(cai_linear, use_act_order=False):
max_input_len = 4096
# The temp_state buffer is required to reorder X in the act-order case.
# The temp_dq buffer is required to dequantize weights when using cuBLAS, typically for the prefill.
gptq_temp_state_buffer = torch.zeros((max_input_len, max_inner_outer_dim),
dtype=torch.float16,
device=torch.cuda.current_device())
gptq_temp_state_buffer = torch.zeros(
(max_input_len, max_inner_outer_dim), dtype=torch.float16, device=torch.cuda.current_device()
)
gptq_temp_dq_buffer = torch.zeros((1, max_dq_buffer_size), dtype=torch.float16, device=torch.cuda.current_device())
gptq_cuda.prepare_buffers(torch.device(torch.cuda.current_device()), gptq_temp_state_buffer, gptq_temp_dq_buffer)
@ -77,10 +71,11 @@ def init_buffer(cai_linear, use_act_order=False):
gptq_cuda.set_tuning_params(matmul_recons_thd, matmul_fused_remap, matmul_no_half2)
@pytest.mark.skipif(not TRITON_CUDA_SUPPORT or not HAS_TRITON or not HAS_AUTO_GPTQ,
reason="triton requires cuda version to be higher than 11.4 or not install auto-gptq")
@pytest.mark.skipif(
not TRITON_CUDA_SUPPORT or not HAS_TRITON or not HAS_AUTO_GPTQ,
reason="triton requires cuda version to be higher than 11.4 or not install auto-gptq",
)
def test_gptq_linear():
infeature = 1024
outfeature = 1024
group_size = 128
@ -120,7 +115,7 @@ def test_gptq_linear():
max_input_len = 2048
buffers = {
"temp_state": torch.zeros((max_input_len, max_inner_outer_dim), dtype=torch.float16, device=device),
"temp_dq": torch.zeros((1, max_dq_buffer_size), dtype=torch.float16, device=device)
"temp_dq": torch.zeros((1, max_dq_buffer_size), dtype=torch.float16, device=device),
}
prepare_buffers(device, buffers["temp_state"], buffers["temp_dq"])
@ -146,5 +141,4 @@ def test_gptq_linear():
if __name__ == "__main__":
test_gptq_linear()

2
tests/test_lazy/test_models.py

@ -24,4 +24,4 @@ def test_torchvision_models_lazy_init(subset, default_device):
if __name__ == "__main__":
test_torchvision_models_lazy_init("transformers", "cpu")
test_torchvision_models_lazy_init("transformers", "cpu")

3
tests/test_optimizer/test_nvme.py

@ -1,5 +1,5 @@
import torch
import pytest
import torch
from colossalai.nn.optimizer import CPUAdam, HybridAdam
from colossalai.testing import clear_cache_before_run, parameterize
@ -17,6 +17,7 @@ def check_params_equal(model, torch_model):
for p, torch_p in zip(model.parameters(), torch_model.parameters()):
assert torch.allclose(p, torch_p, atol=1e-3), f"diff: {torch.abs(p - torch_p)}"
# TODO Something wrong with ci when running this test.
@pytest.mark.skip(reason="skip because of something wrong with CI")
@clear_cache_before_run()

4
tests/test_pipeline/test_schedule/test_interleaved.py

@ -103,9 +103,7 @@ def run_pp(
torch_loss = criterion(torch_output)
torch_loss.backward()
pp_ret = schedule.forward_backward_step(
sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True
)
pp_ret = schedule.forward_backward_step(sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True)
# check loss
if stage_manager.is_last_stage(ignore_chunk=True):

4
tests/test_pipeline/test_schedule/test_oneF_oneB.py

@ -99,9 +99,7 @@ def examine_pp(num_microbatch: int, batch_size: int):
torch_output = torch_model(input_list[0])
torch_loss = criterion(torch_output)
torch_loss.backward()
pp_ret = schedule.forward_backward_step(
sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True
)
pp_ret = schedule.forward_backward_step(sharded_model, iter(input_list), criterion, pp_optimizer, return_loss=True)
# check loss
if stage_manager.is_last_stage():

Loading…
Cancel
Save