mirror of https://github.com/InternLM/InternLM

commit 6908fa5e16 ("fix lint"), parent 5e594c1699
@@ -189,8 +189,8 @@ def generate_interactive(
         generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
         if not has_default_max_length:
             logger.warn(  # pylint: disable=W4902
-                f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
-                f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
+                f'Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(='
+                f'{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. '
                 'Please refer to the documentation for more information. '
                 '(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)',
                 UserWarning,
@@ -199,8 +199,8 @@ def generate_interactive(
     if input_ids_seq_length >= generation_config.max_length:
         input_ids_string = 'input_ids'
         logger.warning(
-            f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
-            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
+            f'Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to'
+            f' {generation_config.max_length}. This can lead to unexpected behavior. You should consider'
             ' increasing `max_new_tokens`.')

     # 2. Set generation parameters if not already defined
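
Note on the two hunks above: when `max_new_tokens` is set, `max_length` is recomputed from the prompt length, and a prompt that already fills the budget leaves nothing to generate. A minimal sketch of that precedence rule, assuming a config object with just these two fields (the helper name is invented for illustration):

```python
from types import SimpleNamespace


def resolve_max_length(generation_config, input_ids_seq_length):
    """`max_new_tokens` takes precedence over `max_length`."""
    if generation_config.max_new_tokens is not None:
        # max_length becomes prompt length plus the new-token budget.
        generation_config.max_length = (generation_config.max_new_tokens +
                                        input_ids_seq_length)
    if input_ids_seq_length >= generation_config.max_length:
        # The prompt alone already exhausts the budget.
        raise ValueError('Prompt is longer than `max_length`; '
                         'consider increasing `max_new_tokens`.')
    return generation_config.max_length


cfg = SimpleNamespace(max_new_tokens=32, max_length=20)
print(resolve_max_length(cfg, input_ids_seq_length=100))  # -> 132
```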
@@ -510,7 +510,7 @@ def main():
            interface.clear_history()
            f.flush()

-    print(f"{args.model}: Accuracy - {sum(scores) / len(scores)}")
+    print(f'{args.model}: Accuracy - {sum(scores) / len(scores)}')
    torch.cuda.empty_cache()

@@ -77,7 +77,8 @@ def generate_interactive(
         'max_length') is None and generation_config.max_length is not None
     if has_default_max_length and generation_config.max_new_tokens is None:
         warnings.warn(
-            f"Using 'max_length''s default ({repr(generation_config.max_length)}) \
+            f"Using 'max_length''s default \
+({repr(generation_config.max_length)}) \
 to control the generation length. "
             'This behaviour is deprecated and will be removed from the \
 config in v5 of Transformers -- we'
@@ -102,7 +103,7 @@ def generate_interactive(
     if input_ids_seq_length >= generation_config.max_length:
         input_ids_string = 'input_ids'
         logger.warning(
-            f"Input length of {input_ids_string} is {input_ids_seq_length}, "
+            f'Input length of {input_ids_string} is {input_ids_seq_length}, '
             f"but 'max_length' is set to {generation_config.max_length}. "
             'This can lead to unexpected behavior. You should consider'
             " increasing 'max_new_tokens'.")
@@ -180,9 +181,9 @@ def on_btn_click():

 @st.cache_resource
 def load_model():
-    model = (AutoModelForCausalLM.from_pretrained('internlm/internlm2_5-7b-chat',
-                                                  trust_remote_code=True).to(
-                                                      torch.bfloat16).cuda())
+    model = (AutoModelForCausalLM.from_pretrained(
+        'internlm/internlm2_5-7b-chat',
+        trust_remote_code=True).to(torch.bfloat16).cuda())
     tokenizer = AutoTokenizer.from_pretrained('internlm/internlm2_5-7b-chat',
                                               trust_remote_code=True)
     return model, tokenizer
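
The `load_model` reflow above is pure formatting; behavior is unchanged. For reference, the cached loader in use, as a sketch assuming a CUDA device and InternLM's remote-code `chat` helper (its signature is taken from the model card, so treat it as an assumption):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    'internlm/internlm2_5-7b-chat',
    trust_remote_code=True).to(torch.bfloat16).cuda()
tokenizer = AutoTokenizer.from_pretrained('internlm/internlm2_5-7b-chat',
                                          trust_remote_code=True)

# chat() is provided by the checkpoint's remote code (signature assumed).
response, history = model.chat(tokenizer, 'Hello!', history=[])
print(response)
```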
@@ -216,7 +217,7 @@ def combine_history(prompt):
     meta_instruction = ('You are InternLM (书生·浦语), a helpful, honest, '
                         'and harmless AI assistant developed by Shanghai '
                         'AI Laboratory (上海人工智能实验室).')
-    total_prompt = f"<s><|im_start|>system\n{meta_instruction}<|im_end|>\n"
+    total_prompt = f'<s><|im_start|>system\n{meta_instruction}<|im_end|>\n'
     for message in messages:
         cur_content = message['content']
         if message['role'] == 'user':
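
The `total_prompt` line above is InternLM2's ChatML-style template. A sketch of how `combine_history` presumably assembles the full prompt, reconstructed from the visible context lines; the loop body and the 'robot' role name are assumptions, not part of this diff:

```python
def combine_history(prompt, messages, meta_instruction):
    # System preamble in the <|im_start|>/<|im_end|> chat format.
    total_prompt = f'<s><|im_start|>system\n{meta_instruction}<|im_end|>\n'
    for message in messages:
        cur_content = message['content']
        if message['role'] == 'user':
            total_prompt += f'<|im_start|>user\n{cur_content}<|im_end|>\n'
        elif message['role'] == 'robot':
            total_prompt += f'<|im_start|>assistant\n{cur_content}<|im_end|>\n'
    # Trailing tag cues the model to answer as the assistant.
    total_prompt += (f'<|im_start|>user\n{prompt}<|im_end|>\n'
                     '<|im_start|>assistant\n')
    return total_prompt
```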
@@ -35,6 +35,7 @@ Currently, we support PDF, TXT, and Markdown files, with more file types to be s
 ### Installation
+
 To get started, install the required packages:

 ```bash
 pip install "fairy-doc[cpu]"
 pip install streamlit
@@ -77,8 +78,6 @@ The effect is demonstrated in the video below.

 https://github.com/libowen2121/InternLM/assets/19970308/1d7f9b87-d458-4f24-9f7a-437a4da3fa6e

-
 ## 🔜 Stay Tuned for More

 We are continuously enhancing our models to better understand and reason with extensive long inputs. Expect new features, improved performance, and expanded capabilities in upcoming updates!
-
@@ -32,6 +32,7 @@
 ### Installation
+
 Before getting started, install the required dependencies:

 ```bash
 pip install "fairy-doc[cpu]"
 pip install streamlit
@@ -1,12 +1,15 @@
+import argparse
 import logging
 from dataclasses import dataclass

 import streamlit as st
-from openai import OpenAI
 from magic_doc.docconv import DocConverter
-import argparse
+from openai import OpenAI

 # Set up logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s - %(levelname)s - %(message)s')


 @dataclass
 class GenerationConfig:
@@ -16,13 +19,14 @@ class GenerationConfig:
     temperature: float = 0.1
     repetition_penalty: float = 1.005


 def generate(
     client,
     messages,
     generation_config,
 ):
     stream = client.chat.completions.create(
-        model=st.session_state["model_name"],
+        model=st.session_state['model_name'],
         messages=messages,
         stream=True,
         temperature=generation_config.temperature,
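
`generate` returns the raw stream from an OpenAI-compatible endpoint. A minimal consumer of such a stream, assuming a local OpenAI-compatible server (the URL and api_key value are placeholders; the chunk layout follows the OpenAI Python SDK):

```python
from openai import OpenAI

client = OpenAI(base_url='http://localhost:23333/v1', api_key='none')
stream = client.chat.completions.create(
    model=client.models.list().data[0].id,
    messages=[{'role': 'user', 'content': 'Hi'}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content  # incremental text; may be None
    if delta:
        print(delta, end='', flush=True)
```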
@@ -32,113 +36,140 @@ def generate(
     )
     return stream


 def prepare_generation_config():
     with st.sidebar:
-        max_tokens = st.number_input("Max Tokens", min_value=100, max_value=4096, value=1024)
-        top_p = st.number_input("Top P", 0.0, 1.0, 1.0, step=0.01)
-        temperature = st.number_input("Temperature", 0.0, 1.0, 0.05, step=0.01)
-        repetition_penalty = st.number_input("Repetition Penalty", 0.8, 1.2, 1.02, step=0.001, format="%0.3f")
-        st.button("Clear Chat History", on_click=on_btn_click)
+        max_tokens = st.number_input('Max Tokens',
+                                     min_value=100,
+                                     max_value=4096,
+                                     value=1024)
+        top_p = st.number_input('Top P', 0.0, 1.0, 1.0, step=0.01)
+        temperature = st.number_input('Temperature', 0.0, 1.0, 0.05, step=0.01)
+        repetition_penalty = st.number_input('Repetition Penalty',
+                                             0.8,
+                                             1.2,
+                                             1.02,
+                                             step=0.001,
+                                             format='%0.3f')
+        st.button('Clear Chat History', on_click=on_btn_click)

-    generation_config = GenerationConfig(
-        max_tokens=max_tokens,
-        top_p=top_p,
-        temperature=temperature,
-        repetition_penalty=repetition_penalty
-    )
+    generation_config = GenerationConfig(max_tokens=max_tokens,
+                                         top_p=top_p,
+                                         temperature=temperature,
+                                         repetition_penalty=repetition_penalty)

     return generation_config


 def on_btn_click():
     del st.session_state.messages
     st.session_state.file_content_found = False
     st.session_state.file_content_used = False


 user_avator = 'assets/user.png'
 robot_avator = 'assets/robot.png'

 st.title('InternLM2.5 File Chat 📝')


 def main(base_url):
     # Initialize the client for the model
-    client = OpenAI(
-        base_url=base_url,
-        timeout=12000
-    )
+    client = OpenAI(base_url=base_url, timeout=12000)

     # Get the model ID
     model_name = client.models.list().data[0].id
-    st.session_state["model_name"] = model_name
+    st.session_state['model_name'] = model_name

     # Get the generation config
     generation_config = prepare_generation_config()

     # Initialize session state
-    if "messages" not in st.session_state:
+    if 'messages' not in st.session_state:
         st.session_state.messages = []

-    if "file_content_found" not in st.session_state:
+    if 'file_content_found' not in st.session_state:
         st.session_state.file_content_found = False
         st.session_state.file_content_used = False
-        st.session_state.file_name = ""
+        st.session_state.file_name = ''

     # Handle file upload
     if not st.session_state.file_content_found:
-        uploaded_file = st.file_uploader("Upload an article", type=("txt", "md", "pdf"))
-        file_content = ""
+        uploaded_file = st.file_uploader('Upload an article',
+                                         type=('txt', 'md', 'pdf'))
+        file_content = ''
         if uploaded_file is not None:
-            if uploaded_file.type == "application/pdf":
-                with open("uploaded_file.pdf", "wb") as f:
+            if uploaded_file.type == 'application/pdf':
+                with open('uploaded_file.pdf', 'wb') as f:
                     f.write(uploaded_file.getbuffer())
                 converter = DocConverter(s3_config=None)
-                file_content, time_cost = converter.convert("uploaded_file.pdf", conv_timeout=300)
-                st.session_state.file_content_found = True  # Reset flag when a new file is uploaded
-                st.session_state.file_content = file_content  # Store the file content in session state
-                st.session_state.file_name = uploaded_file.name  # Store the file name in session state
+                file_content, time_cost = converter.convert(
+                    'uploaded_file.pdf', conv_timeout=300)
+                # Reset flag when a new file is uploaded
+                st.session_state.file_content_found = True
+                # Store the file content in session state
+                st.session_state.file_content = file_content
+                # Store the file name in session state
+                st.session_state.file_name = uploaded_file.name
             else:
-                file_content = uploaded_file.read().decode("utf-8")
-                st.session_state.file_content_found = True  # Reset flag when a new file is uploaded
-                st.session_state.file_content = file_content  # Store the file content in session state
-                st.session_state.file_name = uploaded_file.name  # Store the file name in session state
+                file_content = uploaded_file.read().decode('utf-8')
+                # Reset flag when a new file is uploaded
+                st.session_state.file_content_found = True
+                # Store the file content in session state
+                st.session_state.file_content = file_content
+                # Store the file name in session state
+                st.session_state.file_name = uploaded_file.name

     if st.session_state.file_content_found:
-        st.success(f"File '{st.session_state.file_name}' has been successfully uploaded!")
+        st.success(f"File '{st.session_state.file_name}' "
+                   'has been successfully uploaded!')

     # Display chat messages
     for message in st.session_state.messages:
-        with st.chat_message(message["role"], avatar=message.get("avatar")):
-            st.markdown(message["content"])
+        with st.chat_message(message['role'], avatar=message.get('avatar')):
+            st.markdown(message['content'])

     # Handle user input and response generation
     if prompt := st.chat_input("What's up?"):
-        turn = {"role": "user", "content": prompt, "avatar": user_avator}
-        if st.session_state.file_content_found and not st.session_state.file_content_used:
+        turn = {'role': 'user', 'content': prompt, 'avatar': user_avator}
+        if (st.session_state.file_content_found
+                and not st.session_state.file_content_used):
             assert st.session_state.file_content is not None
-            merged_prompt = f"{st.session_state.file_content}\n\n{prompt}"
-            st.session_state.file_content_used = True  # Set flag to indicate file content has been used
-            turn["merged_content"] = merged_prompt
+            merged_prompt = f'{st.session_state.file_content}\n\n{prompt}'
+            # Set flag to indicate file content has been used
+            st.session_state.file_content_used = True
+            turn['merged_content'] = merged_prompt

         st.session_state.messages.append(turn)
-        with st.chat_message("user", avatar=user_avator):
+        with st.chat_message('user', avatar=user_avator):
             st.markdown(prompt)

-        with st.chat_message("assistant", avatar=robot_avator):
-            messages = [
-                {
-                    "role": m["role"],
-                    "content": m["merged_content"] if "merged_content" in m else m["content"],
-                }
-                for m in st.session_state.messages
-            ]
+        with st.chat_message('assistant', avatar=robot_avator):
+            messages = [{
+                'role':
+                m['role'],
+                'content':
+                m['merged_content'] if 'merged_content' in m else m['content'],
+            } for m in st.session_state.messages]
             # Log messages to the terminal
             for m in messages:
-                logging.info(f"\n\n*** [{m['role']}] ***\n\n\t{m['content']}\n\n")
+                logging.info(
+                    f"\n\n*** [{m['role']}] ***\n\n\t{m['content']}\n\n")
             stream = generate(client, messages, generation_config)
             response = st.write_stream(stream)
-            st.session_state.messages.append({"role": "assistant", "content": response, "avatar": robot_avator})
+            st.session_state.messages.append({
+                'role': 'assistant',
+                'content': response,
+                'avatar': robot_avator
+            })


-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run Streamlit app with OpenAI client.")
-    parser.add_argument("--base_url", type=str, required=True, help="Base URL for the OpenAI client")
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Run Streamlit app with OpenAI client.')
+    parser.add_argument('--base_url',
+                        type=str,
+                        required=True,
+                        help='Base URL for the OpenAI client')
     args = parser.parse_args()
     main(args.base_url)
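
One behavioral note on the file above: `st.write_stream` accepts a generator of text chunks, and recent Streamlit versions also accept the OpenAI stream object directly, which is what this code relies on. If a deployment's Streamlit only takes generators, a thin adapter would bridge the two (a sketch; the chunk layout follows the OpenAI SDK):

```python
def iter_text(stream):
    """Yield plain-text deltas from an OpenAI chat-completion stream."""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            yield delta

# response = st.write_stream(iter_text(stream))
```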
@@ -42,7 +42,7 @@ We have evaluated InternLM2.5 on several important benchmarks using the open-sou
 ### Chat Model

 | Benchmark | InternLM2-Chat-7B | LLaMA-3-8B-Instruct | Yi-1.5-9B-Chat | GLM-4-9B-Chat | Qwen2-7B-Instruct | Gemma2-9B-IT | InternLM2.5-7B-Chat | Llama-3-70B-Instruct |
-| ----------------- | ----------------- | ------------------- | -------------- | ------------- | ----------------- | ------------ | ------------------- |------------------- |
+| ----------------- | ----------------- | ------------------- | -------------- | ------------- | ----------------- | ------------ | ------------------- | -------------------- |
 | MMLU(5-shot) | 62.3 | 68.4 | 71.0 | 71.4 | 70.8 | 70.9 | 72.8 | 80.5 |
 | CMMLU(5-shot) | 62.4 | 53.3 | 74.5 | 74.5 | 80.9 | 60.3 | 78.0 | 70.1 |
 | BBH(3-shot CoT) | 59.0 | 54.4 | 69.6 | 69.6 | 65.0 | 68.2\* | 71.6 | 80.5 |
@@ -50,7 +50,6 @@ We have evaluated InternLM2.5 on several important benchmarks using the open-sou
 | GSM8K(0-shot CoT) | 72.5 | 72.9 | 80.1 | 85.3 | 82.9 | 88.9 | 86.0 | 92.8 |
 | GPQA(0-shot) | 29.8 | 26.1 | 37.9 | 36.9 | 38.4 | 33.8 | 38.4 | 38.9 |

-
 - We use `ppl` for the MCQ evaluation on the base model.
 - The evaluation results were obtained from [OpenCompass](https://github.com/open-compass/opencompass), and the evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass).
 - The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to the latest evaluation results of [OpenCompass](https://github.com/open-compass/opencompass).
@@ -16,7 +16,7 @@ The base model of InternLM2 has the following technical features:
 ## Model Zoo

 | Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
-| -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
+| --------------------------- | ----------------------------------------- | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
 | **InternLM2-1.8B** | [🤗internlm2-1.8b](https://huggingface.co/internlm/internlm2-1_8b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-1.8b](https://www.modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b/summary) | [](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-1.8b) | [](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-1.8b-original) | 2024-01-31 |
 | **InternLM2-Chat-1.8B-SFT** | [🤗internlm2-chat-1.8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-1.8b-sft](https://www.modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary) | [](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-1.8b-sft) | [](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-1.8b-sft-original) | 2024-01-31 |
 | **InternLM2-Chat-1.8B** | [🤗internlm2-chat-1.8b](https://huggingface.co/internlm/internlm2-chat-1_8b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-1.8b](https://www.modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b/summary) | [](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-1.8b) | [](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-1.8b-original) | 2024-02-19 |
@@ -25,8 +25,8 @@ The base model of InternLM2 has the following technical features:

 We have evaluated InternLM2 on several important benchmarks using the open-source evaluation tool [OpenCompass](https://github.com/open-compass/opencompass). Some of the evaluation results are shown in the table below. You are welcome to visit the [OpenCompass Leaderboard](https://opencompass.org.cn/rank) for more evaluation results.

-| Dataset\Models | InternLM2-1.8B | InternLM2-Chat-1.8B-SFT | InternLM2-Chat-1.8B | InternLM2-7B | InternLM2-Chat-7B |
-| :---: | :---: | :---: | :---: | :---: | :---: |
+| Dataset\\Models | InternLM2-1.8B | InternLM2-Chat-1.8B-SFT | InternLM2-Chat-1.8B | InternLM2-7B | InternLM2-Chat-7B |
+| :-------------: | :------------: | :---------------------: | :-----------------: | :----------: | :---------------: |
 | MMLU | 46.9 | 47.1 | 44.1 | 65.8 | 63.7 |
 | AGIEval | 33.4 | 38.8 | 34.6 | 49.9 | 47.2 |
 | BBH | 37.5 | 35.2 | 34.3 | 65.0 | 61.2 |
|
@ -35,6 +35,5 @@ We have evaluated InternLM2 on several important benchmarks using the open-sourc
|
||||||
| HumanEval | 25.0 | 32.9 | 29.3 | 43.3 | 59.8 |
|
| HumanEval | 25.0 | 32.9 | 29.3 | 43.3 | 59.8 |
|
||||||
| MBPP(Sanitized) | 22.2 | 23.2 | 27.0 | 51.8 | 51.4 |
|
| MBPP(Sanitized) | 22.2 | 23.2 | 27.0 | 51.8 | 51.4 |
|
||||||
|
|
||||||
|
|
||||||
- The evaluation results were obtained from [OpenCompass](https://github.com/open-compass/opencompass) , and evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass).
|
- The evaluation results were obtained from [OpenCompass](https://github.com/open-compass/opencompass) , and evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass).
|
||||||
- The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to the latest evaluation results of [OpenCompass](https://github.com/open-compass/opencompass).
|
- The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to the latest evaluation results of [OpenCompass](https://github.com/open-compass/opencompass).
|
||||||
|
|
|
@@ -21,10 +21,10 @@ class TestChat:
     @pytest.mark.parametrize(
         'model_name',
         [
-            'internlm/internlm2_5-7b-chat',
-            'internlm/internlm2-chat-7b', 'internlm/internlm2-chat-7b-sft',
-            'internlm/internlm2-chat-20b', 'internlm/internlm2-chat-20b-sft',
-            'internlm/internlm2-chat-1_8b', 'internlm/internlm2-chat-1_8b-sft'
+            'internlm/internlm2_5-7b-chat', 'internlm/internlm2-chat-7b',
+            'internlm/internlm2-chat-7b-sft', 'internlm/internlm2-chat-20b',
+            'internlm/internlm2-chat-20b-sft', 'internlm/internlm2-chat-1_8b',
+            'internlm/internlm2-chat-1_8b-sft'
         ],
     )
     @pytest.mark.parametrize(
@@ -128,8 +128,10 @@ class TestMath:

     @pytest.mark.parametrize(
         'model_name',
-        ['internlm/internlm2-math-7b', 'internlm/internlm2-math-base-7b',
-         'internlm/internlm2-math-plus-1_8b', 'internlm/internlm2-math-plus-7b'
+        [
+            'internlm/internlm2-math-7b', 'internlm/internlm2-math-base-7b',
+            'internlm/internlm2-math-plus-1_8b',
+            'internlm/internlm2-math-plus-7b'
         ],
     )
     @pytest.mark.parametrize(
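
These test reflows only rewrap the model lists; the collected tests are identical. Stacked `@pytest.mark.parametrize` decorators produce the cross-product of their argument lists, so every model name runs against every value of the other parameter. A self-contained illustration (the parameter names here are invented):

```python
import pytest


@pytest.mark.parametrize('model_name', ['m1', 'm2'])
@pytest.mark.parametrize('usefast', [True, False])
def test_cross_product(model_name, usefast):
    # pytest collects 2 x 2 = 4 cases from the two stacked decorators.
    assert isinstance(model_name, str) and isinstance(usefast, bool)
```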
@@ -59,8 +59,8 @@ def convert(src, tgt):
     assert not config.bias, 'Cannot convert InternLM Model with bias to LLaMA.'

     head_dim = config.hidden_size // config.num_attention_heads
-    num_key_value_groups = config.num_attention_heads \
-        // config.num_key_value_heads
+    num_key_value_groups = \
+        config.num_attention_heads // config.num_key_value_heads

     # load index json file
     index_file = 'pytorch_model.bin.index.json'
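
`num_key_value_groups` is the grouped-query-attention ratio: how many query heads share one key/value head, which the conversion needs in order to lay K/V weights out in LLaMA order. A small numeric sketch (the head counts are illustrative, not from the config):

```python
num_attention_heads = 32
num_key_value_heads = 8
num_key_value_groups = num_attention_heads // num_key_value_heads
assert num_key_value_groups == 4  # each K/V head serves 4 query heads
```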
@@ -140,7 +140,9 @@ def convert(src, tgt):
         print(f'Saving to {os.path.join(tgt, filename)}...', flush=True)
         if filename.endswith('.safetensors'):
             from safetensors.torch import save_file
-            save_file(llama_states, os.path.join(tgt, filename), metadata={"format": "pt"})
+            save_file(llama_states,
+                      os.path.join(tgt, filename),
+                      metadata={'format': 'pt'})
         else:
             torch.save(llama_states, os.path.join(tgt, filename))
         del states
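
`metadata={'format': 'pt'}` tags the safetensors file so downstream loaders treat it as PyTorch weights. A minimal round-trip sketch (the tensor name and filename are placeholders):

```python
import torch
from safetensors.torch import load_file, save_file

states = {'model.embed_tokens.weight': torch.zeros(4, 4)}
save_file(states, 'model-00001-of-00001.safetensors',
          metadata={'format': 'pt'})
restored = load_file('model-00001-of-00001.safetensors')
assert torch.equal(states['model.embed_tokens.weight'],
                   restored['model.embed_tokens.weight'])
```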