mirror of https://github.com/InternLM/InternLM
reformat docs
parent 7d2d9fc2f0
commit 344f543c4c
@@ -43,39 +43,42 @@ msgstr "Training API"
 msgid "并行训练"
 msgstr "Parallel Training"

-#: ../../source/index.rst:51 9234725f3c464731993d73607608c874
+#: ../../source/index.rst:51
+msgid "混合精度"
+msgstr "Mixed Precision"
+
+#: ../../source/index.rst:59 9234725f3c464731993d73607608c874
 msgid "模型备份"
 msgstr "Model Checkpointing"

-#: ../../source/index.rst:59 8e4ce037017f4510b2892a66003877fa
+#: ../../source/index.rst:67 8e4ce037017f4510b2892a66003877fa
 msgid "性能分析"
 msgstr "Profiler"

-#: ../../source/index.rst:67 a36e02819ecd4b448a8cb4ebbecb6600
+#: ../../source/index.rst:75 a36e02819ecd4b448a8cb4ebbecb6600
 msgid "训练监控"
 msgstr "Monitor"

-#: ../../source/index.rst:75 b912e292486f455c8b5cdd75962e8ac2
+#: ../../source/index.rst:83 b912e292486f455c8b5cdd75962e8ac2
 msgid "训练样例"
 msgstr "Example"

-#: ../../source/index.rst:83 ea9e9281720941a1830e5df7a2badf7a
+#: ../../source/index.rst:91 ea9e9281720941a1830e5df7a2badf7a
 msgid "常见问题"
 msgstr "Q&A"

-#: ../../source/index.rst:91 e08edc5aa1c74965b10084b393b88fae
+#: ../../source/index.rst:99 e08edc5aa1c74965b10084b393b88fae
 msgid "索引和表格"
 msgstr "Indices and tables"

-#: ../../source/index.rst:93 f3fdca059caa49dcad09aa44be7f02d6
+#: ../../source/index.rst:101 f3fdca059caa49dcad09aa44be7f02d6
 msgid ":ref:`genindex`"
 msgstr ""

-#: ../../source/index.rst:94 b3791e811315435097bb507edc3f4b9b
+#: ../../source/index.rst:102 b3791e811315435097bb507edc3f4b9b
 msgid ":ref:`modindex`"
 msgstr ""

-#: ../../source/index.rst:95 a164b772960f4ab8b18c7e8820f69f55
+#: ../../source/index.rst:103 a164b772960f4ab8b18c7e8820f69f55
 msgid ":ref:`search`"
 msgstr ""
@@ -0,0 +1,73 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) 2023, InternLM Team
+# This file is distributed under the same license as the InternLM package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: InternLM \n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2023-09-26 15:24+0800\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language: en\n"
+"Language-Team: en <LL@li.org>\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 2.12.1\n"
+
+#: ../../source/mixed_precision.rst:2
+msgid "混合精度"
+msgstr "Mixed Precision"
+
+#: ../../source/mixed_precision.rst:3
+msgid ""
+"混合精度是指在模型训练的过程中同时使用16位和32位浮点类型,是一种在最小化精度损失的前提下加速模型训练的方法。 "
+"混合精度通过让模型的某些部分使用32位浮点数以保持数值稳定性,并在其余部分利用半精度浮点数加速训练并可以减少内存使用,在评估指标(如准确率)方面仍可以获得同等的训练效果。"
+msgstr ""
+"Mixed precision refers to using both 16-bit and 32-bit floating-point types during model training, which accelerates training while minimizing the loss of accuracy. "
+"Mixed precision keeps certain parts of the model in 32-bit floating point to maintain numerical stability, and accelerates training and reduces memory usage by using half-precision floating point elsewhere, while still achieving comparable results on evaluation metrics such as accuracy."
+
+#: internlm.core.naive_amp.NaiveAMPModel:1 of
+msgid ""
+"This is a wrapper class for a model that automatically casts the model, "
+"its inputs, and outputs into fp16. It also provides options to cast the "
+"output back to fp32 and to synchronize buffers."
+msgstr ""
+
+#: internlm.core.naive_amp.NaiveAMPModel of
+msgid "参数"
+msgstr ""
+
+#: internlm.core.naive_amp.NaiveAMPModel:4 of
+msgid "The model to be wrapped and cast into fp16."
+msgstr ""
+
+#: internlm.core.naive_amp.NaiveAMPModel:6 of
+msgid "If True, the output of this module is cast into fp32. Defaults to True."
+msgstr ""
+
+#: internlm.core.naive_amp.NaiveAMPModel:8 of
+msgid ""
+"The parallel group mode used in this module. Defaults to "
+"``ParallelMode.DATA``."
+msgstr ""
+
+#: internlm.core.naive_amp.NaiveAMPModel:11 of
+msgid "If True, the buffers are synchronized. Defaults to True."
+msgstr ""
+
+#: ../../source/mixed_precision.rst:8
+msgid "InternLM默认将模型转换为16位精度进行训练(在配置文件中可以设置默认类型为其他数据类型)。在使用混合精度时,需要在构建模型时使用"
+msgstr "InternLM converts the model to 16-bit precision for training by default (the default dtype can be set to another data type in the configuration file). When using mixed precision, it is necessary to use "
+
+#: ../../source/mixed_precision.rst:14
+msgid "将模型的某个子模块设置为32精度进行训练,InternLM会在模型训练时自动将数据类型转换成需要的精度。"
+msgstr "to set a sub-module of the model to 32-bit precision for training; InternLM will automatically convert the data type to the required precision during model training."
+
+#: ../../source/mixed_precision.rst:16
+msgid "例如:"
+msgstr "For example:"
@@ -455,51 +455,3 @@ msgstr ""
 msgid "Whether the gradient is successfully updated, and the gradient."
 msgstr ""

-#: ../../source/parallel.rst:155
-msgid "混合精度"
-msgstr "Mixed Precision"
-
-#: ../../source/parallel.rst:156
-msgid ""
-"混合精度是指在模型训练的过程中同时使用16位和32位浮点类型,是一种在最小化精度损失的前提下加速模型训练的方法。 "
-"混合精度通过让模型的某些部分使用32位浮点数以保持数值稳定性,并在其余部分利用半精度浮点数加速训练并减少内存使用,在评估指标(如准确率)方面仍可以获得同等的训练效果。"
-msgstr ""
-"Mixed precision refers to using both 16-bit and 32-bit floating-point types to train model, which can accelerate the model training while minimizing the accuracy loss. "
-"Mixed precision training uses 32-bit floating-point types in certain parts of the model to maintain numerical stability, and accelerate training and reduce memory usage by using 16-bit floating-point types in other parts. Mixed precision can achieve the same training effect in evaluating indicators such as accuracy."
-
-#: internlm.core.naive_amp.NaiveAMPModel:1 of
-msgid ""
-"This is a wrapper class for a model that automatically casts the model, "
-"its inputs, and outputs into fp16. It also provides options to cast the "
-"output back to fp32 and to synchronize buffers."
-msgstr ""
-
-#: internlm.core.naive_amp.NaiveAMPModel:4 of
-msgid "The model to be wrapped and cast into fp16."
-msgstr ""
-
-#: internlm.core.naive_amp.NaiveAMPModel:6 of
-msgid "If True, the output of this module is cast into fp32. Defaults to True."
-msgstr ""
-
-#: internlm.core.naive_amp.NaiveAMPModel:8 of
-msgid ""
-"The parallel group mode used in this module. Defaults to "
-"``ParallelMode.DATA``."
-msgstr ""
-
-#: internlm.core.naive_amp.NaiveAMPModel:11 of
-msgid "If True, the buffers are synchronized. Defaults to True."
-msgstr ""
-
-#: ../../source/parallel.rst:161
-msgid "InternLM默认将模型转换为16位精度进行训练(在配置文件中可以设置默认类型为其他数据类型)。在使用混合精度时,需要在构建模型时使用"
-msgstr "InternLM converts the model to 16-bit floating-point types for model training by default (the default type can be set to other data types in the configuration file). When using mixed precision, it is necessary to use "
-
-#: ../../source/parallel.rst:167
-msgid "将模型的某个子模块设置为32精度进行训练,InternLM会在模型训练时自动将数据类型转换成需要的精度。"
-msgstr "to set a sub-module of the model to 16-bit floating-point types for training, and InternLM will automatically convert the data type to the required precision during model training."
-
-#: ../../source/parallel.rst:169
-msgid "例如:"
-msgstr "For example:"
@@ -47,6 +47,14 @@ InternLM
     parallel

+混合精度
+-------------------
+
+.. toctree::
+    :maxdepth: 2
+
+    mixed_precision
+
 模型备份
 --------------------
@@ -0,0 +1,36 @@
+混合精度
+-----------------
+混合精度是指在模型训练的过程中同时使用16位和32位浮点类型,是一种在最小化精度损失的前提下加速模型训练的方法。
+混合精度通过让模型的某些部分使用32位浮点数以保持数值稳定性,并在其余部分利用半精度浮点数加速训练并可以减少内存使用,在评估指标(如准确率)方面仍可以获得同等的训练效果。
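+
+下面用两行纯 PyTorch 代码(仅作示意,不涉及 InternLM 的接口)说明半精度的舍入误差,这正是需要让数值敏感的子模块保持32位精度的原因:
+
+.. code-block:: python
+
+    import torch
+
+    x = torch.tensor(1.0 + 2**-10)      # exactly representable in fp32
+    print(x.to(torch.bfloat16).item())  # prints 1.0: bf16 keeps only ~8 significant bits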
+
+.. autoclass:: internlm.core.naive_amp.NaiveAMPModel
+
+InternLM默认将模型转换为16位精度进行训练(在配置文件中可以设置默认类型为其他数据类型)。在使用混合精度时,需要在构建模型时使用
+
+.. code-block:: python
+
+    set_fp32_attr_to_module(module)  # "module" is the sub-module to keep in fp32
+
+将模型的某个子模块设置为32位精度进行训练,InternLM会在模型训练时自动将数据类型转换成需要的精度。
+
+例如:
+
+.. code-block:: python
+
+    import torch
+    from torch import nn
+
+    # NaiveAMPModel is documented above; set_fp32_attr_to_module is assumed
+    # to live in the same module
+    from internlm.core.naive_amp import NaiveAMPModel, set_fp32_attr_to_module
+
+    class MlpModel(nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.linear1 = nn.Linear(4, 1, bias=False)
+            self.linear2 = nn.Linear(1, 4, bias=False)
+
+        def forward(self, x):
+            return self.linear2(self.linear1(x))
+
+    model = MlpModel()
+    # set model.linear2 as fp32 module
+    set_fp32_attr_to_module(model.linear2)
+
+    # apply mixed precision
+    model = NaiveAMPModel(
+        model=model,
+        output_to_fp32=True,
+        dtype=torch.bfloat16,  # a dtype object, not a callable
+        sync_buffer=False,
+    )
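+
+包装后的模型可以像普通 ``nn.Module`` 一样调用。下面的前向传播仅作示意,假设在单卡环境下无需额外的分布式初始化即可运行(此假设未经本文档确认):
+
+.. code-block:: python
+
+    # the fp32 input is cast to bf16 for linear1, linear2 stays in fp32,
+    # and the output is cast back to fp32 because output_to_fp32=True
+    out = model(torch.randn(2, 4))
+    print(out.dtype)  # torch.float32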
@@ -150,40 +150,3 @@ ZeRO1.5 的实现使用了分层分片的概念,通过配置值 ``parallel.zer
 .. autoclass:: internlm.solver.optimizer.hybrid_zero_optim.HybridZeroOptimizer
     :members:

-混合精度
------------------
-混合精度是指在模型训练的过程中同时使用16位和32位浮点类型,是一种在最小化精度损失的前提下加速模型训练的方法。
-混合精度通过让模型的某些部分使用32位浮点数以保持数值稳定性,并在其余部分利用半精度浮点数加速训练并可以减少内存使用,在评估指标(如准确率)方面仍可以获得同等的训练效果。
-
-.. autoclass:: internlm.core.naive_amp.NaiveAMPModel
-
-InternLM默认将模型转换为16位精度进行训练(在配置文件中可以设置默认类型为其他数据类型)。在使用混合精度时,需要在构建模型时使用
-
-.. code-block:: python
-
-    set_fp32_attr_to_module(/*fp32 module*/)
-
-将模型的某个子模块设置为32精度进行训练,InternLM会在模型训练时自动将数据类型转换成需要的精度。
-
-例如:
-
-.. code-block:: python
-
-    class MlpModel(nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.linear1 = nn.Linear(4, 1, bias=False)
-            self.linear2 = nn.Linear(1, 4, bias=False)
-
-    model = MlpModel()
-    # set model.linear2 as fp32 module
-    set_fp32_attr_to_module(model.linear2)
-
-    # apply mixed precision
-    model = NaiveAMPModel(
-        model=model,
-        output_to_fp32=True,
-        dtype=torch.bfloat16(),
-        sync_buffer=False,
-    )