|
|
|
@@ -84,44 +84,42 @@ optim = DistGaloreAwamW(
|
|
|
|
|
## 兼容性
|
|
|
|
|
<table>
|
|
|
|
|
<tr>
|
|
|
|
|
<th nowrap="nowrap">Model/Feature</th>
|
|
|
|
|
<th nowrap="nowrap" align="center" title="Lamb">Lamb</th>
|
|
|
|
|
<th nowrap="nowrap" align="center" title="GaLore">GaLore</th>
|
|
|
|
|
<th nowrap="nowrap" align="center" title="Adafactor">Adafactor</th>
|
|
|
|
|
<th nowrap="nowrap" align="center" title="CAME">CAME</th>
|
|
|
|
|
<th nowrap="nowrap">Optimizer/Plugin</th>
|
|
|
|
|
<th nowrap="nowrap" align="center">Hybrid Parallel Plugin</th>
|
|
|
|
|
<th nowrap="nowrap" align="center">Low Level Zero Plugin</th>
|
|
|
|
|
<th nowrap="nowrap" align="center">Torch DDP Plugin</th>
|
|
|
|
|
<th nowrap="nowrap" align="center">Gemini Plugin</th>
|
|
|
|
|
<th nowrap="nowrap" align="center">Moe Hybrid Plugin</th>
|
|
|
|
|
</tr>
|
|
|
|
|
<tr>
|
|
|
|
|
<td nowrap="nowrap">Hybrid Parallel<br />Plugin</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center" title="Lamb">Lamb</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
</tr>
|
|
|
|
|
<tr>
|
|
|
|
|
<td nowrap="nowrap">Low Level Zero<br />Plugin</td>
|
|
|
|
|
<td nowrap="nowrap" align="center" title="GaLore">GaLore</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
</tr>
|
|
|
|
|
<tr>
|
|
|
|
|
<td nowrap="nowrap">Torch DDP<br />Plugin</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center" title="Adafactor">Adafactor</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
</tr>
|
|
|
|
|
<tr>
|
|
|
|
|
<td nowrap="nowrap">Gemini<br />Plugin</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
</tr>
|
|
|
|
|
<tr>
|
|
|
|
|
<td nowrap="nowrap">Moe Hybrid<br />Plugin</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
<td nowrap="nowrap" align="center" title="CAME">CAME</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">✔️</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
<td nowrap="nowrap" align="center">❌</td>
|
|
|
|
|
</tr>
|
|
|
|
@@ -130,6 +128,7 @@ optim = DistGaloreAwamW(
|
|
|
|
|
</tr>
|
|
|
|
|
</table>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<!-- doc-test-command: colossalai run --nproc_per_node 4 distributed_optimizers.py -->
|
|
|
|
|
|
|
|
|
|
## API 参考
|
|
|
|
|