__init__.py
|
feat(moe): add moe module (#182)
|
2023-09-27 15:54:53 +08:00 |
linear.py
|
remove full weight for block 0
|
2023-10-17 16:37:06 +08:00 |
loss.py
|
initial commit
|
2023-07-06 12:55:23 +08:00 |
metrics.py
|
feat(train): add fsdp training option (#293)
|
2023-10-09 18:59:31 +08:00 |
modeling_moe.py
|
feat(moe): add moe module (#182)
|
2023-09-27 15:54:53 +08:00 |
moe.py
|
feat(moe): add moe module (#182)
|
2023-09-27 15:54:53 +08:00 |
multi_head_attention.py
|
remove full weight for block 0
|
2023-10-17 16:37:06 +08:00 |
norm.py
|
Merge develop to main (#233)
|
2023-08-24 22:03:04 +08:00 |
utils.py
|
fix reduce scatter async bug
|
2023-10-17 15:10:07 +08:00 |