mirror of https://github.com/hpcaitech/ColossalAI
[misc] update torch version (#6206)
* [misc] update torch version
* fix test
* fix test
* fix test
* fix test
parent b9e60559b8
commit f32861ccc5
@@ -1,3 +1,3 @@
-2.2.2-12.1.0
 2.3.0-12.1.0
 2.4.0-12.4.1
+2.5.1-12.4.1
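The `<torch>-<CUDA>` pairs above read like the project's torch/CUDA compatibility matrix: the oldest pairing (torch 2.2.2 with CUDA 12.1.0) is dropped and torch 2.5.1 with CUDA 12.4.1 is added. A minimal sketch of how CI might parse such a file; the filename `.compatibility` and the helper name are assumptions, not taken from this diff:

# Hypothetical parser for the "<torch>-<cuda>" pairs shown above.
def load_compatibility_matrix(path=".compatibility"):
    pairs = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line:
                torch_ver, cuda_ver = line.split("-", 1)
                pairs.append((torch_ver, cuda_ver))
    return pairs

# After this commit: [("2.3.0", "12.1.0"), ("2.4.0", "12.4.1"), ("2.5.1", "12.4.1")]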
@@ -1,11 +1,11 @@
 {
     "build": [
         {
-            "torch_command": "pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121",
+            "torch_command": "pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121",
             "cuda_image": "hpcaitech/cuda-conda:12.1"
         },
         {
-            "torch_command": "pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124",
+            "torch_command": "pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu124",
             "cuda_image": "hpcaitech/cuda-conda:12.4"
         }
     ]
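This build matrix pins each torch/torchvision/torchaudio triple to a matching CUDA conda image (cu121 for torch 2.3.0, cu124 for torch 2.5.1). A sketch of how a CI driver might consume it; the filename and the runner logic are assumptions:

# Hypothetical consumer of the JSON build matrix edited above.
import json
import shlex
import subprocess

with open(".cuda_ext.json") as f:
    matrix = json.load(f)

for entry in matrix["build"]:
    print("building against image:", entry["cuda_image"])
    # Run the pinned installer command for this matrix entry.
    subprocess.run(shlex.split(entry["torch_command"]), check=True)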
@@ -8,7 +8,7 @@ click
 fabric
 contexttimer
 ninja
-torch>=2.2.0,<=2.4.1
+torch>=2.2.0,<=2.5.1
 safetensors
 einops
 pydantic
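The requirement pin raises the upper bound from torch 2.4.1 to 2.5.1 while keeping the 2.2.0 floor. A quick way to check an installed torch against the new specifier, using the real `packaging` library (this check is illustrative, not part of the commit):

import torch
from packaging.specifiers import SpecifierSet
from packaging.version import Version

spec = SpecifierSet(">=2.2.0,<=2.5.1")
# Strip any local build tag such as "+cu124" before comparing.
assert Version(torch.__version__.split("+")[0]) in spec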
@@ -1,7 +1,7 @@
 from colossalai.cluster.device_mesh_manager import DeviceMeshInfo, DeviceMeshManager
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
-from colossalai.testing import spawn
+from colossalai.testing import rerun_if_address_is_in_use, spawn


 def check_device_mesh_manager(rank, world_size, port):
@@ -24,6 +24,7 @@ def check_device_mesh_manager(rank, world_size, port):
     assert device_mesh_with_shape._logical_mesh_id.tolist() == [[0, 1], [2, 3]]


+@rerun_if_address_is_in_use()
 def test_device_mesh_manager():
     spawn(check_device_mesh_manager, 4)

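`rerun_if_address_is_in_use` is a real decorator in `colossalai.testing`; its implementation is not shown in this diff, but the intent is to retry a spawned distributed test whose rendezvous port is still bound by a previous run. An illustrative stand-in, not ColossalAI's actual code:

import functools
import time

def rerun_if_address_is_in_use_sketch(max_tries=5, wait=1.0):
    # Retry the wrapped test when it dies with EADDRINUSE.
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(max_tries):
                try:
                    return fn(*args, **kwargs)
                except OSError as e:
                    if "Address already in use" not in str(e) or attempt == max_tries - 1:
                        raise
                    time.sleep(wait)  # let the stale socket be released
        return wrapper
    return decorator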
@@ -51,7 +51,7 @@ def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, loss_fn,
     if test_config["precision"] == "fp32":
         atol, rtol = 1e-5, 1e-3
     else:
-        atol, rtol = 5e-2, 5e-2
+        atol, rtol = 9e-2, 0
     if (stage_manager is None or stage_manager.is_first_stage()) and booster.plugin.zero_stage == 0:
         row_layer_grads = get_grad_tensors_for_check(
             t5, sharded_t5, row_layer_for_check, tp_group, atol=atol, rtol=rtol, dim=0
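The non-fp32 branch trades a mixed absolute/relative tolerance (5e-2 each) for a purely absolute one: with `rtol=0`, an allclose-style check reduces to `|a - b| <= atol`. Assuming the gradient comparison ultimately bottoms out in `torch.allclose` (whose rule is `|a - b| <= atol + rtol * |b|`), the difference looks like this; the tensors are made-up illustrations:

import torch

a = torch.tensor([0.00, 1.00])
b = torch.tensor([0.08, 1.02])

# Old check: fails on the first element, since 5e-2 + 5e-2 * 0.08 < 0.08.
print(torch.allclose(a, b, rtol=5e-2, atol=5e-2))  # False
# New check: passes, both diffs fit the 9e-2 absolute budget.
print(torch.allclose(a, b, rtol=0.0, atol=9e-2))   # True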