[fx] patched torch.max and data movement operator (#1391)

* [fx] patched torch.max and data movement operator * polish code
2022-08-01 15:31:50 +08:00 · 2022-08-01 15:31:50 +08:00 · 7d6293927f
parent db89600cf2
commit 7d6293927f
2 changed files with 52 additions and 0 deletions
--- a/colossalai/fx/tracer/meta_patch/patched_function/torch_ops.py
+++ b/colossalai/fx/tracer/meta_patch/patched_function/torch_ops.py
@ -138,3 +138,36 @@ def torch_roll(input, shifts, dims=None):
 def torch_full(size, fill_value, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False):
    assert out is None, 'assigning result to out is not supported yet'
    return torch.empty(size, device='meta', dtype=dtype, layout=layout, requires_grad=requires_grad)
+
+
+@meta_patched_function.register(torch.max)
+def torch_max(input, dim=None, keepdim=False, *, out=None):
+    """Meta stand-in for ``torch.max`` that only reproduces output shape and dtype.
+
+    Three call forms are handled:
+
+    * ``torch_max(input)``: full reduction, returns one 0-D tensor.
+    * ``torch_max(input, dim, keepdim)`` with an integer ``dim``: reduction along a
+      single dimension, returns a ``(values, indices)`` tuple.
+    * ``torch_max(input, other)`` with a tensor passed in the ``dim`` slot:
+      element-wise maximum, returns one tensor.
+
+    Args:
+        input: tensor being reduced / compared.
+        dim: ``None``, an ``int`` dimension (negative values count from the end),
+            or a 0-D/1-D tensor acting as the second operand.
+        keepdim: keep the reduced dimension with size 1 (integer ``dim`` only).
+        out: must be ``None``; writing into an output tensor is not supported.
+
+    Returns:
+        Tensor(s) created on the ``meta`` device, as described above.
+
+    Raises:
+        AssertionError: if ``out`` is given.
+        IndexError: if an integer ``dim`` is out of range for ``input``.
+        ValueError: if a tensor ``dim`` has more than one dimension.
+        TypeError: if ``dim`` is not ``None``, an ``int`` or a tensor.
+    """
+    assert out is None, 'assigning value to out is not supported yet'
+
+    if dim is None:
+        # full reduction collapses everything into a scalar tensor
+        return torch.empty([], device='meta', dtype=input.dtype)
+
+    if isinstance(dim, torch.Tensor):
+        # the second positional argument is the other operand of an element-wise maximum
+        num_dims = dim.dim()
+        if num_dims not in (0, 1):
+            raise ValueError(f"Expected dim to a 0D or 1D tensor but got {num_dims} dimensions")
+        # the result follows broadcasting rules, which may differ from input's own shape
+        # (e.g. input of shape [4, 1] against an operand of shape [3])
+        # NOTE(review): dtype promotion between the two operands is not modelled here
+        shape = torch.broadcast_shapes(input.shape, dim.shape)
+        return torch.empty(shape, device='meta', dtype=input.dtype)
+
+    if isinstance(dim, int):
+        shape = list(input.shape)
+        if shape:
+            # index in place so that negative dims land on the right axis;
+            # an out-of-range dim raises IndexError from the list operation
+            if keepdim:
+                shape[dim] = 1
+            else:
+                del shape[dim]
+        elif dim not in (0, -1):
+            # a 0-D tensor only accepts dim 0 / -1 and stays 0-D after the reduction
+            raise IndexError(f"Dimension out of range (expected to be in range of [-1, 0], but got {dim})")
+        values = torch.empty(shape, device='meta', dtype=input.dtype)
+        # torch.max returns the positions of the maxima as int64, whatever the input dtype
+        indices = torch.empty(shape, device='meta', dtype=torch.long)
+        return values, indices
+
+    raise TypeError(f"Expected dim to be None, an int or a torch.Tensor but got {type(dim).__name__}")
+
+
+@meta_patched_function.register(torch.Tensor.cpu)
+def torch_tensor_cpu(input, *args, **kwargs):
+    """Meta stand-in for ``torch.Tensor.cpu``: return a clone of ``input``.
+
+    No data is moved; the result is whatever ``input.clone()`` produces.
+
+    Args:
+        input: tensor on which ``.cpu()`` was called.
+        *args, **kwargs: accepted and ignored, so that calls such as
+            ``x.cpu(memory_format=...)`` do not fail with a ``TypeError``.
+
+    Returns:
+        ``input.clone()``.
+    """
+    return input.clone()
+
+
+@meta_patched_function.register(torch.Tensor.cuda)
+def torch_tensor_cuda(input, *args, **kwargs):
+    """Meta stand-in for ``torch.Tensor.cuda``: hand back a clone of ``input``.
+
+    Any extra positional or keyword arguments are accepted but not used; the
+    result is simply ``input.clone()``.
+    """
+    # no device transfer happens here, only a copy of the incoming tensor
+    duplicate = input.clone()
+    return duplicate
--- a/tests/test_fx/test_tracer/test_patched_op.py
+++ b/tests/test_fx/test_tracer/test_patched_op.py
@ -61,3 +61,22 @@ def test_repeat_interleave():
                         patch_fn=repeat_interleave,
                         expect_exception=True,
                         output_shape=materialized_output.shape)
+
+
+def test_torch_max():
+    """Check that the patched ``torch_max`` yields the same shapes as ``torch.max``."""
+    # full reduction: no dim given, each side returns a single tensor
+    sample = torch.rand(4, 3)
+    expected = torch.max(sample)
+    meta_result = patched_function.torch_max(sample)
+    assert expected.shape == meta_result.shape
+
+    # reduction along dim 1: first dropping the reduced axis, then keeping it
+    for extra_kwargs in ({}, {'keepdim': True}):
+        sample = torch.rand(4, 3, 2)
+        expected_values, expected_indices = torch.max(sample, dim=1, **extra_kwargs)
+        meta_values, meta_indices = patched_function.torch_max(sample, dim=1, **extra_kwargs)
+        assert expected_values.shape == meta_values.shape
+        assert expected_indices.shape == meta_indices.shape