From 1bd8a72fc97ca3a5b001d340e65891973b17eccc Mon Sep 17 00:00:00 2001 From: Frank Lee Date: Thu, 9 Jun 2022 15:24:35 +0800 Subject: [PATCH] [workflow] disable p2p via shared memory on non-nvlink machine (#1086) --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e7056383d..07452f4f3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,7 @@ jobs: runs-on: [self-hosted, gpu] container: image: hpcaitech/pytorch-cuda:1.10.1-11.3.0 - options: --shm-size=2gb --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10 + options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10 timeout-minutes: 40 steps: - uses: actions/checkout@v2 @@ -34,3 +34,4 @@ jobs: PYTHONPATH=$PWD pytest tests env: DATA: /data/scratch/cifar-10 + NCCL_SHM_DISABLE: 1