[setup] make cuda extension build optional (#2336)

* [setup] make cuda extension build optional

* polish code

* polish code

* polish code
pull/2338/head
Frank Lee 2023-01-05 15:13:11 +08:00 committed by GitHub
parent 8711310cda
commit f1bc2418c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 18 deletions

View File

@@ -5,13 +5,31 @@ on:
types: [synchronize, labeled]
jobs:
build:
name: Build and Test Colossal-AI
detect:
name: Detect kernel-related file change
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' &&
contains( github.event.pull_request.labels.*.name, 'Run Build and Test')
outputs:
changedFiles: ${{ steps.find-changed-files.outputs.changedFiles }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Find the changed files
id: find-changed-files
uses: tj-actions/changed-files@v34
with:
since_last_remote_commit: true
files: |
op_builder/**
colossalai/kernel/**
setup.py
build:
name: Build and Test Colossal-AI
needs: detect
runs-on: [self-hosted, gpu]
container:
image: hpcaitech/pytorch-cuda:1.11.0-11.3.0
@@ -34,24 +52,15 @@ jobs:
- uses: actions/checkout@v2
with:
ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
- name: Find the changed files
id: find-changed-files
uses: tj-actions/changed-files@v34
with:
files: |
op_builder/**
colossalai/kernel/**
setup.py
- name: Restore cache
if: steps.find-changed-files.outputs.any_changed != 'true'
if: needs.detect.outputs.anyChanged == 'true'
run: |
[ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
- name: Install Colossal-AI
run: |
pip install -v -e .
CUDA_EXT=1 pip install -v -e .
pip install -r requirements/requirements-test.txt
- name: Unit Testing

View File

@@ -33,8 +33,7 @@ jobs:
- name: Install Colossal-AI
run: |
[ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
pip install -r requirements/requirements.txt
pip install -v -e .
CUDA_EXT=1 pip install -v -e .
cp -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
pip install -r requirements/requirements-test.txt
- name: Unit Testing

View File

@@ -18,15 +18,22 @@ try:
TORCH_AVAILABLE = True
except ImportError:
TORCH_AVAILABLE = False
CUDA_HOME = None
# ninja build does not work unless include_dirs are abs path
this_dir = os.path.dirname(os.path.abspath(__file__))
build_cuda_ext = True
build_cuda_ext = False
ext_modules = []
if int(os.environ.get('NO_CUDA_EXT', '0')) == 1 or not TORCH_AVAILABLE:
build_cuda_ext = False
if int(os.environ.get('CUDA_EXT', '0')) == 1:
if not TORCH_AVAILABLE:
raise ModuleNotFoundError("PyTorch is not found while CUDA_EXT=1. You need to install PyTorch first in order to build CUDA extensions")
if not CUDA_HOME:
        raise RuntimeError("CUDA_HOME is not found while CUDA_EXT=1. You need to export CUDA_HOME environment variable or install CUDA Toolkit first in order to build CUDA extensions")
build_cuda_ext = True
def check_cuda_torch_binary_vs_bare_metal(cuda_dir):