From f35326881c84398bf40078f401ca95a14df66be1 Mon Sep 17 00:00:00 2001 From: Fazzie <1240419984@qq.com> Date: Tue, 31 Jan 2023 10:00:37 +0800 Subject: [PATCH] fix README --- examples/images/diffusion/README.md | 28 +++- .../Teyvat/train_colossalai_teyvat.yaml | 1 + .../diffusion/configs/train_colossalai.yaml | 1 + .../configs/train_colossalai_cifar10.yaml | 1 + .../diffusion/configs/train_pokemon.yaml | 120 ------------------ examples/images/diffusion/scripts/txt2img.sh | 5 +- examples/images/diffusion/train_colossalai.sh | 8 +- 7 files changed, 33 insertions(+), 131 deletions(-) delete mode 100644 examples/images/diffusion/configs/train_pokemon.yaml diff --git a/examples/images/diffusion/README.md b/examples/images/diffusion/README.md index ddc7e2d97..b68347c00 100644 --- a/examples/images/diffusion/README.md +++ b/examples/images/diffusion/README.md @@ -1,6 +1,5 @@ # ColoDiffusion: Stable Diffusion with Colossal-AI - Acceleration of AIGC (AI-Generated Content) models such as [Stable Diffusion v1](https://github.com/CompVis/stable-diffusion) and [Stable Diffusion v2](https://github.com/Stability-AI/stablediffusion).

@@ -57,14 +56,19 @@ pip install transformers==4.19.2 diffusers invisible-watermark pip install -e . ``` -##### Step 2: install lightning +#### Step 2: install lightning Install Lightning version later than 2022.01.04. We suggest you install lightning from source. -https://github.com/Lightning-AI/lightning.git +``` +git clone https://github.com/Lightning-AI/lightning.git && cd lightning +pip install -r requirements.txt +python setup.py install +``` +#### Step 3: Install [Colossal-AI](https://colossalai.org/download/) From Our Official Website -##### Step 3:Install [Colossal-AI](https://colossalai.org/download/) From Our Official Website +##### From pip For example, you can install v0.1.12 from our official website. @@ -72,6 +76,16 @@ For example, you can install v0.1.12 from our official website. pip install colossalai==0.1.12+torch1.12cu11.3 -f https://release.colossalai.org ``` +##### From source + +``` +git clone https://github.com/hpcaitech/ColossalAI.git +cd ColossalAI + +# install colossalai +CUDA_EXT=1 pip install . +``` + ### Option #2: Use Docker To use the stable diffusion Docker image, you can either build using the provided the [Dockerfile](./docker/Dockerfile) or pull a Docker image from our Docker hub. 
@@ -122,6 +136,12 @@ It is important for you to configure your volume mapping in order to get the bes ## Download the model checkpoint from pretrained +### stable-diffusion-v2-base + +``` +wget https://huggingface.co/stabilityai/stable-diffusion-2-base/resolve/main/512-base-ema.ckpt +``` + ### stable-diffusion-v1-4 Our default model config use the weight from [CompVis/stable-diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4?text=A+mecha+robot+in+a+favela+in+expressionist+style) diff --git a/examples/images/diffusion/configs/Teyvat/train_colossalai_teyvat.yaml b/examples/images/diffusion/configs/Teyvat/train_colossalai_teyvat.yaml index d466c1c56..8a8250c5d 100644 --- a/examples/images/diffusion/configs/Teyvat/train_colossalai_teyvat.yaml +++ b/examples/images/diffusion/configs/Teyvat/train_colossalai_teyvat.yaml @@ -110,6 +110,7 @@ lightning: enable_distributed_storage: True placement_policy: cuda force_outputs_fp32: true + min_chunk_size: 64 log_every_n_steps: 2 logger: True diff --git a/examples/images/diffusion/configs/train_colossalai.yaml b/examples/images/diffusion/configs/train_colossalai.yaml index 0354311f8..88432e978 100644 --- a/examples/images/diffusion/configs/train_colossalai.yaml +++ b/examples/images/diffusion/configs/train_colossalai.yaml @@ -107,6 +107,7 @@ lightning: enable_distributed_storage: True placement_policy: cuda force_outputs_fp32: true + min_chunk_size: 64 log_every_n_steps: 2 logger: True diff --git a/examples/images/diffusion/configs/train_colossalai_cifar10.yaml b/examples/images/diffusion/configs/train_colossalai_cifar10.yaml index 0273ca862..0ba06f832 100644 --- a/examples/images/diffusion/configs/train_colossalai_cifar10.yaml +++ b/examples/images/diffusion/configs/train_colossalai_cifar10.yaml @@ -111,6 +111,7 @@ lightning: enable_distributed_storage: True placement_policy: cuda force_outputs_fp32: true + min_chunk_size: 64 log_every_n_steps: 2 logger: True diff --git 
a/examples/images/diffusion/configs/train_pokemon.yaml b/examples/images/diffusion/configs/train_pokemon.yaml deleted file mode 100644 index aadb5f2a0..000000000 --- a/examples/images/diffusion/configs/train_pokemon.yaml +++ /dev/null @@ -1,120 +0,0 @@ -model: - base_learning_rate: 1.0e-4 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - parameterization: "v" - linear_start: 0.00085 - linear_end: 0.0120 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - cond_stage_key: txt - image_size: 64 - channels: 4 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False # we set this to false because this is an inference only config - - scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler - params: - warm_up_steps: [ 1 ] # NOTE for resuming. use 10000 if starting from scratch - cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases - f_start: [ 1.e-6 ] - f_max: [ 1.e-4 ] - f_min: [ 1.e-10 ] - - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - use_checkpoint: True - use_fp16: True - image_size: 32 # unused - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: [ 4, 2, 1 ] - num_res_blocks: 2 - channel_mult: [ 1, 2, 4, 4 ] - num_head_channels: 64 # need to fix for flash-attn - use_spatial_transformer: True - use_linear_in_transformer: True - transformer_depth: 1 - context_dim: 1024 - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - #attn_type: "vanilla-xformers" - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: 
ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder - params: - freeze: True - layer: "penultimate" - -data: - target: main.DataModuleFromConfig - params: - batch_size: 32 - wrap: False - train: - target: ldm.data.pokemon.PokemonDataset - # params: - # file_path: "/data/scratch/diffuser/laion_part0/" - # world_size: 1 - # rank: 0 - -lightning: - trainer: - accelerator: 'gpu' - devices: 1 - log_gpu_memory: all - max_epochs: 2 - precision: 16 - auto_select_gpus: False - strategy: - target: strategies.ColossalAIStrategy - params: - use_chunk: True - enable_distributed_storage: True - placement_policy: cuda - force_outputs_fp32: true - - log_every_n_steps: 2 - logger: True - default_root_dir: "/tmp/diff_log/" - # profiler: pytorch - - logger_config: - wandb: - target: loggers.WandbLogger - params: - name: nowname - save_dir: "/tmp/diff_log/" - offline: opt.debug - id: nowname diff --git a/examples/images/diffusion/scripts/txt2img.sh b/examples/images/diffusion/scripts/txt2img.sh index 549bb03a6..53041cb8d 100755 --- a/examples/images/diffusion/scripts/txt2img.sh +++ b/examples/images/diffusion/scripts/txt2img.sh @@ -1,6 +1,5 @@ python scripts/txt2img.py --prompt "Teyvat, Name:Layla, Element: Cryo, Weapon:Sword, Region:Sumeru, Model type:Medium Female, Description:a woman in a blue outfit holding a sword" --plms \ --outdir ./output \ - --config /home/lcmql/data2/Genshin/2022-11-18T16-38-46_train_colossalai_teyvattest/checkpoints/last.ckpt \ - --ckpt /home/lcmql/data2/Genshin/2022-11-18T16-38-46_train_colossalai_teyvattest/configs/2022-11-18T16-38-46-project.yaml \ + --ckpt /tmp/2022-11-18T16-38-46_train_colossalai/checkpoints/last.ckpt \ + --config /tmp/2022-11-18T16-38-46_train_colossalai/configs/2022-11-18T16-38-46-project.yaml \ --n_samples 4 - diff --git a/examples/images/diffusion/train_colossalai.sh b/examples/images/diffusion/train_colossalai.sh index 4223a6941..dcaeeb0c6 100755 --- a/examples/images/diffusion/train_colossalai.sh +++ 
b/examples/images/diffusion/train_colossalai.sh @@ -1,5 +1,5 @@ -HF_DATASETS_OFFLINE=1 -TRANSFORMERS_OFFLINE=1 -DIFFUSERS_OFFLINE=1 +HF_DATASETS_OFFLINE=1 +TRANSFORMERS_OFFLINE=1 +DIFFUSERS_OFFLINE=1 -python main.py --logdir /tmp -t -b /configs/train_colossalai.yaml +python main.py --logdir /tmp -t -b configs/train_colossalai.yaml