mirror of https://github.com/hpcaitech/ColossalAI
fix README
parent
b55deb0662
commit
f35326881c
|
@ -1,6 +1,5 @@
|
|||
# ColoDiffusion: Stable Diffusion with Colossal-AI
|
||||
|
||||
|
||||
Acceleration of AIGC (AI-Generated Content) models such as [Stable Diffusion v1](https://github.com/CompVis/stable-diffusion) and [Stable Diffusion v2](https://github.com/Stability-AI/stablediffusion).
|
||||
|
||||
<p id="diffusion_train" align="center">
|
||||
|
@ -57,14 +56,19 @@ pip install transformers==4.19.2 diffusers invisible-watermark
|
|||
pip install -e .
|
||||
```
|
||||
|
||||
##### Step 2: install lightning
|
||||
#### Step 2: install lightning
|
||||
|
||||
Install a Lightning version released after 2022.01.04. We suggest installing Lightning from source.
|
||||
|
||||
https://github.com/Lightning-AI/lightning.git
|
||||
```
|
||||
git clone https://github.com/Lightning-AI/lightning.git && cd lightning
|
||||
pip install -r requirements.txt
|
||||
python setup.py install
|
||||
```
|
||||
|
||||
#### Step 3: Install [Colossal-AI](https://colossalai.org/download/) From Our Official Website
|
||||
|
||||
##### Step 3:Install [Colossal-AI](https://colossalai.org/download/) From Our Official Website
|
||||
##### From pip
|
||||
|
||||
For example, you can install v0.1.12 from our official website.
|
||||
|
||||
|
@ -72,6 +76,16 @@ For example, you can install v0.1.12 from our official website.
|
|||
pip install colossalai==0.1.12+torch1.12cu11.3 -f https://release.colossalai.org
|
||||
```
|
||||
|
||||
##### From source
|
||||
|
||||
```
|
||||
git clone https://github.com/hpcaitech/ColossalAI.git
|
||||
cd ColossalAI
|
||||
|
||||
# install colossalai
|
||||
CUDA_EXT=1 pip install .
|
||||
```
|
||||
|
||||
### Option #2: Use Docker
|
||||
|
||||
To use the stable diffusion Docker image, you can either build it using the provided [Dockerfile](./docker/Dockerfile) or pull a Docker image from our Docker hub.
|
||||
|
@ -122,6 +136,12 @@ It is important for you to configure your volume mapping in order to get the bes
|
|||
|
||||
## Download the model checkpoint from pretrained
|
||||
|
||||
### stable-diffusion-v2-base
|
||||
|
||||
```
|
||||
wget https://huggingface.co/stabilityai/stable-diffusion-2-base/resolve/main/512-base-ema.ckpt
|
||||
```
|
||||
|
||||
### stable-diffusion-v1-4
|
||||
|
||||
Our default model config uses the weights from [CompVis/stable-diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4?text=A+mecha+robot+in+a+favela+in+expressionist+style)
|
||||
|
|
|
@ -110,6 +110,7 @@ lightning:
|
|||
enable_distributed_storage: True
|
||||
placement_policy: cuda
|
||||
force_outputs_fp32: true
|
||||
min_chunk_size: 64
|
||||
|
||||
log_every_n_steps: 2
|
||||
logger: True
|
||||
|
|
|
@ -107,6 +107,7 @@ lightning:
|
|||
enable_distributed_storage: True
|
||||
placement_policy: cuda
|
||||
force_outputs_fp32: true
|
||||
min_chunk_size: 64
|
||||
|
||||
log_every_n_steps: 2
|
||||
logger: True
|
||||
|
|
|
@ -111,6 +111,7 @@ lightning:
|
|||
enable_distributed_storage: True
|
||||
placement_policy: cuda
|
||||
force_outputs_fp32: true
|
||||
min_chunk_size: 64
|
||||
|
||||
log_every_n_steps: 2
|
||||
logger: True
|
||||
|
|
|
@ -1,120 +0,0 @@
|
|||
model:
|
||||
base_learning_rate: 1.0e-4
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
parameterization: "v"
|
||||
linear_start: 0.00085
|
||||
linear_end: 0.0120
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
cond_stage_key: txt
|
||||
image_size: 64
|
||||
channels: 4
|
||||
cond_stage_trainable: false
|
||||
conditioning_key: crossattn
|
||||
monitor: val/loss_simple_ema
|
||||
scale_factor: 0.18215
|
||||
use_ema: False # we set this to false because this is an inference only config
|
||||
|
||||
scheduler_config: # 10000 warmup steps
|
||||
target: ldm.lr_scheduler.LambdaLinearScheduler
|
||||
params:
|
||||
warm_up_steps: [ 1 ] # NOTE for resuming. use 10000 if starting from scratch
|
||||
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
|
||||
f_start: [ 1.e-6 ]
|
||||
f_max: [ 1.e-4 ]
|
||||
f_min: [ 1.e-10 ]
|
||||
|
||||
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
use_checkpoint: True
|
||||
use_fp16: True
|
||||
image_size: 32 # unused
|
||||
in_channels: 4
|
||||
out_channels: 4
|
||||
model_channels: 320
|
||||
attention_resolutions: [ 4, 2, 1 ]
|
||||
num_res_blocks: 2
|
||||
channel_mult: [ 1, 2, 4, 4 ]
|
||||
num_head_channels: 64 # need to fix for flash-attn
|
||||
use_spatial_transformer: True
|
||||
use_linear_in_transformer: True
|
||||
transformer_depth: 1
|
||||
context_dim: 1024
|
||||
legacy: False
|
||||
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
embed_dim: 4
|
||||
monitor: val/rec_loss
|
||||
ddconfig:
|
||||
#attn_type: "vanilla-xformers"
|
||||
double_z: true
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
|
||||
cond_stage_config:
|
||||
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
|
||||
params:
|
||||
freeze: True
|
||||
layer: "penultimate"
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 32
|
||||
wrap: False
|
||||
train:
|
||||
target: ldm.data.pokemon.PokemonDataset
|
||||
# params:
|
||||
# file_path: "/data/scratch/diffuser/laion_part0/"
|
||||
# world_size: 1
|
||||
# rank: 0
|
||||
|
||||
lightning:
|
||||
trainer:
|
||||
accelerator: 'gpu'
|
||||
devices: 1
|
||||
log_gpu_memory: all
|
||||
max_epochs: 2
|
||||
precision: 16
|
||||
auto_select_gpus: False
|
||||
strategy:
|
||||
target: strategies.ColossalAIStrategy
|
||||
params:
|
||||
use_chunk: True
|
||||
enable_distributed_storage: True
|
||||
placement_policy: cuda
|
||||
force_outputs_fp32: true
|
||||
|
||||
log_every_n_steps: 2
|
||||
logger: True
|
||||
default_root_dir: "/tmp/diff_log/"
|
||||
# profiler: pytorch
|
||||
|
||||
logger_config:
|
||||
wandb:
|
||||
target: loggers.WandbLogger
|
||||
params:
|
||||
name: nowname
|
||||
save_dir: "/tmp/diff_log/"
|
||||
offline: opt.debug
|
||||
id: nowname
|
|
@ -1,6 +1,5 @@
|
|||
python scripts/txt2img.py --prompt "Teyvat, Name:Layla, Element: Cryo, Weapon:Sword, Region:Sumeru, Model type:Medium Female, Description:a woman in a blue outfit holding a sword" --plms \
|
||||
--outdir ./output \
|
||||
--config /home/lcmql/data2/Genshin/2022-11-18T16-38-46_train_colossalai_teyvattest/checkpoints/last.ckpt \
|
||||
--ckpt /home/lcmql/data2/Genshin/2022-11-18T16-38-46_train_colossalai_teyvattest/configs/2022-11-18T16-38-46-project.yaml \
|
||||
--ckpt /tmp/2022-11-18T16-38-46_train_colossalai/checkpoints/last.ckpt \
|
||||
--config /tmp/2022-11-18T16-38-46_train_colossalai/configs/2022-11-18T16-38-46-project.yaml \
|
||||
--n_samples 4
|
||||
|
||||
|
|
|
@ -2,4 +2,4 @@ HF_DATASETS_OFFLINE=1
|
|||
TRANSFORMERS_OFFLINE=1
|
||||
DIFFUSERS_OFFLINE=1
|
||||
|
||||
python main.py --logdir /tmp -t -b /configs/train_colossalai.yaml
|
||||
python main.py --logdir /tmp -t -b configs/train_colossalai.yaml
|
||||
|
|
Loading…
Reference in New Issue