From c701b77b1131a9095f3dca454da4ec667bcbf182 Mon Sep 17 00:00:00 2001 From: NatalieC323 <127177614+NatalieC323@users.noreply.github.com> Date: Thu, 6 Apr 2023 17:50:52 +0800 Subject: [PATCH] [dreambooth] fixing the incompatibity in requirements.txt (#3190) (#3378) * Update requirements.txt * Update environment.yaml * Update README.md * Update environment.yaml * Update README.md * Update README.md * Delete requirements_colossalai.txt * Update requirements.txt * Update README.md --- .../Teyvat/train_colossalai_teyvat.yaml | 16 ++-- .../diffusion/configs/train_colossalai.yaml | 22 ++--- .../configs/train_colossalai_cifar10.yaml | 16 ++-- .../images/diffusion/configs/train_ddp.yaml | 14 +-- .../diffusion/ldm/models/autoencoder.py | 5 +- .../ldm/models/diffusion/classifier.py | 9 +- .../diffusion/ldm/models/diffusion/ddpm.py | 22 +++-- examples/images/diffusion/main.py | 94 ++++++++++++------- examples/images/diffusion/scripts/img2img.py | 4 +- examples/images/diffusion/scripts/inpaint.py | 4 +- examples/images/diffusion/scripts/knn2img.py | 5 +- .../diffusion/scripts/sample_diffusion.py | 3 +- .../scripts/tests/test_checkpoint.py | 4 +- examples/images/diffusion/scripts/txt2img.py | 4 +- 14 files changed, 124 insertions(+), 98 deletions(-) diff --git a/examples/images/diffusion/configs/Teyvat/train_colossalai_teyvat.yaml b/examples/images/diffusion/configs/Teyvat/train_colossalai_teyvat.yaml index ff0f4c5a0..fe883cdfd 100644 --- a/examples/images/diffusion/configs/Teyvat/train_colossalai_teyvat.yaml +++ b/examples/images/diffusion/configs/Teyvat/train_colossalai_teyvat.yaml @@ -1,6 +1,6 @@ model: base_learning_rate: 1.0e-4 - target: ldm.models.diffusion.ddpm.LatentDiffusion + #target: ldm.models.diffusion.ddpm.LatentDiffusion params: parameterization: "v" linear_start: 0.00085 @@ -20,7 +20,7 @@ model: use_ema: False scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler + #target: ldm.lr_scheduler.LambdaLinearScheduler params: 
warm_up_steps: [ 1 ] # NOTE for resuming. use 10000 if starting from scratch cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases @@ -30,7 +30,7 @@ model: unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel + #target: ldm.modules.diffusionmodules.openaimodel.UNetModel params: use_checkpoint: True use_fp16: True @@ -49,7 +49,7 @@ model: legacy: False first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL + #target: ldm.models.autoencoder.AutoencoderKL params: embed_dim: 4 monitor: val/rec_loss @@ -73,13 +73,13 @@ model: target: torch.nn.Identity cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + #target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder params: freeze: True layer: "penultimate" data: - target: main.DataModuleFromConfig + #target: main.DataModuleFromConfig params: batch_size: 16 num_workers: 4 @@ -105,7 +105,7 @@ lightning: precision: 16 auto_select_gpus: False strategy: - target: strategies.ColossalAIStrategy + #target: strategies.ColossalAIStrategy params: use_chunk: True enable_distributed_storage: True @@ -120,7 +120,7 @@ lightning: logger_config: wandb: - target: loggers.WandbLogger + #target: loggers.WandbLogger params: name: nowname save_dir: "/tmp/diff_log/" diff --git a/examples/images/diffusion/configs/train_colossalai.yaml b/examples/images/diffusion/configs/train_colossalai.yaml index 88432e978..388ab2e8f 100644 --- a/examples/images/diffusion/configs/train_colossalai.yaml +++ b/examples/images/diffusion/configs/train_colossalai.yaml @@ -1,6 +1,6 @@ model: base_learning_rate: 1.0e-4 - target: ldm.models.diffusion.ddpm.LatentDiffusion + #target: ldm.models.diffusion.ddpm.LatentDiffusion params: parameterization: "v" linear_start: 0.00085 @@ -19,7 +19,7 @@ model: use_ema: False # we set this to false because this is an inference only config scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler + #target: 
ldm.lr_scheduler.LambdaLinearScheduler params: warm_up_steps: [ 1 ] # NOTE for resuming. use 10000 if starting from scratch cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases @@ -29,7 +29,7 @@ model: unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel + #target: ldm.modules.diffusionmodules.openaimodel.UNetModel params: use_checkpoint: True use_fp16: True @@ -48,7 +48,7 @@ model: legacy: False first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL + #target: ldm.models.autoencoder.AutoencoderKL params: embed_dim: 4 monitor: val/rec_loss @@ -69,16 +69,16 @@ model: attn_resolutions: [] dropout: 0.0 lossconfig: - target: torch.nn.Identity + #target: torch.nn.Identity cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + #target: #ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder params: freeze: True layer: "penultimate" data: - target: main.DataModuleFromConfig + #target: #main.DataModuleFromConfig params: batch_size: 128 wrap: False @@ -88,20 +88,20 @@ data: train: target: ldm.data.base.Txt2ImgIterableBaseDataset params: - file_path: # YOUR DATASET_PATH + file_path: /data/scratch/diffuser/laion_part0/ world_size: 1 rank: 0 lightning: trainer: accelerator: 'gpu' - devices: 8 + devices: 2 log_gpu_memory: all max_epochs: 2 precision: 16 auto_select_gpus: False strategy: - target: strategies.ColossalAIStrategy + #target: #strategies.ColossalAIStrategy params: use_chunk: True enable_distributed_storage: True @@ -116,7 +116,7 @@ lightning: logger_config: wandb: - target: loggers.WandbLogger + #target: #loggers.WandbLogger params: name: nowname save_dir: "/tmp/diff_log/" diff --git a/examples/images/diffusion/configs/train_colossalai_cifar10.yaml b/examples/images/diffusion/configs/train_colossalai_cifar10.yaml index 0ba06f832..1331f96e3 100644 --- a/examples/images/diffusion/configs/train_colossalai_cifar10.yaml +++ 
b/examples/images/diffusion/configs/train_colossalai_cifar10.yaml @@ -1,6 +1,6 @@ model: base_learning_rate: 1.0e-4 - target: ldm.models.diffusion.ddpm.LatentDiffusion + #target: ldm.models.diffusion.ddpm.LatentDiffusion params: parameterization: "v" linear_start: 0.00085 @@ -19,7 +19,7 @@ model: use_ema: False # we set this to false because this is an inference only config scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler + #target: ldm.lr_scheduler.LambdaLinearScheduler params: warm_up_steps: [ 1 ] # NOTE for resuming. use 10000 if starting from scratch cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases @@ -29,7 +29,7 @@ model: unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel + #target: ldm.modules.diffusionmodules.openaimodel.UNetModel params: use_checkpoint: True use_fp16: True @@ -48,7 +48,7 @@ model: legacy: False first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL + #target: ldm.models.autoencoder.AutoencoderKL params: embed_dim: 4 monitor: val/rec_loss @@ -69,16 +69,16 @@ model: attn_resolutions: [] dropout: 0.0 lossconfig: - target: torch.nn.Identity + #target: torch.nn.Identity cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + #target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder params: freeze: True layer: "penultimate" data: - target: main.DataModuleFromConfig + #target: main.DataModuleFromConfig params: batch_size: 4 num_workers: 4 @@ -105,7 +105,7 @@ lightning: precision: 16 auto_select_gpus: False strategy: - target: strategies.ColossalAIStrategy + #target: strategies.ColossalAIStrategy params: use_chunk: True enable_distributed_storage: True diff --git a/examples/images/diffusion/configs/train_ddp.yaml b/examples/images/diffusion/configs/train_ddp.yaml index a63df887e..df591f33d 100644 --- a/examples/images/diffusion/configs/train_ddp.yaml +++ b/examples/images/diffusion/configs/train_ddp.yaml @@ -1,6 
+1,6 @@ model: base_learning_rate: 1.0e-4 - target: ldm.models.diffusion.ddpm.LatentDiffusion + #target: ldm.models.diffusion.ddpm.LatentDiffusion params: parameterization: "v" linear_start: 0.00085 @@ -29,7 +29,7 @@ model: unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel + #target: ldm.modules.diffusionmodules.openaimodel.UNetModel params: use_checkpoint: True use_fp16: True @@ -48,7 +48,7 @@ model: legacy: False first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL + #target: ldm.models.autoencoder.AutoencoderKL params: embed_dim: 4 monitor: val/rec_loss @@ -72,13 +72,13 @@ model: target: torch.nn.Identity cond_stage_config: - target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + #target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder params: freeze: True layer: "penultimate" data: - target: main.DataModuleFromConfig + #target: main.DataModuleFromConfig params: batch_size: 128 # num_workwers should be 2 * batch_size, and the total num less than 1024 @@ -100,7 +100,7 @@ lightning: precision: 16 auto_select_gpus: False strategy: - target: strategies.DDPStrategy + #target: strategies.DDPStrategy params: find_unused_parameters: False log_every_n_steps: 2 @@ -111,7 +111,7 @@ lightning: logger_config: wandb: - target: loggers.WandbLogger + #target: loggers.WandbLogger params: name: nowname save_dir: "/data2/tmp/diff_log/" diff --git a/examples/images/diffusion/ldm/models/autoencoder.py b/examples/images/diffusion/ldm/models/autoencoder.py index b1bd83778..145ccf6fb 100644 --- a/examples/images/diffusion/ldm/models/autoencoder.py +++ b/examples/images/diffusion/ldm/models/autoencoder.py @@ -6,11 +6,10 @@ except: import torch.nn.functional as F from contextlib import contextmanager +from torch.nn import Identity from ldm.modules.diffusionmodules.model import Encoder, Decoder from ldm.modules.distributions.distributions import DiagonalGaussianDistribution - -from ldm.util import instantiate_from_config from 
ldm.modules.ema import LitEma @@ -32,7 +31,7 @@ class AutoencoderKL(pl.LightningModule): self.image_key = image_key self.encoder = Encoder(**ddconfig) self.decoder = Decoder(**ddconfig) - self.loss = instantiate_from_config(lossconfig) + self.loss = Identity(**lossconfig.get("params", dict())) assert ddconfig["double_z"] self.quant_conv = torch.nn.Conv2d(2*ddconfig["z_channels"], 2*embed_dim, 1) self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) diff --git a/examples/images/diffusion/ldm/models/diffusion/classifier.py b/examples/images/diffusion/ldm/models/diffusion/classifier.py index 612a8371b..3cf12f093 100644 --- a/examples/images/diffusion/ldm/models/diffusion/classifier.py +++ b/examples/images/diffusion/ldm/models/diffusion/classifier.py @@ -9,9 +9,10 @@ from copy import deepcopy from einops import rearrange from glob import glob from natsort import natsorted - +from ldm.models.diffusion.ddpm import LatentDiffusion +from ldm.lr_scheduler import LambdaLinearScheduler from ldm.modules.diffusionmodules.openaimodel import EncoderUNetModel, UNetModel -from ldm.util import log_txt_as_img, default, ismap, instantiate_from_config +from ldm.util import log_txt_as_img, default, ismap __models__ = { 'class_label': EncoderUNetModel, @@ -86,7 +87,7 @@ class NoisyLatentImageClassifier(pl.LightningModule): print(f"Unexpected Keys: {unexpected}") def load_diffusion(self): - model = instantiate_from_config(self.diffusion_config) + model = LatentDiffusion(**self.diffusion_config.get('params',dict())) self.diffusion_model = model.eval() self.diffusion_model.train = disabled_train for param in self.diffusion_model.parameters(): @@ -221,7 +222,7 @@ class NoisyLatentImageClassifier(pl.LightningModule): optimizer = AdamW(self.model.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay) if self.use_scheduler: - scheduler = instantiate_from_config(self.scheduler_config) + scheduler = 
LambdaLinearScheduler(**self.scheduler_config.get('params',dict())) print("Setting up LambdaLR scheduler...") scheduler = [ diff --git a/examples/images/diffusion/ldm/models/diffusion/ddpm.py b/examples/images/diffusion/ldm/models/diffusion/ddpm.py index b7315b048..11de82873 100644 --- a/examples/images/diffusion/ldm/models/diffusion/ddpm.py +++ b/examples/images/diffusion/ldm/models/diffusion/ddpm.py @@ -22,6 +22,7 @@ from contextlib import contextmanager, nullcontext from functools import partial from einops import rearrange, repeat +from ldm.lr_scheduler import LambdaLinearScheduler from ldm.models.autoencoder import * from ldm.models.autoencoder import AutoencoderKL, IdentityFirstStage from ldm.models.diffusion.ddim import * @@ -29,9 +30,10 @@ from ldm.models.diffusion.ddim import DDIMSampler from ldm.modules.diffusionmodules.model import * from ldm.modules.diffusionmodules.model import Decoder, Encoder, Model from ldm.modules.diffusionmodules.openaimodel import * -from ldm.modules.diffusionmodules.openaimodel import AttentionPool2d +from ldm.modules.diffusionmodules.openaimodel import AttentionPool2d, UNetModel from ldm.modules.diffusionmodules.util import extract_into_tensor, make_beta_schedule, noise_like from ldm.modules.distributions.distributions import DiagonalGaussianDistribution, normal_kl +from ldm.modules.diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation from ldm.modules.ema import LitEma from ldm.modules.encoders.modules import * from ldm.util import count_params, default, exists, instantiate_from_config, isimage, ismap, log_txt_as_img, mean_flat @@ -39,6 +41,7 @@ from omegaconf import ListConfig from torch.optim.lr_scheduler import LambdaLR from torchvision.utils import make_grid from tqdm import tqdm +from ldm.modules.midas.api import MiDaSInference __conditioning_keys__ = {'concat': 'c_concat', 'crossattn': 'c_crossattn', 'adm': 'y'} @@ -690,7 +693,7 @@ class LatentDiffusion(DDPM): self.make_cond_schedule() def 
instantiate_first_stage(self, config): - model = instantiate_from_config(config) + model = AutoencoderKL(**config.get("params", dict())) self.first_stage_model = model.eval() self.first_stage_model.train = disabled_train for param in self.first_stage_model.parameters(): @@ -706,7 +709,7 @@ class LatentDiffusion(DDPM): self.cond_stage_model = None # self.be_unconditional = True else: - model = instantiate_from_config(config) + model = FrozenOpenCLIPEmbedder(**config.get("params", dict())) self.cond_stage_model = model.eval() self.cond_stage_model.train = disabled_train for param in self.cond_stage_model.parameters(): @@ -714,7 +717,7 @@ class LatentDiffusion(DDPM): else: assert config != '__is_first_stage__' assert config != '__is_unconditional__' - model = instantiate_from_config(config) + model = FrozenOpenCLIPEmbedder(**config.get("params", dict())) self.cond_stage_model = model def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False): @@ -1479,8 +1482,7 @@ class LatentDiffusion(DDPM): # opt = torch.optim.AdamW(params, lr=lr) if self.use_scheduler: - assert 'target' in self.scheduler_config - scheduler = instantiate_from_config(self.scheduler_config) + scheduler = LambdaLinearScheduler(**self.scheduler_config.get("params", dict())) rank_zero_info("Setting up LambdaLR scheduler...") scheduler = [{'scheduler': LambdaLR(opt, lr_lambda=scheduler.schedule), 'interval': 'step', 'frequency': 1}] @@ -1502,7 +1504,7 @@ class DiffusionWrapper(pl.LightningModule): def __init__(self, diff_model_config, conditioning_key): super().__init__() self.sequential_cross_attn = diff_model_config.pop("sequential_crossattn", False) - self.diffusion_model = instantiate_from_config(diff_model_config) + self.diffusion_model = UNetModel(**diff_model_config.get("params", dict())) self.conditioning_key = conditioning_key assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm', 'hybrid-adm', 'crossattn-adm'] @@ -1551,7 +1553,7 @@ class 
LatentUpscaleDiffusion(LatentDiffusion): self.noise_level_key = noise_level_key def instantiate_low_stage(self, config): - model = instantiate_from_config(config) + model = ImageConcatWithNoiseAugmentation(**config.get("params", dict())) self.low_scale_model = model.eval() self.low_scale_model.train = disabled_train for param in self.low_scale_model.parameters(): @@ -1933,7 +1935,7 @@ class LatentDepth2ImageDiffusion(LatentFinetuneDiffusion): def __init__(self, depth_stage_config, concat_keys=("midas_in",), *args, **kwargs): super().__init__(concat_keys=concat_keys, *args, **kwargs) - self.depth_model = instantiate_from_config(depth_stage_config) + self.depth_model = MiDaSInference(**depth_stage_config.get("params", dict())) self.depth_stage_key = concat_keys[0] @torch.no_grad() @@ -2006,7 +2008,7 @@ class LatentUpscaleFinetuneDiffusion(LatentFinetuneDiffusion): self.low_scale_key = low_scale_key def instantiate_low_stage(self, config): - model = instantiate_from_config(config) + model = ImageConcatWithNoiseAugmentation(**config.get("params", dict())) self.low_scale_model = model.eval() self.low_scale_model.train = disabled_train for param in self.low_scale_model.parameters(): diff --git a/examples/images/diffusion/main.py b/examples/images/diffusion/main.py index 91b809d5a..aeed6d556 100644 --- a/examples/images/diffusion/main.py +++ b/examples/images/diffusion/main.py @@ -23,19 +23,21 @@ from packaging import version from PIL import Image from prefetch_generator import BackgroundGenerator from torch.utils.data import DataLoader, Dataset, Subset, random_split - -try: - from lightning.pytorch import seed_everything - from lightning.pytorch.callbacks import Callback, LearningRateMonitor, ModelCheckpoint - from lightning.pytorch.trainer import Trainer - from lightning.pytorch.utilities import rank_zero_info, rank_zero_only - LIGHTNING_PACK_NAME = "lightning.pytorch." 
-except: - from pytorch_lightning import seed_everything - from pytorch_lightning.callbacks import Callback, LearningRateMonitor, ModelCheckpoint - from pytorch_lightning.trainer import Trainer - from pytorch_lightning.utilities import rank_zero_info, rank_zero_only - LIGHTNING_PACK_NAME = "pytorch_lightning." +from ldm.models.diffusion.ddpm import LatentDiffusion +#try: +from lightning.pytorch import seed_everything +from lightning.pytorch.callbacks import Callback, LearningRateMonitor, ModelCheckpoint +from lightning.pytorch.trainer import Trainer +from lightning.pytorch.utilities import rank_zero_info, rank_zero_only +from lightning.pytorch.loggers import WandbLogger, TensorBoardLogger +from lightning.pytorch.strategies import ColossalAIStrategy,DDPStrategy +LIGHTNING_PACK_NAME = "lightning.pytorch." +# #except: +# from pytorch_lightning import seed_everything +# from pytorch_lightning.callbacks import Callback, LearningRateMonitor, ModelCheckpoint +# from pytorch_lightning.trainer import Trainer +# from pytorch_lightning.utilities import rank_zero_info, rank_zero_only +# LIGHTNING_PACK_NAME = "pytorch_lightning." from ldm.data.base import Txt2ImgIterableBaseDataset from ldm.util import instantiate_from_config @@ -575,7 +577,7 @@ if __name__ == "__main__": # target: path to test dataset # params: # key: value - # lightning: (optional, has sane defaults and can be specified on cmdline) + # lightning: (optional, has sane defaults and can be specified on cmdline) # trainer: # additional arguments to trainer # logger: @@ -653,7 +655,7 @@ if __name__ == "__main__": # Sets the seed for the random number generator to ensure reproducibility seed_everything(opt.seed) - # Intinalize and save configuratioon using teh OmegaConf library. + # Initialize and save configuration using the OmegaConf library. 
try: # init and save configs configs = [OmegaConf.load(cfg) for cfg in opt.base] @@ -687,7 +689,7 @@ if __name__ == "__main__": config.model["params"].update({"ckpt": ckpt}) rank_zero_info("Using ckpt_path = {}".format(config.model["params"]["ckpt"])) - model = instantiate_from_config(config.model) + model = LatentDiffusion(**config.model.get("params", dict())) # trainer and callbacks trainer_kwargs = dict() @@ -696,7 +698,7 @@ if __name__ == "__main__": # These loggers are specified as targets in the dictionary, along with the configuration settings specific to each logger. default_logger_cfgs = { "wandb": { - "target": LIGHTNING_PACK_NAME + "loggers.WandbLogger", + #"target": LIGHTNING_PACK_NAME + "loggers.WandbLogger", "params": { "name": nowname, "save_dir": logdir, @@ -705,7 +707,7 @@ } }, "tensorboard": { - "target": LIGHTNING_PACK_NAME + "loggers.TensorBoardLogger", + #"target": LIGHTNING_PACK_NAME + "loggers.TensorBoardLogger", "params": { "save_dir": logdir, "name": "diff_tb", @@ -718,30 +720,32 @@ default_logger_cfg = default_logger_cfgs["tensorboard"] if "logger" in lightning_config: logger_cfg = lightning_config.logger + logger_cfg = OmegaConf.merge(default_logger_cfg, logger_cfg) + trainer_kwargs["logger"] = WandbLogger(**logger_cfg.get("params", dict())) else: logger_cfg = default_logger_cfg - logger_cfg = OmegaConf.merge(default_logger_cfg, logger_cfg) - trainer_kwargs["logger"] = instantiate_from_config(logger_cfg) + logger_cfg = OmegaConf.merge(default_logger_cfg, logger_cfg) + trainer_kwargs["logger"] = TensorBoardLogger(**logger_cfg.get("params", dict())) + # config the strategy, default is ddp if "strategy" in trainer_config: strategy_cfg = trainer_config["strategy"] - strategy_cfg["target"] = LIGHTNING_PACK_NAME + strategy_cfg["target"] + trainer_kwargs["strategy"] = ColossalAIStrategy(**strategy_cfg.get("params", dict())) else: strategy_cfg = { - "target": LIGHTNING_PACK_NAME + 
"strategies.DDPStrategy", + #"target": LIGHTNING_PACK_NAME + "strategies.DDPStrategy", "params": { "find_unused_parameters": False } } - - trainer_kwargs["strategy"] = instantiate_from_config(strategy_cfg) + trainer_kwargs["strategy"] = DDPStrategy(**strategy_cfg.get("params", dict())) # Set up ModelCheckpoint callback to save best models # modelcheckpoint - use TrainResult/EvalResult(checkpoint_on=metric) to # specify which metric is used to determine best models default_modelckpt_cfg = { - "target": LIGHTNING_PACK_NAME + "callbacks.ModelCheckpoint", + #"target": LIGHTNING_PACK_NAME + "callbacks.ModelCheckpoint", "params": { "dirpath": ckptdir, "filename": "{epoch:06}", @@ -759,13 +763,13 @@ if __name__ == "__main__": modelckpt_cfg = OmegaConf.create() modelckpt_cfg = OmegaConf.merge(default_modelckpt_cfg, modelckpt_cfg) if version.parse(pl.__version__) < version.parse('1.4.0'): - trainer_kwargs["checkpoint_callback"] = instantiate_from_config(modelckpt_cfg) + trainer_kwargs["checkpoint_callback"] = ModelCheckpoint(**modelckpt_cfg.get("params", dict())) # Set up various callbacks, including logging, learning rate monitoring, and CUDA management # add callback which sets up log directory default_callbacks_cfg = { "setup_callback": { # callback to set up the training - "target": "main.SetupCallback", + #"target": "main.SetupCallback", "params": { "resume": opt.resume, # resume training if applicable "now": now, @@ -777,7 +781,7 @@ if __name__ == "__main__": } }, "image_logger": { # callback to log image data - "target": "main.ImageLogger", + #"target": "main.ImageLogger", "params": { "batch_frequency": 750, # how frequently to log images "max_images": 4, # maximum number of images to log @@ -785,14 +789,14 @@ if __name__ == "__main__": } }, "learning_rate_logger": { # callback to log learning rate - "target": "main.LearningRateMonitor", + #"target": "main.LearningRateMonitor", "params": { "logging_interval": "step", # logging frequency (either 'step' or 'epoch') # 
"log_momentum": True # whether to log momentum (currently commented out) } }, "cuda_callback": { # callback to handle CUDA-related operations - "target": "main.CUDACallback" + #"target": "main.CUDACallback" }, } @@ -810,7 +814,7 @@ if __name__ == "__main__": 'Caution: Saving checkpoints every n train steps without deleting. This might require some free space.') default_metrics_over_trainsteps_ckpt_dict = { 'metrics_over_trainsteps_checkpoint': { - "target": LIGHTNING_PACK_NAME + 'callbacks.ModelCheckpoint', + #"target": LIGHTNING_PACK_NAME + 'callbacks.ModelCheckpoint', 'params': { "dirpath": os.path.join(ckptdir, 'trainstep_checkpoints'), "filename": "{epoch:06}-{step:09}", @@ -825,15 +829,35 @@ if __name__ == "__main__": # Merge the default callbacks configuration with the specified callbacks configuration, and instantiate the callbacks callbacks_cfg = OmegaConf.merge(default_callbacks_cfg, callbacks_cfg) + + #Instantiate items according to the configs + trainer_kwargs.setdefault("callbacks", []) - trainer_kwargs["callbacks"] = [instantiate_from_config(callbacks_cfg[k]) for k in callbacks_cfg] + if "setup_callback" in callbacks_cfg: + setup_callback_config = callbacks_cfg["setup_callback"] + trainer_kwargs["callbacks"].append(SetupCallback(**setup_callback_config.get("params", dict()))) - # Create a Trainer object with the specified command-line arguments and keyword arguments, and set the log directory + if "image_logger" in callbacks_cfg: + image_logger_config = callbacks_cfg["image_logger"] + trainer_kwargs["callbacks"].append(ImageLogger(**image_logger_config.get("params", dict()))) + + if "learning_rate_logger" in callbacks_cfg: + learning_rate_logger_config = callbacks_cfg["learning_rate_logger"] + trainer_kwargs["callbacks"].append(LearningRateMonitor(**learning_rate_logger_config.get("params", dict()))) + + if "cuda_callback" in callbacks_cfg: + cuda_callback_config = callbacks_cfg["cuda_callback"] + 
trainer_kwargs["callbacks"].append(CUDACallback(**cuda_callback_config.get("params", dict()))) + + if "metrics_over_trainsteps_checkpoint" in callbacks_cfg: + metrics_over_config = callbacks_cfg['metrics_over_trainsteps_checkpoint'] + trainer_kwargs["callbacks"].append(ModelCheckpoint(**metrics_over_config.get("params", dict()))) + #trainer_kwargs["callbacks"] = [instantiate_from_config(callbacks_cfg[k]) for k in callbacks_cfg] trainer = Trainer.from_argparse_args(trainer_opt, **trainer_kwargs) trainer.logdir = logdir - + # Create a data module based on the configuration file - data = instantiate_from_config(config.data) + data = DataModuleFromConfig(**config.data.get("params", dict())) # NOTE according to https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html # calling these ourselves should not be necessary but it is. # lightning still takes care of proper multiprocessing though diff --git a/examples/images/diffusion/scripts/img2img.py b/examples/images/diffusion/scripts/img2img.py index 877538d47..a3011005c 100644 --- a/examples/images/diffusion/scripts/img2img.py +++ b/examples/images/diffusion/scripts/img2img.py @@ -20,8 +20,8 @@ from imwatermark import WatermarkEncoder from scripts.txt2img import put_watermark -from ldm.util import instantiate_from_config from ldm.models.diffusion.ddim import DDIMSampler +from ldm.models.diffusion.ddpm import LatentDiffusion from utils import replace_module, getModelSize @@ -36,7 +36,7 @@ def load_model_from_config(config, ckpt, verbose=False): if "global_step" in pl_sd: print(f"Global Step: {pl_sd['global_step']}") sd = pl_sd["state_dict"] - model = instantiate_from_config(config.model) + model = LatentDiffusion(**config.model.get("params", dict())) m, u = model.load_state_dict(sd, strict=False) if len(m) > 0 and verbose: print("missing keys:") diff --git a/examples/images/diffusion/scripts/inpaint.py b/examples/images/diffusion/scripts/inpaint.py index d6e6387a9..993c67b0e 100644 --- 
a/examples/images/diffusion/scripts/inpaint.py +++ b/examples/images/diffusion/scripts/inpaint.py @@ -4,7 +4,7 @@ from PIL import Image from tqdm import tqdm import numpy as np import torch -from main import instantiate_from_config +from ldm.models.diffusion.ddpm import LatentDiffusion from ldm.models.diffusion.ddim import DDIMSampler @@ -57,7 +57,7 @@ if __name__ == "__main__": print(f"Found {len(masks)} inputs.") config = OmegaConf.load("models/ldm/inpainting_big/config.yaml") - model = instantiate_from_config(config.model) + model = LatentDiffusion(**config.model.get("params", dict())) model.load_state_dict(torch.load("models/ldm/inpainting_big/last.ckpt")["state_dict"], strict=False) diff --git a/examples/images/diffusion/scripts/knn2img.py b/examples/images/diffusion/scripts/knn2img.py index e6eaaecab..66d9aa57d 100644 --- a/examples/images/diffusion/scripts/knn2img.py +++ b/examples/images/diffusion/scripts/knn2img.py @@ -13,9 +13,10 @@ import scann import time from multiprocessing import cpu_count -from ldm.util import instantiate_from_config, parallel_data_prefetch +from ldm.util import parallel_data_prefetch from ldm.models.diffusion.ddim import DDIMSampler from ldm.models.diffusion.plms import PLMSSampler +from ldm.models.diffusion.ddpm import LatentDiffusion from ldm.modules.encoders.modules import FrozenClipImageEmbedder, FrozenCLIPTextEmbedder DATABASES = [ @@ -44,7 +45,7 @@ def load_model_from_config(config, ckpt, verbose=False): if "global_step" in pl_sd: print(f"Global Step: {pl_sd['global_step']}") sd = pl_sd["state_dict"] - model = instantiate_from_config(config.model) + model = LatentDiffusion(**config.model.get("params", dict())) m, u = model.load_state_dict(sd, strict=False) if len(m) > 0 and verbose: print("missing keys:") diff --git a/examples/images/diffusion/scripts/sample_diffusion.py b/examples/images/diffusion/scripts/sample_diffusion.py index 876fe3c36..a25965ef7 100644 --- a/examples/images/diffusion/scripts/sample_diffusion.py +++ 
b/examples/images/diffusion/scripts/sample_diffusion.py @@ -8,7 +8,6 @@ from omegaconf import OmegaConf from PIL import Image from ldm.models.diffusion.ddim import DDIMSampler -from ldm.util import instantiate_from_config rescale = lambda x: (x + 1.) / 2. @@ -218,7 +217,7 @@ def get_parser(): def load_model_from_config(config, sd): - model = instantiate_from_config(config) + model = LatentDiffusion(**config.get("params", dict())) model.load_state_dict(sd,strict=False) model.cuda() model.eval() diff --git a/examples/images/diffusion/scripts/tests/test_checkpoint.py b/examples/images/diffusion/scripts/tests/test_checkpoint.py index a32e66d44..a157d186d 100644 --- a/examples/images/diffusion/scripts/tests/test_checkpoint.py +++ b/examples/images/diffusion/scripts/tests/test_checkpoint.py @@ -9,7 +9,7 @@ from diffusers import StableDiffusionPipeline import torch from ldm.util import instantiate_from_config from main import get_parser - +from ldm.modules.diffusionmodules.openaimodel import UNetModel if __name__ == "__main__": with torch.no_grad(): yaml_path = "../../train_colossalai.yaml" @@ -17,7 +17,7 @@ if __name__ == "__main__": config = f.read() base_config = yaml.load(config, Loader=yaml.FullLoader) unet_config = base_config['model']['params']['unet_config'] - diffusion_model = instantiate_from_config(unet_config).to("cuda:0") + diffusion_model = UNetModel(**unet_config.get("params", dict())).to("cuda:0") pipe = StableDiffusionPipeline.from_pretrained( "/data/scratch/diffuser/stable-diffusion-v1-4" diff --git a/examples/images/diffusion/scripts/txt2img.py b/examples/images/diffusion/scripts/txt2img.py index 364ebac6c..b198430f6 100644 --- a/examples/images/diffusion/scripts/txt2img.py +++ b/examples/images/diffusion/scripts/txt2img.py @@ -16,9 +16,9 @@ from torch import autocast from contextlib import nullcontext from imwatermark import WatermarkEncoder -from ldm.util import instantiate_from_config from ldm.models.diffusion.ddim import DDIMSampler from 
ldm.models.diffusion.plms import PLMSSampler +from ldm.models.diffusion.ddpm import LatentDiffusion from ldm.models.diffusion.dpm_solver import DPMSolverSampler from utils import replace_module, getModelSize @@ -35,7 +35,7 @@ def load_model_from_config(config, ckpt, verbose=False): if "global_step" in pl_sd: print(f"Global Step: {pl_sd['global_step']}") sd = pl_sd["state_dict"] - model = instantiate_from_config(config.model) + model = LatentDiffusion(**config.model.get("params", dict())) m, u = model.load_state_dict(sd, strict=False) if len(m) > 0 and verbose: print("missing keys:")