# mirror of https://github.com/hpcaitech/ColossalAI
from pathlib import Path
from typing import Optional

import torch.nn as nn
from torch.optim import Optimizer

from .checkpoint_io_base import CheckpointIO

__all__ = ['GeneralCheckpointIO']


class GeneralCheckpointIO(CheckpointIO):

    def load_model(self, model: nn.Module, checkpoint: str, strict: bool = True):
        """Load model weights from a single-file or sharded checkpoint into ``model``."""
        checkpoint = Path(checkpoint)
        is_sharded = self.is_sharded_checkpoint(checkpoint)

        if not is_sharded:
            # single-file checkpoint: load the whole state dict at once
            state_dict = self.load_state_dict(checkpoint)
            model.load_state_dict(state_dict, strict=strict)
        else:
            # sharded checkpoint: locate the index file first
            index_file_path = self.get_sharded_checkpoint_index_file(checkpoint)

            # iterate over the shard checkpoint files and load each one
            shard_files = self.get_checkpoint_shard_filenames(index_file_path)
            for shard_file in shard_files:
                shard_checkpoint = self.load_state_dict(shard_file)
                model.load_state_dict(shard_checkpoint, strict=strict)

        return model
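
    # A minimal sketch (assumption, not taken from this file) of the kind of layout
    # ``load_model`` iterates over for the sharded case: a directory containing an
    # index file that lists shard files, each shard holding a partial state dict.
    # The actual file names and index format are defined by the ``CheckpointIO``
    # helpers (``is_sharded_checkpoint``, ``get_sharded_checkpoint_index_file``,
    # ``get_checkpoint_shard_filenames``), not here; the names below are hypothetical.
    #
    #   my_checkpoint/
    #       model.index.json      # index file listing the shard files
    #       model-00001.bin       # partial state dict (shard 1)
    #       model-00002.bin       # partial state dict (shard 2)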
    def save_model(self,
                   model: nn.Module,
                   checkpoint: str,
                   prefix: Optional[str] = None,
                   shard: bool = False,
                   size_per_shard: int = 1024):
        """Save model weights to ``checkpoint``; sharded saving is not implemented yet."""
        checkpoint = Path(checkpoint)
        if shard:
            # TODO(FrankLeeeee): implement checkpoint saving to sharded checkpoint
            raise NotImplementedError("Sharded checkpoint saving is not implemented yet")
        else:
            self.save_checkpoint(model.state_dict(), checkpoint)
    def load_optimizer(self, optimizer: Optimizer, checkpoint: str):
        """Load optimizer state from a single-file checkpoint; sharded loading is a TODO."""
        checkpoint = Path(checkpoint)
        is_sharded = self.is_sharded_checkpoint(checkpoint)

        if not is_sharded:
            state_dict = self.load_state_dict(checkpoint)
            optimizer.load_state_dict(state_dict)
        else:
            # TODO(FrankLeeeee): implement checkpoint loading from sharded checkpoint
            # This is not an urgent feature, so we can leave it for later;
            # let's implement this when we test large-scale models.
            pass
        return optimizer
    def save_optimizer(self, optimizer: Optimizer, checkpoint: str, shard: bool = False, size_per_shard: int = 1024):
        """Save optimizer state to ``checkpoint``; sharded saving is a TODO."""
        if shard:
            # TODO(FrankLeeeee): implement checkpoint saving to sharded checkpoint
            pass
        else:
            self.save_checkpoint(optimizer.state_dict(), checkpoint)
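

# Usage sketch (not part of the original file). It assumes ``CheckpointIO`` can be
# constructed without arguments; the model, optimizer, and paths below are
# hypothetical examples. Only the unsharded paths work, since the sharded cases
# are still TODO above.
#
#   from torch.optim import Adam
#
#   ckpt_io = GeneralCheckpointIO()
#   model = nn.Linear(16, 8)
#   optimizer = Adam(model.parameters(), lr=1e-3)
#
#   ckpt_io.save_model(model, "./model.pt")              # unsharded save
#   ckpt_io.load_model(model, "./model.pt")              # unsharded load
#   ckpt_io.save_optimizer(optimizer, "./optimizer.pt")
#   ckpt_io.load_optimizer(optimizer, "./optimizer.pt")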