theoretically add diffusion spacing

vedantroy · vedantroy · commit f08b7252a353 · 2022-08-10T19:41:58.000-07:00
diff --git a/diffusion/diffusion.py b/diffusion/diffusion.py
@@ -1,5 +1,4 @@
 from collections import namedtuple
-from types import SimpleNamespace
 from abc import ABC, abstractmethod
 
 import torch as th
@@ -81,13 +80,21 @@ def get_eps_and_var(model_output, *, C):
 
 
 class GaussianDiffusion(ABC):
-    def __init__(self, betas):
+    def __init__(self, betas, timestep_map=None):
+        # TODO: Get rid of this "check"?
+        # It's never once caught a bug ...
+        def check(x):
+            assert x.shape == (self.n_timesteps,)
+
         self.n_timesteps = betas.shape[0]
+        self.betas = betas
+        self.timestep_map = range(self.n_timesteps) if timestep_map is None else timestep_map
+        assert len(self.timestep_map) == self.n_timesteps
+
         alphas = 1 - betas
         alphas_cumprod = th.cumprod(alphas, dim=0)
-
-        def check(x):
-            assert x.shape == (self.n_timesteps,)
+        self.alphas_cumprod = alphas_cumprod
+        check(self.alphas_cumprod)
 
         # TODO(verify): By prepending 1, the 1st beta is 0
         # This represents the initial image, which as a mean but no variance (since it's ground truth)
@@ -231,7 +238,6 @@ def p_sample_loop_progressive(self, *, model, noise, shape, threshold, device):
         assert xor(
             noise, shape
         ), f"Either noise or shape must be specified, but not both or neither"
-        indices = list(range(self.n_timesteps))[::-1]
 
         img = N = None
         if noise:
@@ -240,8 +246,8 @@ def p_sample_loop_progressive(self, *, model, noise, shape, threshold, device):
             img = th.randn(shape, device=device)
         N = img.shape[0]
 
-        for i in indices:
-            t = th.tensor([i] * N, device=device)
+        for _t in self.timestep_map[::-1]:
+            t = th.tensor([_t] * N, device=device)
             with th.no_grad():
                 img = self.p_sample(model=model, x_t=img, t=t, threshold=threshold)
                 yield img
diff --git a/diffusion/spaced.py b/diffusion/spaced.py
@@ -0,0 +1,99 @@
+import torch as th
+
+from .diffusion import LearnedVarianceGaussianDiffusion
+
+### Start OpenAI Code
+def space_timesteps(num_timesteps, section_counts):
+    """
+    Create a list of timesteps to use from an original diffusion process,
+    given the number of timesteps we want to take from equally-sized portions
+    of the original process.
+    For example, if there's 300 timesteps and the section counts are [10,15,20]
+    then the first 100 timesteps are strided to be 10 timesteps, the second 100
+    are strided to be 15 timesteps, and the final 100 are strided to be 20.
+    If the stride is a string starting with "ddim", then the fixed striding
+    from the DDIM paper is used, and only one section is allowed.
+    :param num_timesteps: the number of diffusion steps in the original
+                          process to divide up.
+    :param section_counts: either a list of numbers, or a string containing
+                           comma-separated numbers, indicating the step count
+                           per section. As a special case, use "ddimN" where N
+                           is a number of steps to use the striding from the
+                           DDIM paper.
+    :return: a set of diffusion steps from the original process to use.
+    """
+    if isinstance(section_counts, str):
+        if section_counts.startswith("ddim"):
+            desired_count = int(section_counts[len("ddim") :])
+            for i in range(1, num_timesteps):
+                if len(range(0, num_timesteps, i)) == desired_count:
+                    return set(range(0, num_timesteps, i))
+            raise ValueError(
+                f"cannot create exactly {desired_count} steps with an integer stride"
+            )
+        section_counts = [int(x) for x in section_counts.split(",")]
+    size_per = num_timesteps // len(section_counts)
+    extra = num_timesteps % len(section_counts)
+    start_idx = 0
+    all_steps = []
+    for i, section_count in enumerate(section_counts):
+        size = size_per + (1 if i < extra else 0)
+        if size < section_count:
+            raise ValueError(
+                f"cannot divide section of {size} steps into {section_count}"
+            )
+        if section_count <= 1:
+            frac_stride = 1
+        else:
+            frac_stride = (size - 1) / (section_count - 1)
+        cur_idx = 0.0
+        taken_steps = []
+        for _ in range(section_count):
+            taken_steps.append(start_idx + round(cur_idx))
+            cur_idx += frac_stride
+        all_steps += taken_steps
+        start_idx += size
+    return set(all_steps)
+
+
+### End OpenAI Code
+
+
+def create_map_and_betas(betas, use_timesteps):
+    """
+    Build the beta schedule of a shortened diffusion process that only
+    visits the timesteps in `use_timesteps`.
+
+    Each new beta is 1 - alphas_cumprod[t] / alphas_cumprod[t_prev], where
+    t_prev is the previously kept timestep (the ratio is alphas_cumprod[t]
+    itself for the first kept timestep), so the cumulative product of the
+    new alphas equals the original alphas_cumprod at every kept timestep.
+
+    :param betas: 1-D tensor with the betas of the original process.
+    :param use_timesteps: iterable of original timestep indices to keep.
+    :return: a tuple (timestep_map, new_betas). timestep_map is a list whose
+             i-th entry is the original timestep of the i-th kept step, in
+             increasing order; new_betas is a 1-D tensor of the same length.
+    :raises ValueError: if no timestep of the original process is kept.
+    """
+    use_timesteps = set(use_timesteps)
+
+    # Doesn't matter what diffusion we use since the constructor
+    # is defined in the base class
+    base_diffusion = LearnedVarianceGaussianDiffusion(betas)
+
+    timestep_map = []
+    new_betas = []
+    prev_alpha_cumprod = 1
+    for t, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod):
+        if t in use_timesteps:
+            new_betas.append(1 - alpha_cumprod / prev_alpha_cumprod)
+            prev_alpha_cumprod = alpha_cumprod
+            timestep_map.append(t)
+
+    if not new_betas:
+        raise ValueError("use_timesteps selects no timestep of the process")
+
+    # GaussianDiffusion reads `betas.shape`, so hand back a tensor rather
+    # than a Python list of 0-d tensors.
+    return timestep_map, th.stack(new_betas)
diff --git a/iddpm.py b/iddpm.py
@@ -49,13 +49,14 @@ class DiffusionParams(hp.Hparams):
     schedule: str = hp.required("diffusion schedule")
     learn_sigma: bool = hp.required("whether to learn sigma")
 
-    def initialize_object(self):
+    def initialize_object(self, diffusion_kwargs=None):
         assert self.schedule == "cosine", "Only cosine schedule is supported"
-        betas = cosine_betas(self.steps)
+        if not diffusion_kwargs:
+            diffusion_kwargs = {"betas": cosine_betas(self.steps)}
         return (
-            LearnedVarianceGaussianDiffusion(betas)
+            LearnedVarianceGaussianDiffusion(**diffusion_kwargs)
             if self.learn_sigma
-            else FixedSmallVarianceGaussianDiffusion(betas)
+            else FixedSmallVarianceGaussianDiffusion(**diffusion_kwargs)
         )
 
 
@@ -64,10 +65,10 @@ class IDDPMConfig(hp.Hparams):
     unet: UNetParams = hp.required("the UNet model")
     diffusion: DiffusionParams = hp.required("Gaussian diffusion parameters")
 
-    def initialize_object(self):
+    def initialize_object(self, diffusion_kwargs=None):
         unet, diffusion = (
             self.unet.initialize_object(),
-            self.diffusion.initialize_object(),
+            self.diffusion.initialize_object(diffusion_kwargs),
         )
         return IDDPM(unet, diffusion)
 
diff --git a/sample.py b/sample.py
@@ -4,8 +4,9 @@
 import torchvision
 import torch as th
 import typer
+from diffusion.spaced import create_map_and_betas, space_timesteps
 
-from iddpm import IDDPMConfig, IDDPM
+from iddpm import IDDPMConfig
 
 
 def img_to_bytes(img):
@@ -18,11 +19,31 @@ def run(
     out_dir: Path = typer.Option(...),
     checkpoint: Path = typer.Option(...),
     samples: int = typer.Option(...),
+    spacing: str = typer.Option(default=None),
 ):
     assert checkpoint.is_file(), f"Checkpoint file not found: {checkpoint}"
 
     config = IDDPMConfig.create(config, None, cli_args=False)
-    iddpm = config.initialize_object()
+
+    # Build only the diffusion (not the UNet) with the full schedule first:
+    # the spacing is derived from its step count and betas.
+    base_diffusion = config.diffusion.initialize_object(None)
+
+    # Without --spacing, keep the full schedule (the pre-existing behavior).
+    diffusion_kwargs = None
+    if spacing is not None:
+        section_counts = [int(x) for x in spacing.split(",")]
+        use_timesteps = space_timesteps(base_diffusion.n_timesteps, section_counts)
+        timestep_map, betas = create_map_and_betas(
+            base_diffusion.betas, use_timesteps
+        )
+        # The diffusion constructor reads `betas.shape`, so make sure it gets
+        # a single 1-D tensor even if a sequence of 0-d tensors came back.
+        diffusion_kwargs = dict(
+            timestep_map=timestep_map, betas=th.stack(list(betas))
+        )
+
+    iddpm = config.initialize_object(diffusion_kwargs=diffusion_kwargs)
 
     out_dir.mkdir(parents=True)