From 0763d366ec53137a43cfd518ea9781f40e8306d1 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Tue, 23 Aug 2022 12:30:08 -0600
Subject: [PATCH 01/17] Added Apple Silicon MPS Support

---
 PatchFile                       |  11 +
 environment-mac.yaml            |  29 +++
 ldm/models/diffusion/ddim.py    |  10 +-
 ldm/models/diffusion/plms.py    |  10 +-
 ldm/modules/encoders/modules.py |  23 +-
 notebook_helpers.py             |   8 +-
 scripts/img2img.py              |  16 +-
 scripts/inpaint.py              |   7 +-
 scripts/knn2img.py              | 406 ++++++++++++++++++++++++++++++++
 scripts/txt2img.py              |  17 +-
 10 files changed, 518 insertions(+), 19 deletions(-)
 create mode 100644 PatchFile
 create mode 100644 environment-mac.yaml

diff --git a/PatchFile b/PatchFile
new file mode 100644
index 000000000..578eb1385
--- /dev/null
+++ b/PatchFile
@@ -0,0 +1,11 @@
+--- functional.py	2022-08-23 12:11:18.000000000 -0600
++++ /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py	2022-08-23 12:11:38.000000000 -0600
+@@ -2508,7 +2508,7 @@
+         return handle_torch_function(
+             layer_norm, (input, weight, bias), input, normalized_shape, weight=weight, bias=bias, eps=eps
+         )
+-    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
++    return torch.layer_norm(input.contigous(), normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
+ 
+ 
+ def group_norm(
diff --git a/environment-mac.yaml b/environment-mac.yaml
new file mode 100644
index 000000000..97f3cf564
--- /dev/null
+++ b/environment-mac.yaml
@@ -0,0 +1,29 @@
+name: ldm
+channels:
+  - pytorch-nightly
+  - defaults
+dependencies:
+  - python=3.10.4
+  - pip=22.1.2
+  - pytorch
+  - torchvision
+  - numpy=1.23.1
+  - pip:
+    - albumentations==0.4.6
+    - diffusers
+    - opencv-python==4.6.0.66
+    - pudb==2019.2
+    - imageio==2.9.0
+    - imageio-ffmpeg==0.4.2
+    - pytorch-lightning==1.4.2
+    - omegaconf==2.1.1
+    - test-tube>=0.7.5
+    - streamlit>=0.73.1
+    - einops==0.3.0
+    - torch-fidelity==0.3.0
+    - transformers==4.19.2
+    - torchmetrics==0.6.0
+    - kornia==0.6
+    - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
+    - -e git+https://github.com/openai/CLIP.git@main#egg=clip
+    - -e .
diff --git a/ldm/models/diffusion/ddim.py b/ldm/models/diffusion/ddim.py
index fb31215db..1ab73186a 100644
--- a/ldm/models/diffusion/ddim.py
+++ b/ldm/models/diffusion/ddim.py
@@ -15,11 +15,17 @@ def __init__(self, model, schedule="linear", **kwargs):
         self.model = model
         self.ddpm_num_timesteps = model.num_timesteps
         self.schedule = schedule
+        if(torch.cuda.is_available()):
+            self.device_available = "cuda"
+        elif(torch.backends.mps.is_available()):
+            self.device_available = "mps"
+        else:
+            self.device_available = "cpu"
 
     def register_buffer(self, name, attr):
         if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+            if attr.device != torch.device(self.device_available):
+                attr = attr.to(torch.float32).to(torch.device(self.device_available))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
diff --git a/ldm/models/diffusion/plms.py b/ldm/models/diffusion/plms.py
index 78eeb1003..1d48a42c9 100644
--- a/ldm/models/diffusion/plms.py
+++ b/ldm/models/diffusion/plms.py
@@ -14,11 +14,17 @@ def __init__(self, model, schedule="linear", **kwargs):
         self.model = model
         self.ddpm_num_timesteps = model.num_timesteps
         self.schedule = schedule
+        if(torch.cuda.is_available()):
+            self.device_available = "cuda"
+        elif(torch.backends.mps.is_available()):
+            self.device_available = "mps"
+        else:
+            self.device_available = "cpu"
 
     def register_buffer(self, name, attr):
         if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+            if attr.device != torch.device(self.device_available):
+                attr = attr.to(torch.float32).to(torch.device(self.device_available))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py
index ededbe43e..a520b93df 100644
--- a/ldm/modules/encoders/modules.py
+++ b/ldm/modules/encoders/modules.py
@@ -9,6 +9,15 @@
 from ldm.modules.x_transformer import Encoder, TransformerWrapper  # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test
 
 
+def get_default_device_type():
+    if torch.cuda.is_available():
+        return "cuda"
+    elif torch.backends.mps.is_available():
+        return "mps"
+    else:
+        return "cpu"
+
+
 class AbstractEncoder(nn.Module):
     def __init__(self):
         super().__init__()
@@ -35,7 +44,7 @@ def forward(self, batch, key=None):
 
 class TransformerEmbedder(AbstractEncoder):
     """Some transformer encoder layers"""
-    def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, device="cuda"):
+    def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, device=get_default_device_type()):
         super().__init__()
         self.device = device
         self.transformer = TransformerWrapper(num_tokens=vocab_size, max_seq_len=max_seq_len,
@@ -52,7 +61,7 @@ def encode(self, x):
 
 class BERTTokenizer(AbstractEncoder):
     """ Uses a pretrained BERT tokenizer by huggingface. Vocab size: 30522 (?)"""
-    def __init__(self, device="cuda", vq_interface=True, max_length=77):
+    def __init__(self, device=get_default_device_type(), vq_interface=True, max_length=77):
         super().__init__()
         from transformers import BertTokenizerFast  # TODO: add to reuquirements
         self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
@@ -80,7 +89,7 @@ def decode(self, text):
 class BERTEmbedder(AbstractEncoder):
     """Uses the BERT tokenizr model and add some transformer encoder layers"""
     def __init__(self, n_embed, n_layer, vocab_size=30522, max_seq_len=77,
-                 device="cuda",use_tokenizer=True, embedding_dropout=0.0):
+                 device=get_default_device_type(),use_tokenizer=True, embedding_dropout=0.0):
         super().__init__()
         self.use_tknz_fn = use_tokenizer
         if self.use_tknz_fn:
@@ -136,7 +145,7 @@ def encode(self, x):
 
 class FrozenCLIPEmbedder(AbstractEncoder):
     """Uses the CLIP transformer encoder for text (from Hugging Face)"""
-    def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77):
+    def __init__(self, version="openai/clip-vit-large-patch14", device=get_default_device_type(), max_length=77):
         super().__init__()
         self.tokenizer = CLIPTokenizer.from_pretrained(version)
         self.transformer = CLIPTextModel.from_pretrained(version)
@@ -166,9 +175,9 @@ class FrozenCLIPTextEmbedder(nn.Module):
     """
     Uses the CLIP transformer encoder for text.
     """
-    def __init__(self, version='ViT-L/14', device="cuda", max_length=77, n_repeat=1, normalize=True):
+    def __init__(self, version='ViT-L/14', device=get_default_device_type(), max_length=77, n_repeat=1, normalize=True):
         super().__init__()
-        self.model, _ = clip.load(version, jit=False, device="cpu")
+        self.model, _ = clip.load(version, jit=False, device=device)
         self.device = device
         self.max_length = max_length
         self.n_repeat = n_repeat
@@ -202,7 +211,7 @@ def __init__(
             self,
             model,
             jit=False,
-            device='cuda' if torch.cuda.is_available() else 'cpu',
+            device=get_default_device_type(),
             antialias=False,
         ):
         super().__init__()
diff --git a/notebook_helpers.py b/notebook_helpers.py
index 5d0ebd7e1..1ebd8d21f 100644
--- a/notebook_helpers.py
+++ b/notebook_helpers.py
@@ -117,7 +117,13 @@ def get_cond(mode, selected_path):
         c = rearrange(c, '1 c h w -> 1 h w c')
         c = 2. * c - 1.
 
-        c = c.to(torch.device("cuda"))
+        if(torch.cuda.is_available()):
+            device = torch.device("cuda")
+        elif(torch.backends.mps.is_available()):
+            device = torch.device("mps")
+        else:
+            device = torch.device("cpu")
+        c = c.to(device)
         example["LR_image"] = c
         example["image"] = c_up
 
diff --git a/scripts/img2img.py b/scripts/img2img.py
index 421e2151d..40bced669 100644
--- a/scripts/img2img.py
+++ b/scripts/img2img.py
@@ -19,6 +19,14 @@
 from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 
+def get_device():
+    if(torch.cuda.is_available()):
+        return torch.device("cuda")
+    elif(torch.backends.mps.is_available()):
+        return torch.device("mps")
+    else:
+        return torch.device("cpu")
+
 
 def chunk(it, size):
     it = iter(it)
@@ -40,7 +48,7 @@ def load_model_from_config(config, ckpt, verbose=False):
         print("unexpected keys:")
         print(u)
 
-    model.cuda()
+    model.to(get_device())
     model.eval()
     return model
 
@@ -199,7 +207,7 @@ def main():
     config = OmegaConf.load(f"{opt.config}")
     model = load_model_from_config(config, f"{opt.ckpt}")
 
-    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    device = get_device()
     model = model.to(device)
 
     if opt.plms:
@@ -241,8 +249,10 @@ def main():
     print(f"target t_enc is {t_enc} steps")
 
     precision_scope = autocast if opt.precision == "autocast" else nullcontext
+    if device.type == 'mps':
+        precision_scope = nullcontext # have to use f32 on mps
     with torch.no_grad():
-        with precision_scope("cuda"):
+        with precision_scope(device.type):
             with model.ema_scope():
                 tic = time.time()
                 all_samples = list()
diff --git a/scripts/inpaint.py b/scripts/inpaint.py
index d6e6387a9..cb812dc1e 100644
--- a/scripts/inpaint.py
+++ b/scripts/inpaint.py
@@ -61,7 +61,12 @@ def make_batch(image, mask, device):
     model.load_state_dict(torch.load("models/ldm/inpainting_big/last.ckpt")["state_dict"],
                           strict=False)
 
-    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    if(torch.cuda.is_available()):
+        device = torch.device("cuda")
+    elif(torch.backends.mps.is_available()):
+        device = torch.device("mps")
+    else:
+        device = torch.device("cpu")
     model = model.to(device)
     sampler = DDIMSampler(model)
 
diff --git a/scripts/knn2img.py b/scripts/knn2img.py
index e6eaaecab..a321a95e0 100644
--- a/scripts/knn2img.py
+++ b/scripts/knn2img.py
@@ -31,6 +31,412 @@
     "artbench-surrealism",
     "artbench-ukiyo_e",
 ]
+import argparse, os, sys, glob
+import clip
+import torch
+import torch.nn as nn
+import numpy as np
+from omegaconf import OmegaConf
+from PIL import Image
+from tqdm import tqdm, trange
+from itertools import islice
+from einops import rearrange, repeat
+from torchvision.utils import make_grid
+import scann
+import time
+from multiprocessing import cpu_count
+
+from ldm.util import instantiate_from_config, parallel_data_prefetch
+from ldm.models.diffusion.ddim import DDIMSampler
+from ldm.models.diffusion.plms import PLMSSampler
+from ldm.modules.encoders.modules import FrozenClipImageEmbedder, FrozenCLIPTextEmbedder
+
+DATABASES = [
+    "openimages",
+    "artbench-art_nouveau",
+    "artbench-baroque",
+    "artbench-expressionism",
+    "artbench-impressionism",
+    "artbench-post_impressionism",
+    "artbench-realism",
+    "artbench-romanticism",
+    "artbench-renaissance",
+    "artbench-surrealism",
+    "artbench-ukiyo_e",
+]
+
+
+def get_device():
+    if(torch.cuda.is_available()):
+        return 'cuda'
+    elif(torch.backends.mps.is_available()):
+        return 'mps'
+    else:
+        return 'cpu'
+
+
+def chunk(it, size):
+    it = iter(it)
+    return iter(lambda: tuple(islice(it, size)), ())
+
+
+def load_model_from_config(config, ckpt, verbose=False):
+    print(f"Loading model from {ckpt}")
+    pl_sd = torch.load(ckpt, map_location="cpu")
+    if "global_step" in pl_sd:
+        print(f"Global Step: {pl_sd['global_step']}")
+    sd = pl_sd["state_dict"]
+    model = instantiate_from_config(config.model)
+    m, u = model.load_state_dict(sd, strict=False)
+    if len(m) > 0 and verbose:
+        print("missing keys:")
+        print(m)
+    if len(u) > 0 and verbose:
+        print("unexpected keys:")
+        print(u)
+
+    model.to(get_device())
+    model.eval()
+    return model
+
+
+class Searcher(object):
+    def __init__(self, database, retriever_version='ViT-L/14'):
+        assert database in DATABASES
+        # self.database = self.load_database(database)
+        self.database_name = database
+        self.searcher_savedir = f'data/rdm/searchers/{self.database_name}'
+        self.database_path = f'data/rdm/retrieval_databases/{self.database_name}'
+        self.retriever = self.load_retriever(version=retriever_version)
+        self.database = {'embedding': [],
+                         'img_id': [],
+                         'patch_coords': []}
+        self.load_database()
+        self.load_searcher()
+
+    def train_searcher(self, k,
+                       metric='dot_product',
+                       searcher_savedir=None):
+
+        print('Start training searcher')
+        searcher = scann.scann_ops_pybind.builder(self.database['embedding'] /
+                                                  np.linalg.norm(self.database['embedding'], axis=1)[:, np.newaxis],
+                                                  k, metric)
+        self.searcher = searcher.score_brute_force().build()
+        print('Finish training searcher')
+
+        if searcher_savedir is not None:
+            print(f'Save trained searcher under "{searcher_savedir}"')
+            os.makedirs(searcher_savedir, exist_ok=True)
+            self.searcher.serialize(searcher_savedir)
+
+    def load_single_file(self, saved_embeddings):
+        compressed = np.load(saved_embeddings)
+        self.database = {key: compressed[key] for key in compressed.files}
+        print('Finished loading of clip embeddings.')
+
+    def load_multi_files(self, data_archive):
+        out_data = {key: [] for key in self.database}
+        for d in tqdm(data_archive, desc=f'Loading datapool from {len(data_archive)} individual files.'):
+            for key in d.files:
+                out_data[key].append(d[key])
+
+        return out_data
+
+    def load_database(self):
+
+        print(f'Load saved patch embedding from "{self.database_path}"')
+        file_content = glob.glob(os.path.join(self.database_path, '*.npz'))
+
+        if len(file_content) == 1:
+            self.load_single_file(file_content[0])
+        elif len(file_content) > 1:
+            data = [np.load(f) for f in file_content]
+            prefetched_data = parallel_data_prefetch(self.load_multi_files, data,
+                                                     n_proc=min(len(data), cpu_count()), target_data_type='dict')
+
+            self.database = {key: np.concatenate([od[key] for od in prefetched_data], axis=1)[0] for key in
+                             self.database}
+        else:
+            raise ValueError(f'No npz-files in specified path "{self.database_path}" is this directory existing?')
+
+        print(f'Finished loading of retrieval database of length {self.database["embedding"].shape[0]}.')
+
+    def load_retriever(self, version='ViT-L/14', ):
+        model = FrozenClipImageEmbedder(model=version)
+        model.to(get_device())
+        model.eval()
+        return model
+
+    def load_searcher(self):
+        print(f'load searcher for database {self.database_name} from {self.searcher_savedir}')
+        self.searcher = scann.scann_ops_pybind.load_searcher(self.searcher_savedir)
+        print('Finished loading searcher.')
+
+    def search(self, x, k):
+        if self.searcher is None and self.database['embedding'].shape[0] < 2e4:
+            self.train_searcher(k)   # quickly fit searcher on the fly for small databases
+        assert self.searcher is not None, 'Cannot search with uninitialized searcher'
+        if isinstance(x, torch.Tensor):
+            x = x.detach().cpu().numpy()
+        if len(x.shape) == 3:
+            x = x[:, 0]
+        query_embeddings = x / np.linalg.norm(x, axis=1)[:, np.newaxis]
+
+        start = time.time()
+        nns, distances = self.searcher.search_batched(query_embeddings, final_num_neighbors=k)
+        end = time.time()
+
+        out_embeddings = self.database['embedding'][nns]
+        out_img_ids = self.database['img_id'][nns]
+        out_pc = self.database['patch_coords'][nns]
+
+        out = {'nn_embeddings': out_embeddings / np.linalg.norm(out_embeddings, axis=-1)[..., np.newaxis],
+               'img_ids': out_img_ids,
+               'patch_coords': out_pc,
+               'queries': x,
+               'exec_time': end - start,
+               'nns': nns,
+               'q_embeddings': query_embeddings}
+
+        return out
+
+    def __call__(self, x, n):
+        return self.search(x, n)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    # TODO: add n_neighbors and modes (text-only, text-image-retrieval, image-image retrieval etc)
+    # TODO: add 'image variation' mode when knn=0 but a single image is given instead of a text prompt?
+    parser.add_argument(
+        "--prompt",
+        type=str,
+        nargs="?",
+        default="a painting of a virus monster playing guitar",
+        help="the prompt to render"
+    )
+
+    parser.add_argument(
+        "--outdir",
+        type=str,
+        nargs="?",
+        help="dir to write results to",
+        default="outputs/txt2img-samples"
+    )
+
+    parser.add_argument(
+        "--skip_grid",
+        action='store_true',
+        help="do not save a grid, only individual samples. Helpful when evaluating lots of samples",
+    )
+
+    parser.add_argument(
+        "--ddim_steps",
+        type=int,
+        default=50,
+        help="number of ddim sampling steps",
+    )
+
+    parser.add_argument(
+        "--n_repeat",
+        type=int,
+        default=1,
+        help="number of repeats in CLIP latent space",
+    )
+
+    parser.add_argument(
+        "--plms",
+        action='store_true',
+        help="use plms sampling",
+    )
+
+    parser.add_argument(
+        "--ddim_eta",
+        type=float,
+        default=0.0,
+        help="ddim eta (eta=0.0 corresponds to deterministic sampling",
+    )
+    parser.add_argument(
+        "--n_iter",
+        type=int,
+        default=1,
+        help="sample this often",
+    )
+
+    parser.add_argument(
+        "--H",
+        type=int,
+        default=768,
+        help="image height, in pixel space",
+    )
+
+    parser.add_argument(
+        "--W",
+        type=int,
+        default=768,
+        help="image width, in pixel space",
+    )
+
+    parser.add_argument(
+        "--n_samples",
+        type=int,
+        default=3,
+        help="how many samples to produce for each given prompt. A.k.a batch size",
+    )
+
+    parser.add_argument(
+        "--n_rows",
+        type=int,
+        default=0,
+        help="rows in the grid (default: n_samples)",
+    )
+
+    parser.add_argument(
+        "--scale",
+        type=float,
+        default=5.0,
+        help="unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))",
+    )
+
+    parser.add_argument(
+        "--from-file",
+        type=str,
+        help="if specified, load prompts from this file",
+    )
+
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="configs/retrieval-augmented-diffusion/768x768.yaml",
+        help="path to config which constructs model",
+    )
+
+    parser.add_argument(
+        "--ckpt",
+        type=str,
+        default="models/rdm/rdm768x768/model.ckpt",
+        help="path to checkpoint of model",
+    )
+
+    parser.add_argument(
+        "--clip_type",
+        type=str,
+        default="ViT-L/14",
+        help="which CLIP model to use for retrieval and NN encoding",
+    )
+    parser.add_argument(
+        "--database",
+        type=str,
+        default='artbench-surrealism',
+        choices=DATABASES,
+        help="The database used for the search, only applied when --use_neighbors=True",
+    )
+    parser.add_argument(
+        "--use_neighbors",
+        default=False,
+        action='store_true',
+        help="Include neighbors in addition to text prompt for conditioning",
+    )
+    parser.add_argument(
+        "--knn",
+        default=10,
+        type=int,
+        help="The number of included neighbors, only applied when --use_neighbors=True",
+    )
+
+    opt = parser.parse_args()
+
+    config = OmegaConf.load(f"{opt.config}")
+    model = load_model_from_config(config, f"{opt.ckpt}")
+
+    device = torch.device(get_device())
+    model = model.to(device)
+
+    clip_text_encoder = FrozenCLIPTextEmbedder(opt.clip_type).to(device)
+
+    if opt.plms:
+        sampler = PLMSSampler(model)
+    else:
+        sampler = DDIMSampler(model)
+
+    os.makedirs(opt.outdir, exist_ok=True)
+    outpath = opt.outdir
+
+    batch_size = opt.n_samples
+    n_rows = opt.n_rows if opt.n_rows > 0 else batch_size
+    if not opt.from_file:
+        prompt = opt.prompt
+        assert prompt is not None
+        data = [batch_size * [prompt]]
+
+    else:
+        print(f"reading prompts from {opt.from_file}")
+        with open(opt.from_file, "r") as f:
+            data = f.read().splitlines()
+            data = list(chunk(data, batch_size))
+
+    sample_path = os.path.join(outpath, "samples")
+    os.makedirs(sample_path, exist_ok=True)
+    base_count = len(os.listdir(sample_path))
+    grid_count = len(os.listdir(outpath)) - 1
+
+    print(f"sampling scale for cfg is {opt.scale:.2f}")
+
+    searcher = None
+    if opt.use_neighbors:
+        searcher = Searcher(opt.database)
+
+    with torch.no_grad():
+        with model.ema_scope():
+            for n in trange(opt.n_iter, desc="Sampling"):
+                all_samples = list()
+                for prompts in tqdm(data, desc="data"):
+                    print("sampling prompts:", prompts)
+                    if isinstance(prompts, tuple):
+                        prompts = list(prompts)
+                    c = clip_text_encoder.encode(prompts)
+                    uc = None
+                    if searcher is not None:
+                        nn_dict = searcher(c, opt.knn)
+                        c = torch.cat([c, torch.from_numpy(nn_dict['nn_embeddings']).to(device)], dim=1)
+                    if opt.scale != 1.0:
+                        uc = torch.zeros_like(c)
+                    if isinstance(prompts, tuple):
+                        prompts = list(prompts)
+                    shape = [16, opt.H // 16, opt.W // 16]  # note: currently hardcoded for f16 model
+                    samples_ddim, _ = sampler.sample(S=opt.ddim_steps,
+                                                     conditioning=c,
+                                                     batch_size=c.shape[0],
+                                                     shape=shape,
+                                                     verbose=False,
+                                                     unconditional_guidance_scale=opt.scale,
+                                                     unconditional_conditioning=uc,
+                                                     eta=opt.ddim_eta,
+                                                     )
+
+                    x_samples_ddim = model.decode_first_stage(samples_ddim)
+                    x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+
+                    for x_sample in x_samples_ddim:
+                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+                        Image.fromarray(x_sample.astype(np.uint8)).save(
+                            os.path.join(sample_path, f"{base_count:05}.png"))
+                        base_count += 1
+                    all_samples.append(x_samples_ddim)
+
+                if not opt.skip_grid:
+                    # additionally, save as grid
+                    grid = torch.stack(all_samples, 0)
+                    grid = rearrange(grid, 'n b c h w -> (n b) c h w')
+                    grid = make_grid(grid, nrow=n_rows)
+
+                    # to image
+                    grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
+                    Image.fromarray(grid.astype(np.uint8)).save(os.path.join(outpath, f'grid-{grid_count:04}.png'))
+                    grid_count += 1
+
+    print(f"Your samples are ready and waiting for you here: \n{outpath} \nEnjoy.")
 
 
 def chunk(it, size):
diff --git a/scripts/txt2img.py b/scripts/txt2img.py
index 59c16a1db..8ef338877 100644
--- a/scripts/txt2img.py
+++ b/scripts/txt2img.py
@@ -18,6 +18,15 @@
 from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 
+def get_device():
+    if(torch.cuda.is_available()):
+        return 'cuda'
+    elif(torch.backends.mps.is_available()):
+        return 'mps'
+    else:
+        return 'cpu'
+
+
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from transformers import AutoFeatureExtractor
 
@@ -60,7 +69,7 @@ def load_model_from_config(config, ckpt, verbose=False):
         print("unexpected keys:")
         print(u)
 
-    model.cuda()
+    model.to(get_device())
     model.eval()
     return model
 
@@ -239,7 +248,7 @@ def main():
     config = OmegaConf.load(f"{opt.config}")
     model = load_model_from_config(config, f"{opt.ckpt}")
 
-    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    device = torch.device(get_device())
     model = model.to(device)
 
     if opt.plms:
@@ -278,8 +287,10 @@ def main():
         start_code = torch.randn([opt.n_samples, opt.C, opt.H // opt.f, opt.W // opt.f], device=device)
 
     precision_scope = autocast if opt.precision=="autocast" else nullcontext
+    if device.type == 'mps':
+        precision_scope = nullcontext # have to use f32 on mps
     with torch.no_grad():
-        with precision_scope("cuda"):
+        with precision_scope(device.type):
             with model.ema_scope():
                 tic = time.time()
                 all_samples = list()

From 9d515635e55e10468f7bc617f6b8bf86e2b70b74 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Tue, 23 Aug 2022 12:37:11 -0600
Subject: [PATCH 02/17] Updated Readme with Apple Silicon instructions

---
 README.md | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/README.md b/README.md
index c9e6c3bb1..1a4656c3b 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,72 @@
+# Apple Silicon Mac Users
+
+These are Apple Silicon instructions. I haven't tested any of this on Intel Macs.
+
+Please discuss issues with Apple Silicon [here](https://github.com/CompVis/stable-diffusion/issues/25).
+
+How to:
+
+```
+git clone https://github.com/magnusviri/stable-diffusion.git
+cd stable-diffusion
+git checkout apple-silicon-mps-support
+```
+
+Follow the normal instructions below but instead of running `conda env create -f environment.yaml` run `conda env create -f environment-mac.yaml`.
+
+After you follow all the instructions, if you run txt2img.py you might get an error like the following.
+
+```
+  File "/opt/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py", line 2511, in layer_norm
+    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
+RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
+```
+
+If you get that error, do one of the following.
+
+### Patch
+
+Copy the file that has the error (in my case it's "/opt/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py"). Use that path below.
+
+```
+patch /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py < PatchFile
+```
+
+If you get this error:
+
+```
+patch: **** Can't rename file /tmp/po3mqAhy to /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py : Permission denied
+```
+
+Just run the patch again but with sudo, like this.
+
+```
+sudo patch /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py < PatchFile
+```
+
+### Or Manual Fix
+
+Instead of patching, you can manually edit the file named in the error (in my case it's "/opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py"), find the line number it reports (2511 in my case), and append ".contiguous()" to "input". Change this:
+
+```python
+    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
+```
+
+to
+
+```python
+    return torch.layer_norm(input.contiguous(), normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
+```
+
+### Still slow?
+
+Finally! You should also reduce the number of samples or else it will still take forever. Like this.
+
+```bash
+python scripts/txt2img.py --prompt "ocean" --plms --n_samples=1 --n_rows=1 --n_iter=1
+```
+
+
 # Stable Diffusion
 *Stable Diffusion was made possible thanks to a collaboration with [Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and builds upon our previous work:*
 

From 3218a47b8633a5824a64653a40d8fd66b331d680 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Tue, 23 Aug 2022 12:38:35 -0600
Subject: [PATCH 03/17] Updated Readme with Apple Silicon instructions

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1a4656c3b..b794c3acb 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ If you get that error, do one of the following.
 
 ### Patch
 
-Copy the file that has the error (in my case it's "/opt/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py"). Use that path below.
+Select the text for the file that has the error (in my case it's "/opt/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py"). Copy the path and use it in the command below.
 
 ```
 patch /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py < PatchFile

From 2f3f72ae0eb2152044a9b6841155d775cdcf6a56 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Tue, 23 Aug 2022 12:39:08 -0600
Subject: [PATCH 04/17] Updated Readme with Apple Silicon instructions

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b794c3acb..6db8ad8c3 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ Follow the normal instructions below but instead of running `conda env create -f
 After you follow all the instructions, if you run txt2img.py you might get an error like the following.
 
 ```
-  File "/opt/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py", line 2511, in layer_norm
+  File "/opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py", line 2511, in layer_norm
     return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
 RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
 ```

From 7893a1e5fbd9a16c1f07759b6f19dc32bb869486 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Tue, 23 Aug 2022 12:39:25 -0600
Subject: [PATCH 05/17] Updated Readme with Apple Silicon instructions

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6db8ad8c3..2b5e86baf 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ If you get that error, do one of the following.
 
 ### Patch
 
-Select the text for the file that has the error (in my case it's "/opt/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py"). Copy the path and use it in the command below.
+Select the text for the file that has the error (in my case it's "/opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py"). Copy the path and use it in the command below.
 
 ```
 patch /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py < PatchFile

From 6be63b5bbe95f04ad480e172d40994ecbe242b21 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Tue, 23 Aug 2022 13:53:31 -0600
Subject: [PATCH 06/17] Fixed contiguous typo

---
 PatchFile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PatchFile b/PatchFile
index 578eb1385..19883d9f4 100644
--- a/PatchFile
+++ b/PatchFile
@@ -5,7 +5,7 @@
              layer_norm, (input, weight, bias), input, normalized_shape, weight=weight, bias=bias, eps=eps
          )
 -    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
-+    return torch.layer_norm(input.contigous(), normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
++    return torch.layer_norm(input.contiguous(), normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
  
  
  def group_norm(

From 7b6ef90aeaa0cfd5c3f54df3d8d8eb14d782c641 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Wed, 24 Aug 2022 11:30:39 -0600
Subject: [PATCH 07/17] Added watermark requirement

---
 environment-mac.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/environment-mac.yaml b/environment-mac.yaml
index 97f3cf564..22fd28b0f 100644
--- a/environment-mac.yaml
+++ b/environment-mac.yaml
@@ -13,6 +13,7 @@ dependencies:
     - diffusers
     - opencv-python==4.6.0.66
     - pudb==2019.2
+    - invisible-watermark
     - imageio==2.9.0
     - imageio-ffmpeg==0.4.2
     - pytorch-lightning==1.4.2

From 4490f2a49a88c8f3944c50406a3994e0965a4a2c Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Fri, 26 Aug 2022 13:46:36 -0600
Subject: [PATCH 08/17] Changed default n_samples to 1 and n_iter to 1

---
 PatchFile          | 11 -----------
 scripts/txt2img.py |  4 ++--
 2 files changed, 2 insertions(+), 13 deletions(-)
 delete mode 100644 PatchFile

diff --git a/PatchFile b/PatchFile
deleted file mode 100644
index 19883d9f4..000000000
--- a/PatchFile
+++ /dev/null
@@ -1,11 +0,0 @@
---- functional.py	2022-08-23 12:11:18.000000000 -0600
-+++ /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py	2022-08-23 12:11:38.000000000 -0600
-@@ -2508,7 +2508,7 @@
-         return handle_torch_function(
-             layer_norm, (input, weight, bias), input, normalized_shape, weight=weight, bias=bias, eps=eps
-         )
--    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
-+    return torch.layer_norm(input.contiguous(), normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
- 
- 
- def group_norm(
diff --git a/scripts/txt2img.py b/scripts/txt2img.py
index 8ef338877..1a19fc890 100644
--- a/scripts/txt2img.py
+++ b/scripts/txt2img.py
@@ -160,7 +160,7 @@ def main():
     parser.add_argument(
         "--n_iter",
         type=int,
-        default=2,
+        default=1,
         help="sample this often",
     )
     parser.add_argument(
@@ -190,7 +190,7 @@ def main():
     parser.add_argument(
         "--n_samples",
         type=int,
-        default=3,
+        default=1,
         help="how many samples to produce for each given prompt. A.k.a. batch size",
     )
     parser.add_argument(

From 2c6090098b5c9938ad8c7056387d622bcd1f4997 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Fri, 26 Aug 2022 13:47:23 -0600
Subject: [PATCH 09/17] Removed the need for the patch file by calling
 contiguous elsewhere

---
 ldm/modules/attention.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py
index f4eff39cc..dd2bcfe88 100644
--- a/ldm/modules/attention.py
+++ b/ldm/modules/attention.py
@@ -209,6 +209,7 @@ def forward(self, x, context=None):
         return checkpoint(self._forward, (x, context), self.parameters(), self.checkpoint)
 
     def _forward(self, x, context=None):
+        x = x.contiguous()
         x = self.attn1(self.norm1(x)) + x
         x = self.attn2(self.norm2(x), context=context) + x
         x = self.ff(self.norm3(x)) + x

From 49586675e1160ac23b3dcbdac58ab2fc397fd1a4 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Fri, 26 Aug 2022 13:48:09 -0600
Subject: [PATCH 10/17] Added apple and conda-forge channels, which fixes
 crashes

---
 environment-mac.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/environment-mac.yaml b/environment-mac.yaml
index 22fd28b0f..f8f71be1e 100644
--- a/environment-mac.yaml
+++ b/environment-mac.yaml
@@ -1,5 +1,7 @@
 name: ldm
 channels:
+  - apple
+  - conda-forge
   - pytorch-nightly
   - defaults
 dependencies:

From c6ea5ceefff43a10d138e2128e5ef15202cdf5b3 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Fri, 26 Aug 2022 21:20:38 -0600
Subject: [PATCH 11/17] Added lots more info

---
 README.md | 120 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 87 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index 2b5e86baf..1b7e421b6 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,10 @@
 # Apple Silicon Mac Users
 
-These are Apple Silicon instructions. I haven't tested any of this on Intel Macs.
+Several people have gotten Stable Diffusion to work on Apple Silicon Macs using Anaconda. I've gathered up most of their instructions and put them in this fork (and readme). I haven't tested anything besides Anaconda, and I've read about issues with things like miniforge, so it's best if you have an issue that isn't dealt with in this fork head on over to the [Apple Silicon](https://github.com/CompVis/stable-diffusion/issues/25) issue on GitHub (that page is so long that GitHub hides most of it by default, so you need to find the hidden part and expand it to view the whole thing). This fork would not have been possible without the work done by the people on that issue.
 
-Please discuss issues with Apple Silicon [here](https://github.com/CompVis/stable-diffusion/issues/25).
+You have to have macOS 12.3 Monterey or later. Anything earlier than that won't work.
+
+BTW, I haven't tested any of this on Intel Macs.
 
 How to:
 
@@ -10,62 +12,114 @@ How to:
 git clone https://github.com/magnusviri/stable-diffusion.git
 cd stable-diffusion
 git checkout apple-silicon-mps-support
+
+mkdir -p models/ldm/stable-diffusion-v1/
+ln -s /path/to/ckpt/sd-v1-1.ckpt models/ldm/stable-diffusion-v1/model.ckpt
+
+conda env create -f environment-mac.yaml
+conda activate ldm
 ```
 
-Follow the normal instructions below but instead of running `conda env create -f environment.yaml` run `conda env create -f environment-mac.yaml`.
+These instructions are identical to the main repo except I added environment-mac.yaml because Mac doesn't have cudatoolkit.
 
-After you follow all the instructions, if you run txt2img.py you might get an error like the following.
+After you follow all the instructions and run txt2img.py you might get several errors. Here's the errors I've seen and found solutions for.
 
-```
-  File "/opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py", line 2511, in layer_norm
-    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
-RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
-```
+### Doesn't work anymore?
 
-If you get that error, do one of the following.
+We are using PyTorch nightly, which includes support for MPS. I don't know exactly how Anaconda does updates, but on morning everything quit working and I couldn't think of anything I did that would've changed anything. I eventually got it working again. I don't know what changed overnight. PyTorch-nightly changes overnight but I'm pretty sure I didn't manually update it. Either way, things are probably going to be bumpy on Apple Silicon until PyTorch releases a firm version that we can lock to.
 
-### Patch
+To manually update to the latest version of PyTorch nightly (which could fix issues), run this command.
 
-Select the text for the file that has the error (in my case it's "/opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py"). Copy the path and use it in the command below.
+	conda install pytorch torchvision torchaudio -c pytorch-nightly
 
-```
-patch /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py < PatchFile
-```
+### "No module named cv2" (or some other module)
 
-If you get this error:
+Did you remember to `conda activate ldm`? If your terminal prompt begins with "(ldm)" then you activated it. If it begins with "(base)" or something else you haven't.
 
-```
-patch: **** Can't rename file /tmp/po3mqAhy to /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py : Permission denied
-```
+If you have activated the ldm virtual environment, the problem could be that I have something installed that you don't and you'll just need to manually install it.
+
+	pip install *name*
 
-Just run the patch again but with sudo, like this.
+### "The operator [name] is not current implemented for the MPS device." (sic)
+
+Example error.
 
 ```
-sudo patch /opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py < PatchFile
+...
+NotImplementedError: The operator 'aten::index.Tensor' is not current implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on [https://github.com/pytorch/pytorch/issues/77764](https://github.com/pytorch/pytorch/issues/77764). As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.
 ```
 
-### Or Manual Fix
+Just do what it says:
 
-Instead of patching you can manually edit the file it says (in my case it's "/opt/anaconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py") and find the line it says (2511 in my case), and add ".contiguous()" to the end of "input". Like this.
+	export PYTORCH_ENABLE_MPS_FALLBACK=1
 
-```python
-    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
-```
+### "Could not build wheels for tokenizers"
+
+I have not seen this error because I had Rust installed on my computer before I started playing with Stable Diffusion. The fix is to install Rust.
+
+	curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+
+### How come `--seed` doesn't work?
+
+"Completely reproducible results are not guaranteed across PyTorch releases, individual commits, or different platforms. Furthermore, results may not be reproducible between CPU and GPU executions, even when using identical seeds." -- [PyTorch docs](https://pytorch.org/docs/stable/notes/randomness.html)
+
+There is an [open issue](https://github.com/pytorch/pytorch/issues/78035) (as of August 2022) in pytorch regarding gradient inconsistency. I am guessing that's what is causing this.
+
+### libiomp5.dylib error?
 
-to
+	OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized.
+
+There are several things you can do. First, you could use something besides Anaconda like miniforge. I read a lot of things online telling people to use something else, but I am stuck with Anaconda for other reasons.
+
+Or you can try this.
+
+	export KMP_DUPLICATE_LIB_OK=True
+
+Or this (which takes forever on my computer and didn't work anyway).
+
+	conda install nomkl
+
+This error happens with Anaconda on Macs, and [nomkl](https://stackoverflow.com/questions/66224879/what-is-the-nomkl-python-package-used-for) is supposed to fix the issue (it isn't a module but a fix of some sort). [There's more suggestions](https://stackoverflow.com/questions/53014306/error-15-initializing-libiomp5-dylib-but-found-libiomp5-dylib-already-initial), like uninstalling tensorflow and reinstalling. I haven't tried them.
+
+### Not enough memory.
+
+This seems to be a common problem and is probably the underlying problem for a lot of symptoms (listed below). The fix is to lower your image size or to add `model.half()` right after the model is loaded. I should probably test it out. I've read that the reason this fixes problems is because it converts the model from 32-bit to 16-bit and that leaves more RAM for other things. I have no idea how that would affect the quality of the images though.
+
+See [this issue](https://github.com/CompVis/stable-diffusion/issues/71).
+
+### "Error: product of dimension sizes > 2**31'"
+
+This error happens with img2img, which I haven't played with too much yet. But I know it's because your image is too big or the resolution isn't a multiple of 32x32. Because the stable-diffusion model was trained on images that were 512 x 512, it's always best to use that output size (which is the default). However, if you're using that size and you get the above error, try 256 x 256 or 512 x 256 or something as the source image.
+
+BTW, 2**31-1 = [2,147,483,647](https://en.wikipedia.org/wiki/2,147,483,647#In_computing), which is also 32-bit signed [LONG_MAX](https://en.wikipedia.org/wiki/C_data_types) in C.
+
+### I just got Rickrolled! Do I have a virus?
+
+You don't have a virus. It's part of the project. Here's [Rick](https://github.com/magnusviri/stable-diffusion/blob/main/assets/rick.jpeg) and here's [the code](https://github.com/magnusviri/stable-diffusion/blob/69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc/scripts/txt2img.py#L79) that swaps him in. It's an NSFW filter, which, IMO, doesn't work very well (and we call this "computer vision", sheesh).
+
+Actually, this could be happening because there's not enough RAM. You could try the `model.half()` suggestion or specify smaller output images.
+
+### My images come out black
+
+I haven't solved this issue. I just throw away my black images. There's a [similar issue](https://github.com/CompVis/stable-diffusion/issues/69) on CUDA GPUs where the images come out green. Maybe it's the same issue? Someone in that issue says to use "--precision full", but this fork actually disables that flag. I don't know why; someone else provided that code and I don't know what it does. Maybe the `model.half()` suggestion above would fix this issue too. I should probably test it.
+
+### "view size is not compatible with input tensor's size and stride"
 
-```python
-    return torch.layer_norm(input.contiguous(), normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
 ```
+  File "/opt/anaconda3/envs/ldm/lib/python3.10/site-packages/torch/nn/functional.py", line 2511, in layer_norm
+    return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
+RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
+```
+
+Update to the latest version of magnusviri/stable-diffusion. We were patching pytorch but we found a file in stable-diffusion that we could change instead. This is a 32-bit vs 16-bit problem.
 
 ### Still slow?
 
-Finally! You should also reduce the number of samples or else it will still take forever. Like this.
+I changed the defaults of n_samples and n_iter to 1 so that it uses less RAM and makes less images so it will be faster the first time you use it. I don't actually know what n_samples does internally, but I know it consumes a lot more RAM. The n_iter flag just loops around the image creation code, so it shouldn't consume more RAM (it should be if you're going to do multiple images because the libraries and model will already be loaded).
 
-```bash
-python scripts/txt2img.py --prompt "ocean" --plms --n_samples=1 --n_rows=1 --n_iter=1
-```
+This is the default in this fork/branch:
 
+	python scripts/txt2img.py --prompt "ocean" --plms --n_samples=1 --n_rows=1 --n_iter=1
 
 # Stable Diffusion
 *Stable Diffusion was made possible thanks to a collaboration with [Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and builds upon our previous work:*

From c7a41f48a4e8d185930a6debaf1f966e5bab1d1a Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Fri, 26 Aug 2022 21:37:20 -0600
Subject: [PATCH 12/17] Added lots more info

---
 README.md | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 1b7e421b6..340158895 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Apple Silicon Mac Users
 
-Several people have gotten Stable Diffusion to work on Apple Silicon Macs using Anaconda. I've gathered up most of their instructions and put them in this fork (and readme). I haven't tested anything besides Anaconda, and I've read about issues with things like miniforge, so it's best if you have an issue that isn't dealt with in this fork head on over to the [Apple Silicon](https://github.com/CompVis/stable-diffusion/issues/25) issue on GitHub (that page is so long that GitHub hides most of it by default, so you need to find the hidden part and expand it to view the whole thing). This fork would not have been possible without the work done by the people on that issue.
+Several people have gotten Stable Diffusion to work on Apple Silicon Macs using Anaconda. I've gathered up most of their instructions and put them in this fork (and readme). I haven't tested anything besides Anaconda, and I've read about issues with things like miniforge, so if you have an issue that isn't dealt with in this fork then head on over to the [Apple Silicon](https://github.com/CompVis/stable-diffusion/issues/25) issue on GitHub (that page is so long that GitHub hides most of it by default, so you need to find the hidden part and expand it to view the whole thing). This fork would not have been possible without the work done by the people on that issue.
 
 You have to have macOS 12.3 Monterey or later. Anything earlier than that won't work.
 
@@ -26,7 +26,7 @@ After you follow all the instructions and run txt2img.py you might get several e
 
 ### Doesn't work anymore?
 
-We are using PyTorch nightly, which includes support for MPS. I don't know exactly how Anaconda does updates, but on morning everything quit working and I couldn't think of anything I did that would've changed anything. I eventually got it working again. I don't know what changed overnight. PyTorch-nightly changes overnight but I'm pretty sure I didn't manually update it. Either way, things are probably going to be bumpy on Apple Silicon until PyTorch releases a firm version that we can lock to.
+We are using PyTorch nightly, which includes support for MPS. I don't know exactly how Anaconda does updates, but I woke up one morning and Stable Diffusion crashed and I couldn't think of anything I did that would've changed anything the night before, when it worked. A day and a half later I finally got it working again. I don't know what changed overnight. PyTorch-nightly changes overnight but I'm pretty sure I didn't manually update it. Either way, things are probably going to be bumpy on Apple Silicon until PyTorch releases a firm version that we can lock to.
 
 To manually update to the latest version of PyTorch nightly (which could fix issues), run this command.
 
@@ -36,10 +36,12 @@ To manually update to the latest version of PyTorch nightly (which could fix iss
 
 Did you remember to `conda activate ldm`? If your terminal prompt begins with "(ldm)" then you activated it. If it begins with "(base)" or something else you haven't.
 
-If you have activated the ldm virtual environment, the problem could be that I have something installed that you don't and you'll just need to manually install it.
+If you have activated the ldm virtual environment, the problem could be that I have something installed that you don't and you'll just need to manually install it. 
 
 	pip install *name*
 
+You might also need to install Rust (I mention this again below).
+
 ### "The operator [name] is not current implemented for the MPS device." (sic)
 
 Example error.
@@ -61,7 +63,9 @@ I have not seen this error because I had Rust installed on my computer before I
 
 ### How come `--seed` doesn't work?
 
-"Completely reproducible results are not guaranteed across PyTorch releases, individual commits, or different platforms. Furthermore, results may not be reproducible between CPU and GPU executions, even when using identical seeds." -- [PyTorch docs](https://pytorch.org/docs/stable/notes/randomness.html)
+> Completely reproducible results are not guaranteed across PyTorch releases, individual commits, or different platforms. Furthermore, results may not be reproducible between CPU and GPU executions, even when using identical seeds.
+
+[PyTorch docs](https://pytorch.org/docs/stable/notes/randomness.html)
 
 There is an [open issue](https://github.com/pytorch/pytorch/issues/78035) (as of August 2022) in pytorch regarding gradient inconsistency. I am guessing that's what is causing this.
 
@@ -115,11 +119,13 @@ Update to the latest version of magnusviri/stable-diffusion. We were patching py
 
 ### Still slow?
 
-I changed the defaults of n_samples and n_iter to 1 so that it uses less RAM and makes less images so it will be faster the first time you use it. I don't actually know what n_samples does internally, but I know it consumes a lot more RAM. The n_iter flag just loops around the image creation code, so it shouldn't consume more RAM (it should be if you're going to do multiple images because the libraries and model will already be loaded).
+I changed the defaults of n_samples and n_iter to 1 so that it uses less RAM and makes fewer images so it will be faster the first time you use it. I don't actually know what n_samples does internally, but I know it consumes a lot more RAM. The n_iter flag just loops around the image creation code, so it shouldn't consume more RAM (it should be faster if you're going to do multiple images because the libraries and model will already be loaded--use a prompt file to get this speed boost).
+
+These flags are the default sample and iter settings in this fork/branch:
 
-This is the default in this fork/branch:
+	python scripts/txt2img.py --prompt "ocean" --n_samples=1 --n_iter=1
 
-	python scripts/txt2img.py --prompt "ocean" --plms --n_samples=1 --n_rows=1 --n_iter=1
+Happy fuzzy internet image copying!
 
 # Stable Diffusion
 *Stable Diffusion was made possible thanks to a collaboration with [Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and builds upon our previous work:*

From 6cbed825463539f1baf83054c907b9484a0570fd Mon Sep 17 00:00:00 2001
From: Antti Tarvainen <antti.tarvainen@iki.fi>
Date: Sun, 28 Aug 2022 11:58:00 +0300
Subject: [PATCH 13/17] Load pytorch from pytorch-nightly, not from conda-forge

The order of the channels list is significant. The package is installed from the first channel where it is found. Since we want to install the nightly version of pytorch, it should appear in the list first.

See https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-channels.html for exact rules of priority.
---
 environment-mac.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment-mac.yaml b/environment-mac.yaml
index f8f71be1e..71631ec35 100644
--- a/environment-mac.yaml
+++ b/environment-mac.yaml
@@ -1,8 +1,8 @@
 name: ldm
 channels:
+  - pytorch-nightly
   - apple
   - conda-forge
-  - pytorch-nightly
   - defaults
 dependencies:
   - python=3.10.4

From 8f73da131b65cf1ee9a5205a677f58f70d90da68 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Sun, 28 Aug 2022 22:57:11 -0600
Subject: [PATCH 14/17] Added message about moving to lstein

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 340158895..8e96e674d 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # Apple Silicon Mac Users
 
+NOTE: I have submitted a merge request to move the changes in this repo to the [lstein fork of stable-diffusion](https://github.com/lstein/stable-diffusion/) because he has so many wonderful features in his fork! Another fork I know of that has up-to-date Mac support and some really cool features is the [Birch-san stable-diffusion fork](https://github.com/Birch-san/stable-diffusion). If my pull request to lstein is accepted, I no longer plan on updating this fork with the latest changes!
+
 Several people have gotten Stable Diffusion to work on Apple Silicon Macs using Anaconda. I've gathered up most of their instructions and put them in this fork (and readme). I haven't tested anything besides Anaconda, and I've read about issues with things like miniforge, so if you have an issue that isn't dealt with in this fork then head on over to the [Apple Silicon](https://github.com/CompVis/stable-diffusion/issues/25) issue on GitHub (that page is so long that GitHub hides most of it by default, so you need to find the hidden part and expand it to view the whole thing). This fork would not have been possible without the work done by the people on that issue.
 
 You have to have macOS 12.3 Monterey or later. Anything earlier than that won't work.
@@ -38,6 +40,7 @@ Did you remember to `conda activate ldm`? If your terminal prompt begins with "(
 
 If you have activated the ldm virtual environment, the problem could be that I have something installed that you don't and you'll just need to manually install it. 
 
+	conda activate ldm
 	pip install *name*
 
 You might also need to install Rust (I mention this again below).

From 3bad2265e1c2b1540526bf0bd0eb6d9c4a69b4cc Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Mon, 29 Aug 2022 00:10:17 -0600
Subject: [PATCH 15/17] Fix --fixed_code (fixes seed)

---
 scripts/img2img.py | 6 ++++++
 scripts/txt2img.py | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/scripts/img2img.py b/scripts/img2img.py
index 40bced669..543e0f5b9 100644
--- a/scripts/img2img.py
+++ b/scripts/img2img.py
@@ -237,6 +237,12 @@ def main():
     base_count = len(os.listdir(sample_path))
     grid_count = len(os.listdir(outpath)) - 1
 
+    start_code = None
+    if opt.fixed_code:
+        start_code = torch.randn(
+            [opt.n_samples, opt.C, opt.H // opt.f, opt.W // opt.f], device="cpu"
+        ).to(torch.device(device))
+
     assert os.path.isfile(opt.init_img)
     init_image = load_img(opt.init_img).to(device)
     init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
diff --git a/scripts/txt2img.py b/scripts/txt2img.py
index 1a19fc890..e9bfac1c8 100644
--- a/scripts/txt2img.py
+++ b/scripts/txt2img.py
@@ -284,7 +284,9 @@ def main():
 
     start_code = None
     if opt.fixed_code:
-        start_code = torch.randn([opt.n_samples, opt.C, opt.H // opt.f, opt.W // opt.f], device=device)
+        start_code = torch.randn(
+            [opt.n_samples, opt.C, opt.H // opt.f, opt.W // opt.f], device="cpu"
+        ).to(torch.device(device))
 
     precision_scope = autocast if opt.precision=="autocast" else nullcontext
     if device.type == 'mps':

From 544fc97178fbb87162c422bc292d6893d26d3054 Mon Sep 17 00:00:00 2001
From: James Reynolds <magnsuviri@me.com>
Date: Mon, 29 Aug 2022 00:41:17 -0600
Subject: [PATCH 16/17] Undo blind change that didn't work

---
 scripts/img2img.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/scripts/img2img.py b/scripts/img2img.py
index 543e0f5b9..40bced669 100644
--- a/scripts/img2img.py
+++ b/scripts/img2img.py
@@ -237,12 +237,6 @@ def main():
     base_count = len(os.listdir(sample_path))
     grid_count = len(os.listdir(outpath)) - 1
 
-    start_code = None
-    if opt.fixed_code:
-        start_code = torch.randn(
-            [opt.n_samples, opt.C, opt.H // opt.f, opt.W // opt.f], device="cpu"
-        ).to(torch.device(device))
-
     assert os.path.isfile(opt.init_img)
     init_image = load_img(opt.init_img).to(device)
     init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)

From 3c9f69e1def5e4750a7bdb9888c30dac611e13b0 Mon Sep 17 00:00:00 2001
From: Ben Firshman <ben@firshman.co.uk>
Date: Wed, 31 Aug 2022 13:56:44 -0700
Subject: [PATCH 17/17] Make it work with plain pip

---
 requirements.txt   | 21 +++++++++++++++++++++
 scripts/txt2img.py |  2 ++
 2 files changed, 23 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..2fd6c8194
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,21 @@
+numpy==1.23.1
+--pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+
+albumentations==0.4.6
+diffusers
+opencv-python==4.6.0.66
+pudb==2019.2
+invisible-watermark
+imageio==2.9.0
+imageio-ffmpeg==0.4.2
+pytorch-lightning==1.4.2
+omegaconf==2.1.1
+test-tube>=0.7.5
+streamlit>=0.73.1
+einops==0.3.0
+torch-fidelity==0.3.0
+transformers==4.19.2
+torchmetrics==0.6.0
+kornia==0.6
+-e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
+-e git+https://github.com/openai/CLIP.git@main#egg=clip
diff --git a/scripts/txt2img.py b/scripts/txt2img.py
index e9bfac1c8..6a8a04c68 100644
--- a/scripts/txt2img.py
+++ b/scripts/txt2img.py
@@ -14,6 +14,8 @@
 from torch import autocast
 from contextlib import contextmanager, nullcontext
 
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+
 from ldm.util import instantiate_from_config
 from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler