Merge pull request #65 from nateraw/pipeline-refactor

🎨 big restructure

nateraw authored Oct 7, 2022
2 parents d44b066 + ed95d7c commit 4e98524
Showing 8 changed files with 773 additions and 498 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -131,4 +131,5 @@ dmypy.json
# Extra stuff to ignore
dreams
images
run.py
examples
72 changes: 59 additions & 13 deletions README.md
@@ -45,21 +45,69 @@ huggingface-cli login
#### Programmatic Usage

```python
from stable_diffusion_videos import StableDiffusionWalkPipeline
from diffusers.schedulers import LMSDiscreteScheduler
import torch

pipeline = StableDiffusionWalkPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    use_auth_token=True,
    torch_dtype=torch.float16,
    revision="fp16",
    scheduler=LMSDiscreteScheduler(
        beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
    )
).to("cuda")

video_path = pipeline.walk(
    prompts=['a cat', 'a dog'],
    seeds=[42, 1337],
    num_interpolation_steps=3,   # Change to 60-200 for better results...3-5 for testing
    height=512,                  # use multiples of 64 if > 512. Multiples of 8 if < 512.
    width=512,                   # use multiples of 64 if > 512. Multiples of 8 if < 512.
    output_dir='dreams',         # Where images/videos will be saved
    name='animals_test',         # Subdirectory of output_dir where images/videos will be saved
    guidance_scale=8.5,          # Higher adheres to prompt more, lower lets model take the wheel
    num_inference_steps=50,      # Number of diffusion steps per image generated. 50 is good default
)
```
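`pipeline.walk` returns the filepath of the rendered video; the interpolated frames and the video itself are written under `output_dir/name` (here, `dreams/animals_test`).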

*New!* Music can be added to the video by providing a path to an audio file. The audio will inform the rate of interpolation so the videos move to the beat 🎶

```python
from stable_diffusion_videos import StableDiffusionWalkPipeline
from diffusers.schedulers import LMSDiscreteScheduler
import torch

pipeline = StableDiffusionWalkPipeline.from_pretrained(
"CompVis/stable-diffusion-v1-4",
use_auth_token=True,
torch_dtype=torch.float16,
revision="fp16",
scheduler=LMSDiscreteScheduler(
beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
)
).to("cuda")


# Seconds in the song.
audio_offsets = [146, 148]
fps = 30 # Use lower values for testing (5 or 10), higher values for better quality (30 or 60)

# Convert seconds to frames
num_interpolation_steps = [(b-a) * fps for a, b in zip(audio_offsets, audio_offsets[1:])]

video_path = pipeline.walk(
prompts=['a cat', 'a dog'],
seeds=[42, 1337],
num_interpolation_steps=num_interpolation_steps,
audio_filepath='audio.mp3',
audio_start_sec=audio_offsets[0],
height=512, # use multiples of 64 if > 512. Multiples of 8 if < 512.
width=512, # use multiples of 64 if > 512. Multiples of 8 if < 512.
output_dir='dreams', # Where images/videos will be saved
guidance_scale=7.5, # Higher adheres to prompt more, lower lets model take the wheel
num_inference_steps=50, # Number of diffusion steps per image generated. 50 is good default
)
```
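Each entry of `num_interpolation_steps` here is the number of frames generated between consecutive audio offsets: (148 − 146) seconds × 30 fps = 60 frames, so the transition between the two prompts spans exactly that two-second slice of the song.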

@@ -97,9 +145,7 @@ pip install realesrgan
Then, you'll be able to use `upsample=True` in the `walk` method, like this:

```python
pipeline.walk(['a cat', 'a dog'], [234, 345], upsample=True)
```

The above may cause you to run out of VRAM. No problem, you can do upsampling separately.
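For example, you can render the frames without upsampling first and run Real-ESRGAN over them afterwards. A minimal sketch based on the `RealESRGANModel` usage shown in `app.py` below (the frame path is hypothetical, and the model's call signature may differ between versions):

```python
import torch
from PIL import Image

from stable_diffusion_videos.upsampling import RealESRGANModel

# Load the Real-ESRGAN upsampler on its own, using the same weights app.py uses.
upsampler = RealESRGANModel.from_pretrained("nateraw/real-esrgan")
upsampler.to("cuda" if torch.cuda.is_available() else "cpu")

# Upsample a frame saved by a previous walk() run (hypothetical path);
# app.py calls the model directly on an image in the same way.
frame = Image.open("dreams/animals_test/frame000000.png")
upscaled = upsampler(frame)
```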
6 changes: 4 additions & 2 deletions requirements.txt
@@ -1,5 +1,7 @@
transformers
diffusers==0.3.0
diffusers==0.4.0
scipy
fire
gradio
librosa
av
22 changes: 7 additions & 15 deletions stable_diffusion_videos/__init__.py
@@ -61,9 +61,7 @@ def _attach(package_name, submodules=None, submod_attrs=None):
else:
submodules = set(submodules)

attr_to_modules = {
attr: mod for mod, attrs in submod_attrs.items() for attr in attrs
}
attr_to_modules = {attr: mod for mod, attrs in submod_attrs.items() for attr in attrs}

__all__ = list(submodules | attr_to_modules.keys())

@@ -96,28 +94,22 @@ def __dir__():
return __getattr__, __dir__, list(__all__)



__getattr__, __dir__, __all__ = _attach(
__name__,
submodules=[],
submod_attrs={
"commands.user": ["notebook_login"],
"app": [
"interface",
"pipeline",
],
"stable_diffusion_pipeline": [
"StableDiffusionPipeline",
"StableDiffusionWalkPipeline",
"NoCheck",
"make_video_pyav",
"get_timesteps_arr",
],
"stable_diffusion_walk": [
"walk",
"SCHEDULERS",
"pipeline",
],
"upsampling": [
"PipelineRealESRGAN"
]
"upsampling": ["PipelineRealESRGAN"],
},
)

__version__ = "0.4.0"
__version__ = "0.5.0"
57 changes: 21 additions & 36 deletions stable_diffusion_videos/app.py
@@ -3,48 +3,48 @@
import gradio as gr
import torch

from .stable_diffusion_walk import SCHEDULERS, pipeline, walk
from .stable_diffusion_pipeline import StableDiffusionWalkPipeline
from .upsampling import RealESRGANModel

pipeline = StableDiffusionWalkPipeline.from_pretrained(
"CompVis/stable-diffusion-v1-4",
use_auth_token=True,
torch_dtype=torch.float16,
revision="fp16",
).to("cuda")


def fn_images(
prompt,
seed,
scheduler,
guidance_scale,
num_inference_steps,
disable_tqdm,
upsample,
):
if upsample:
from .upsampling import PipelineRealESRGAN

upsampling_pipeline = PipelineRealESRGAN.from_pretrained('nateraw/real-esrgan')
if getattr(pipeline, "upsampler", None) is None:
pipeline.upsampler = RealESRGANModel.from_pretrained("nateraw/real-esrgan")
pipeline.upsampler.to(pipeline.device)

pipeline.set_progress_bar_config(disable=disable_tqdm)
pipeline.scheduler = SCHEDULERS[scheduler] # klms, default, ddim
with torch.autocast("cuda"):
img = pipeline(
prompt,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
generator=torch.Generator(device=pipeline.device).manual_seed(seed),
output_type='pil' if not upsample else 'numpy',
output_type="pil" if not upsample else "numpy",
)["sample"][0]
return img if not upsample else upsampling_pipeline(img)
return img if not upsample else pipeline.upsampler(img)


def fn_videos(
prompt_1,
seed_1,
prompt_2,
seed_2,
scheduler,
guidance_scale,
num_inference_steps,
num_interpolation_steps,
do_loop,
disable_tqdm,
use_lerp_for_text,
output_dir,
upsample,
):
@@ -54,20 +54,15 @@ def fn_videos(
prompts = [x for x in prompts if x.strip()]
seeds = seeds[: len(prompts)]

video_path = walk(
do_loop=do_loop,
make_video=True,
video_path = pipeline.walk(
guidance_scale=guidance_scale,
prompts=prompts,
seeds=seeds,
num_interpolation_steps=num_interpolation_steps,
num_inference_steps=num_inference_steps,
use_lerp_for_text=use_lerp_for_text,
output_dir=output_dir,
name=time.strftime("%Y%m%d-%H%M%S"),
scheduler=scheduler,
disable_tqdm=disable_tqdm,
upsample=upsample
upsample=upsample,
)
return video_path

@@ -76,21 +71,15 @@ def fn_videos(
fn_videos,
inputs=[
gr.Textbox("blueberry spaghetti"),
gr.Number(42, label='Seed 1', precision=0),
gr.Number(42, label="Seed 1", precision=0),
gr.Textbox("strawberry spaghetti"),
gr.Number(42, label='Seed 2', precision=0),
gr.Dropdown(["klms", "ddim", "default"], value="klms"),
gr.Number(42, label="Seed 2", precision=0),
gr.Slider(0.0, 20.0, 8.5),
gr.Slider(1, 200, 50),
gr.Slider(3, 240, 10),
gr.Checkbox(False),
gr.Checkbox(False),
gr.Checkbox(True),
gr.Textbox(
"dreams",
placeholder=(
"Folder where outputs will be saved. Each output will be saved in a new folder."
),
placeholder=("Folder where outputs will be saved. Each output will be saved in a new folder."),
),
gr.Checkbox(False),
],
@@ -101,19 +90,15 @@ def fn_videos(
fn_images,
inputs=[
gr.Textbox("blueberry spaghetti"),
gr.Number(42, label='Seed', precision=0),
gr.Dropdown(["klms", "ddim", "default"], value="klms"),
gr.Number(42, label="Seed", precision=0),
gr.Slider(0.0, 20.0, 8.5),
gr.Slider(1, 200, 50),
gr.Checkbox(False),
gr.Checkbox(False),
],
outputs=gr.Image(type="pil"),
)

interface = gr.TabbedInterface(
[interface_images, interface_videos], ["Images!", "Videos!"]
)
interface = gr.TabbedInterface([interface_images, interface_videos], ["Images!", "Videos!"])

if __name__ == "__main__":
interface.launch(debug=True)
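With this layout, the Gradio app can be launched through the `interface` object that `__init__.py` re-exports; a minimal sketch (importing `interface` builds the pipeline at import time, so it assumes a CUDA GPU and a prior `huggingface-cli login`):

```python
# Launch the Gradio demo defined in app.py.
from stable_diffusion_videos import interface

if __name__ == "__main__":
    interface.launch()
```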