Commit ed95d7c: 📝 update readme

nateraw committed Oct 7, 2022 (1 parent: 5015d19)

Showing 1 changed file with 49 additions and 5 deletions: README.md
#### Programmatic Usage

```python
from stable_diffusion_videos import StableDiffusionWalkPipeline
from diffusers.schedulers import LMSDiscreteScheduler
import torch

pipeline = StableDiffusionWalkPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    use_auth_token=True,
    torch_dtype=torch.float16,
    revision="fp16",
    scheduler=LMSDiscreteScheduler(
        beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
    ),
).to("cuda")

video_path = pipeline.walk(
    prompts=['a cat', 'a dog'],
    seeds=[42, 1337],
    num_interpolation_steps=3,  # use 3-5 for testing, 60-200 for better results
    height=512,                 # use multiples of 64 if > 512. Multiples of 8 if < 512.
    width=512,                  # use multiples of 64 if > 512. Multiples of 8 if < 512.
    output_dir='dreams',        # Where images/videos will be saved
    name='animals_test',        # Subdirectory of output_dir where images/videos will be saved
    guidance_scale=8.5,         # Higher adheres to prompt more, lower lets model take the wheel
    num_inference_steps=50,     # Number of diffusion steps per image generated. 50 is a good default
)
```
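`walk` returns the path to the rendered video. As a quick sanity check, assuming you are in a Jupyter/IPython notebook, you can preview the result inline:

```python
# Preview the generated mp4 inline (notebook-only; requires IPython).
# Pass embed=True if the file lives outside the notebook's served directory.
from IPython.display import Video

Video(video_path)
```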

*New!* Add music to your video by providing a path to an audio file. The audio informs the rate of interpolation, so the video moves to the beat 🎶

```python
from stable_diffusion_videos import StableDiffusionWalkPipeline
from diffusers.schedulers import LMSDiscreteScheduler
import torch

pipeline = StableDiffusionWalkPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    use_auth_token=True,
    torch_dtype=torch.float16,
    revision="fp16",
    scheduler=LMSDiscreteScheduler(
        beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
    ),
).to("cuda")


# Seconds in the song.
audio_offsets = [146, 148]
fps = 30 # Use lower values for testing (5 or 10), higher values for better quality (30 or 60)

# Convert seconds to frames
num_interpolation_steps = [(b-a) * fps for a, b in zip(audio_offsets, audio_offsets[1:])]
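# e.g. audio_offsets = [146, 148] at fps = 30 gives num_interpolation_steps = [60]:
# a single 2-second segment rendered as 60 interpolated frames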

video_path = pipeline.walk(
    prompts=['a cat', 'a dog'],
    seeds=[42, 1337],
    num_interpolation_steps=num_interpolation_steps,
    audio_filepath='audio.mp3',
    audio_start_sec=audio_offsets[0],  # Align the video with this point in the song
    height=512,                        # use multiples of 64 if > 512. Multiples of 8 if < 512.
    width=512,                         # use multiples of 64 if > 512. Multiples of 8 if < 512.
    output_dir='dreams',               # Where images/videos will be saved
    guidance_scale=7.5,                # Higher adheres to prompt more, lower lets model take the wheel
    num_inference_steps=50,            # Number of diffusion steps per image generated. 50 is a good default
)
```
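The same pattern extends to longer walks: N audio offsets pair with N prompts and seeds, yielding N-1 beat-synced segments. A minimal sketch, assuming the pipeline defined above; the timings and prompts here are illustrative, not from the original example:

```python
# Three beats -> two interpolation segments (illustrative timings).
audio_offsets = [146, 148, 152]
fps = 30

# One entry per consecutive pair of offsets: [60, 120] at fps = 30.
num_interpolation_steps = [(b - a) * fps for a, b in zip(audio_offsets, audio_offsets[1:])]

video_path = pipeline.walk(
    prompts=['a cat', 'a dog', 'a bird'],  # one prompt per audio offset
    seeds=[42, 1337, 9001],                # one seed per prompt
    num_interpolation_steps=num_interpolation_steps,
    audio_filepath='audio.mp3',
    audio_start_sec=audio_offsets[0],
)
```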

#### Run the App Locally

```python
from stable_diffusion_videos import interface

interface.launch()
```
