diff --git a/README.md b/README.md
index c073b59..d1253bf 100644
--- a/README.md
+++ b/README.md
@@ -45,21 +45,26 @@ huggingface-cli login
 #### Programatic Usage
 
 ```python
-import torch
-
 from stable_diffusion_videos import StableDiffusionWalkPipeline
+from diffusers.schedulers import LMSDiscreteScheduler
+import torch
 
 pipeline = StableDiffusionWalkPipeline.from_pretrained(
     "CompVis/stable-diffusion-v1-4",
     use_auth_token=True,
     torch_dtype=torch.float16,
     revision="fp16",
-).to('cuda')
+    scheduler=LMSDiscreteScheduler(
+        beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
+    )
+).to("cuda")
 
-pipeline.walk(
+video_path = pipeline.walk(
     prompts=['a cat', 'a dog'],
     seeds=[42, 1337],
-    num_interpolation_steps=5,  # Change to 60-200 for better results...3-5 for testing
+    num_interpolation_steps=3,
+    height=512,                 # use multiples of 64 if > 512. Multiples of 8 if < 512.
+    width=512,                  # use multiples of 64 if > 512. Multiples of 8 if < 512.
     output_dir='dreams',        # Where images/videos will be saved
     name='animals_test',        # Subdirectory of output_dir where images/videos will be saved
     guidance_scale=8.5,         # Higher adheres to prompt more, lower lets model take the wheel
@@ -67,6 +72,45 @@ pipeline.walk(
 )
 ```
 
+*New!* Music can be added to the video by providing a path to an audio file. The audio will inform the rate of interpolation so the videos move to the beat 🎶
+
+```python
+from stable_diffusion_videos import StableDiffusionWalkPipeline
+from diffusers.schedulers import LMSDiscreteScheduler
+import torch
+
+pipeline = StableDiffusionWalkPipeline.from_pretrained(
+    "CompVis/stable-diffusion-v1-4",
+    use_auth_token=True,
+    torch_dtype=torch.float16,
+    revision="fp16",
+    scheduler=LMSDiscreteScheduler(
+        beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
+    )
+).to("cuda")
+
+
+# Seconds in the song.
+audio_offsets = [146, 148]
+fps = 30  # Use lower values for testing (5 or 10), higher values for better quality (30 or 60)
+
+# Convert seconds to frames
+num_interpolation_steps = [(b-a) * fps for a, b in zip(audio_offsets, audio_offsets[1:])]
+
+video_path = pipeline.walk(
+    prompts=['a cat', 'a dog'],
+    seeds=[42, 1337],
+    num_interpolation_steps=num_interpolation_steps,
+    audio_filepath='audio.mp3',
+    audio_start_sec=audio_offsets[0],
+    height=512,              # use multiples of 64 if > 512. Multiples of 8 if < 512.
+    width=512,               # use multiples of 64 if > 512. Multiples of 8 if < 512.
+    output_dir='dreams',     # Where images/videos will be saved
+    guidance_scale=7.5,      # Higher adheres to prompt more, lower lets model take the wheel
+    num_inference_steps=50,  # Number of diffusion steps per image generated. 50 is good default
+)
+```
+
 #### Run the App Locally
 
 ```python
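
The beat-synced example added in the second hunk pairs two prompts with two timestamps, and the list comprehension produces one interpolation segment per consecutive pair of offsets, so the same pattern extends to any number of prompts as long as there is one audio offset per prompt. Below is a minimal sketch (not part of the patch) of that generalization, using only the arguments shown in the diff; the prompts, seeds, offsets, and `audio.mp3` path are illustrative placeholders, and the pipeline is constructed the same way as in the hunk above.

```python
import torch
from diffusers.schedulers import LMSDiscreteScheduler
from stable_diffusion_videos import StableDiffusionWalkPipeline

# Same pipeline construction as in the patch above.
pipeline = StableDiffusionWalkPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    use_auth_token=True,
    torch_dtype=torch.float16,
    revision="fp16",
    scheduler=LMSDiscreteScheduler(
        beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
    ),
).to("cuda")

fps = 30
# One timestamp (in seconds of the song) per prompt; these values are placeholders.
audio_offsets = [146, 148, 150]

# One (end - start) * fps entry per consecutive pair of offsets, so each prompt
# transition spans exactly the corresponding slice of audio.
num_interpolation_steps = [(b - a) * fps for a, b in zip(audio_offsets, audio_offsets[1:])]
# -> [(148 - 146) * 30, (150 - 148) * 30] == [60, 60]

video_path = pipeline.walk(
    prompts=['a cat', 'a dog', 'a bird'],  # one prompt per offset (placeholders)
    seeds=[42, 1337, 2022],                # one seed per prompt (placeholders)
    num_interpolation_steps=num_interpolation_steps,
    audio_filepath='audio.mp3',            # placeholder path to a local audio file
    audio_start_sec=audio_offsets[0],
    height=512,
    width=512,
    output_dir='dreams',
    guidance_scale=7.5,
    num_inference_steps=50,
)
```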